feat: update as large model

- 1_Pooling/config.json +1 -1
- README.md +5 -5
- config.json +5 -5
- eval/similarity_evaluation_valid_results.csv +55 -55
- pytorch_model.bin +2 -2
- similarity_evaluation_test_results.csv +1 -1
1_Pooling/config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-    "word_embedding_dimension":
+    "word_embedding_dimension": 1024,
     "pooling_mode_cls_token": false,
     "pooling_mode_mean_tokens": true,
     "pooling_mode_max_tokens": false,
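The new pooling config declares 1024-dimensional word embeddings with mean pooling as the only active mode. As a minimal sketch of what this configuration computes (assuming the usual attention-mask-weighted average that sentence-transformers' `Pooling` module applies; the function name is illustrative):

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Average token embeddings over real (non-padding) tokens.

    token_embeddings: (batch, seq_len, 1024) encoder hidden states.
    attention_mask:   (batch, seq_len), 1 for real tokens, 0 for padding.
    Returns:          (batch, 1024) sentence embeddings.
    """
    mask = attention_mask.unsqueeze(-1).float()    # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)  # sum only real tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)       # guard against empty rows
    return summed / counts
```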
README.md
CHANGED
@@ -10,7 +10,7 @@ tags:
 
 # {MODEL_NAME}
 
-This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a
+This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 1024 dimensional dense vector space and can be used for tasks like clustering or semantic search.
 
 <!--- Describe your model here -->
 
@@ -85,7 +85,7 @@ The model was trained with the parameters:
 
 **DataLoader**:
 
-`torch.utils.data.dataloader.DataLoader` of length
+`torch.utils.data.dataloader.DataLoader` of length 657 with parameters:
 ```
 {'batch_size': 16, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
 ```
@@ -98,7 +98,7 @@ Parameters of the fit()-Method:
 ```
 {
     "epochs": 5,
-    "evaluation_steps":
+    "evaluation_steps": 65,
     "evaluator": "sentence_transformers.evaluation.EmbeddingSimilarityEvaluator.EmbeddingSimilarityEvaluator",
     "max_grad_norm": 1,
     "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
@@ -107,7 +107,7 @@ Parameters of the fit()-Method:
     },
     "scheduler": "WarmupLinear",
    "steps_per_epoch": null,
-    "warmup_steps":
+    "warmup_steps": 329,
    "weight_decay": 0.01
 }
 ```
@@ -117,7 +117,7 @@ Parameters of the fit()-Method:
 ```
 SentenceTransformer(
   (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: RobertaModel
-  (1): Pooling({'word_embedding_dimension':
+  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
 )
 ```
 
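The updated hyperparameters are internally consistent: 657 batches per epoch times 5 epochs gives 3285 optimizer steps, so `"warmup_steps": 329` is a ~10% linear warmup and `"evaluation_steps": 65` runs the evaluator roughly ten times per epoch. A minimal usage sketch matching the updated README, assuming `{MODEL_NAME}` is replaced with this repository's actual id (the input sentences are placeholders):

```python
from sentence_transformers import SentenceTransformer

# "{MODEL_NAME}" is the README's placeholder; substitute the real repo id.
model = SentenceTransformer("{MODEL_NAME}")

sentences = ["This is an example sentence", "Each sentence is converted"]
embeddings = model.encode(sentences)
print(embeddings.shape)  # (2, 1024) after this change
```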
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "data/klue-roberta-large-nli1-bs16-msl512/",
   "architectures": [
     "RobertaModel"
   ],
@@ -10,14 +10,14 @@
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size":
+  "hidden_size": 1024,
   "initializer_range": 0.02,
-  "intermediate_size":
+  "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
   "max_position_embeddings": 514,
   "model_type": "roberta",
-  "num_attention_heads":
-  "num_hidden_layers":
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
   "tokenizer_class": "BertTokenizer",
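The four newly filled-in values (`hidden_size` 1024, `intermediate_size` 4096, 16 attention heads, 24 layers) are exactly the standard RoBERTa-large shape, matching the commit title. A quick sanity-check sketch, assuming a local checkout of this repository in the working directory:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained(".")  # reads config.json from the checkout
assert config.hidden_size == 1024         # large-model width
assert config.intermediate_size == 4096
assert config.num_attention_heads == 16
assert config.num_hidden_layers == 24     # large-model depth
print(config.model_type)                  # "roberta"
```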
eval/similarity_evaluation_valid_results.csv
CHANGED
@@ -1,56 +1,56 @@
 epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,-1,0.
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,-1,0.
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,
-2,-1,0.
-3,
-3,
-3,
-3,
-3,
-3,
-3,
-3,
-3,
-3,
-3,-1,0.
-4,
-4,
-4,
-4,
-4,
-4,
-4,
-4,
-4,
-4,
-4,-1,0.
+0,65,0.9450962885766199,0.9037485312968464,0.9300528284029176,0.8961820342448713,0.930168949196146,0.896110016861719,0.933104700089962,0.8879552570118354
+0,130,0.9526485401319882,0.9062624461649387,0.9348572633179625,0.8976952155060833,0.9347841568754535,0.8973433877113094,0.9427269587858736,0.8931837048675654
+0,195,0.9595303579974707,0.9142733521375342,0.9442939418418763,0.9078559841512914,0.9441828535745902,0.9074786717054953,0.9478106148875446,0.8986859957448045
+0,260,0.9617001459144195,0.9188656765282182,0.9497835035141037,0.9136206882736468,0.9497293369045067,0.9132892165241399,0.9508986039244776,0.9011062635391401
+0,325,0.9628843334863715,0.9195580934253044,0.9500758792170405,0.9151045140421136,0.949929744304782,0.9147803738509395,0.9569920092930422,0.907382567446735
+0,390,0.9639579818543964,0.9223004332520758,0.9547931997051272,0.9190447827776115,0.9546934717175326,0.9187055407061121,0.9594420682106848,0.9112284935694275
+0,455,0.9662851052923659,0.9234145621939238,0.9562164179474448,0.9204125035516133,0.9561325870578272,0.9201735977449076,0.9592004342631336,0.9091295996563256
+0,520,0.9694668380056263,0.9309343328286745,0.9570074669966071,0.925301327121036,0.9569165617673985,0.9250269387014896,0.9582035592261776,0.9124855539569819
+0,585,0.9723612652107801,0.9324385623020064,0.9600483906788069,0.9276242140710466,0.9600082335271236,0.9273525809703603,0.9634036126741529,0.9166930002769497
+0,650,0.9744469915615347,0.9353950953638092,0.9628905748830648,0.9301869586433137,0.9627792555434287,0.9298928854822393,0.9635921674947908,0.9157777609860347
+0,-1,0.9744524509263498,0.9355745161025236,0.9630585226647224,0.9304102648275934,0.9629404281301782,0.9300652088036956,0.9647233290303684,0.917322511532086
+1,65,0.9760382480370817,0.9370389924076827,0.9659784341543698,0.9339729211138691,0.9659088531046812,0.9336654226335102,0.9671644650379402,0.9193094198718994
+1,130,0.9768567008880943,0.9383797185233935,0.966976866585407,0.9350778362724242,0.9669284723078647,0.9349076368023743,0.9683039280036203,0.9209162665876254
+1,195,0.9778746440494153,0.9409410098053567,0.9683395404189199,0.9387545930602372,0.9683065192454419,0.9386532386868401,0.9688243931890176,0.9235225014434626
+1,260,0.9795497244284369,0.9449748441785889,0.9698341704645701,0.9410306774423478,0.9697687450014839,0.9408439718266611,0.9702414461023302,0.9257443725758133
+1,325,0.9798225721923421,0.947249561028625,0.9682651531514844,0.9409950130566243,0.9682122729790582,0.9408348449079516,0.9685372609627604,0.9273494992106206
+1,390,0.9803734736419105,0.9481844196889749,0.971003886781885,0.9432195157262822,0.9709454052933055,0.942975312728328,0.9718693716702692,0.9295814148732897
+1,455,0.9815065852342996,0.9489358917276964,0.9726105667397389,0.944342114777752,0.9725523836956391,0.9440875389655905,0.9717673072973531,0.9275553295106307
+1,520,0.9824674746258831,0.950574690083999,0.9732375048994956,0.9454818584640391,0.9731859620813956,0.9452850977967319,0.9736496764064112,0.9309509005669244
+1,585,0.9834244099703123,0.9533429772388795,0.9735770845392127,0.9481193454290129,0.9735277109299411,0.9478712714901714,0.9742870584082444,0.9331610785707243
+1,650,0.9844838828609993,0.9569825284960917,0.9753458221354517,0.9517483778762286,0.9752657262291878,0.9514221836233381,0.9764416951453416,0.9385130733527801
+1,-1,0.9847396924311966,0.9573263321941934,0.9755750797328923,0.9521299235341704,0.9755060804202205,0.9517926830564324,0.9767299780374573,0.9392059022779248
+2,65,0.9849280550428714,0.9564386346887174,0.975355278091752,0.9518105539947014,0.975273110867893,0.9514462527083342,0.9761829280721366,0.9374334883451071
+2,130,0.9856071157384126,0.9597032713473029,0.9751640796679493,0.9529847039974815,0.9750701628991254,0.9526615731033029,0.9755695268811876,0.9390378797393223
+2,195,0.9859334513411054,0.9605144457246071,0.9759017943922763,0.9548045124621254,0.9758099432015181,0.954389330316847,0.9771623084737303,0.9417788847508552
+2,260,0.9862758435673878,0.9611431344587528,0.9762307964622666,0.9560592418460738,0.9761557714559368,0.9557421850879082,0.9766933081487434,0.9412265727418067
+2,325,0.9868534453389665,0.9626816722592928,0.9777151758527289,0.9580903465752495,0.9776304245311046,0.957749154892145,0.978140305171663,0.9435958064140455
+2,390,0.9869902318860228,0.9623139746784058,0.9765050263219914,0.9569004021910574,0.9764176476661701,0.956613016647335,0.9774514690603672,0.9428688563376247
+2,455,0.9875622597019832,0.9646683713862236,0.9764541224999095,0.9578370641968195,0.9763978432964757,0.9575549338083919,0.9785823442285312,0.9459736774907043
+2,520,0.9878622009005984,0.9654818255623808,0.9766720669973291,0.9579808535828294,0.9765788407528522,0.9576038087013262,0.9778498306464314,0.9451623766005413
+2,585,0.9881254815995715,0.9661134656570236,0.9760380605821765,0.9579104664382305,0.9759601781446547,0.9576136431845318,0.9783430528545717,0.9467766862217248
+2,650,0.9886687528782107,0.9669586479759691,0.9769656870044339,0.9595981068654681,0.9768811949697955,0.9592689895091854,0.9798147834272309,0.9486770933526824
+2,-1,0.9884827920605181,0.9662499919106681,0.9765310081820143,0.9591638819397147,0.9764470520894734,0.9588334152165847,0.9798888388856032,0.9484838323008439
+3,65,0.9891468349701465,0.9680061058709235,0.9780550002341166,0.9612016071048037,0.9779603797040222,0.9607824470643986,0.9801733116896864,0.9490462250289583
+3,130,0.9896395834058724,0.9687165648456506,0.9784713356979137,0.9615203187022282,0.9783683017773172,0.9611338072637498,0.9802643917898448,0.9491347957130004
+3,195,0.9897943528902955,0.9704400258640831,0.977684348319498,0.961913896188716,0.9775909726989168,0.9615426048714073,0.9792587420039076,0.9499707021943303
+3,260,0.9900803714929103,0.9709038571238544,0.9783607650547733,0.9620548257187613,0.978254529996118,0.9616923013274344,0.9801511799902952,0.9510189267532941
+3,325,0.9902257454808906,0.9701505636668742,0.9787095535380529,0.9626983327671962,0.9786018335836466,0.9623218673256017,0.9810608236727453,0.9514222030256614
+3,390,0.9903217377212674,0.9703480447649762,0.9783905028282679,0.9623447102360272,0.9782825293533796,0.9619087690576225,0.9802648493385162,0.9503120805052606
+3,455,0.9909482066923018,0.9724131088754565,0.9789971373937102,0.9637042934503356,0.9789004077421916,0.9632790954634505,0.9810104730908307,0.9523355014930789
+3,520,0.9912414015262909,0.9733805352917818,0.979178746031331,0.9644451628779105,0.9790602590272242,0.9640127472405658,0.9813014392180701,0.9536155427498092
+3,585,0.9913366846799521,0.9735695665751978,0.9793307301387646,0.9646584746716608,0.9792220380326317,0.9642132063622015,0.9815417308818629,0.9541096803248864
+3,650,0.9914706908746421,0.9737326611560257,0.9791627724732839,0.9646147822116262,0.9790615569247757,0.9641776269370242,0.9814915525192658,0.9541220076242296
+3,-1,0.9914733788590939,0.9738092628072146,0.9791244723756433,0.9646293060736866,0.9790228285435293,0.9641859627968589,0.9815264393377655,0.9542978513061603
+4,65,0.9916588471098222,0.974415181844439,0.979153847946894,0.9646976740349861,0.9790519724212301,0.9642751132333449,0.9819891938092719,0.9552999150826269
+4,130,0.9917996515382658,0.974651140032634,0.9792893424778295,0.9648144645370323,0.9791774185573862,0.9643468329454702,0.9820253411134156,0.9552227857882973
+4,195,0.9920554135053716,0.9752445257156087,0.9793309829778141,0.9652100065627497,0.9792220929725468,0.9647258537753207,0.9823864256448313,0.9562413029594432
+4,260,0.9922657266011131,0.9761242934389289,0.9794334206785208,0.9655279616341972,0.9793211175347241,0.9650452166091836,0.9822848287988739,0.9564869412099499
+4,325,0.9924025874561615,0.9765859794441941,0.9795358668079737,0.9657344209526362,0.9794208049989456,0.9652410097926988,0.9822421810797303,0.9565666751032983
+4,390,0.9924248342054038,0.9765355938472762,0.9793553360408334,0.9657949666764678,0.9792455728290694,0.9652942962433387,0.9826434029143594,0.957296959186484
+4,455,0.9925654346756603,0.9769371667211181,0.9796608241691501,0.9663312165501814,0.9795530520865743,0.9658446248131752,0.983011574149525,0.9579941854963273
+4,520,0.9925661999747731,0.9768303498144754,0.9796286752981723,0.9662626671930274,0.9795220803093988,0.9657850962295496,0.9830883486843035,0.9580017313731142
+4,585,0.9926228347498095,0.9769527790770869,0.9797660980161369,0.9663102418931035,0.9796594311389403,0.9658401494432395,0.9830880548631337,0.9579560631976755
+4,650,0.992678343704566,0.9771605378302233,0.9798170442514417,0.9664383685259873,0.9797116474885933,0.9659760169510083,0.9830724557942112,0.9580649340081575
+4,-1,0.9926791071392681,0.977163214893553,0.9798184006952244,0.9664402041301481,0.9797130071640839,0.9659775184666868,0.9830726261956798,0.9580662121233869
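Each row logs Pearson and Spearman correlations between gold similarity scores and four embedding-similarity measures, recorded every 65 steps (`steps = -1` marks the end of an epoch). A minimal sketch of how the cosine columns are derived, assuming precomputed sentence-pair embeddings and gold labels (the function name is illustrative):

```python
import numpy as np
from scipy.stats import pearsonr, spearmanr

def cosine_correlations(emb1: np.ndarray, emb2: np.ndarray, gold: np.ndarray):
    """Mirror of the cosine_pearson / cosine_spearman columns.

    emb1, emb2: (n, 1024) embeddings of the two sides of each pair.
    gold:       (n,) human similarity scores.
    """
    cos = (emb1 * emb2).sum(axis=1) / (
        np.linalg.norm(emb1, axis=1) * np.linalg.norm(emb2, axis=1)
    )
    return pearsonr(gold, cos)[0], spearmanr(gold, cos)[0]
```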
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0a8bad88a4baa6dcbea5e20b8f66b8c0069fd71d7793cadd6b94c0e1e31cc4be
+size 1346757353
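The weights are tracked with Git LFS, so the repository stores only this pointer; the `oid` is the SHA-256 of the ~1.3 GB payload. A sketch for verifying a downloaded copy against the pointer (the local filename is an assumption):

```python
import hashlib

EXPECTED = "0a8bad88a4baa6dcbea5e20b8f66b8c0069fd71d7793cadd6b94c0e1e31cc4be"

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:            # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == EXPECTED, "checksum mismatch"
```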
similarity_evaluation_test_results.csv
CHANGED
@@ -1,2 +1,2 @@
 epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
--1,-1,0.
+-1,-1,0.9040766817071945,0.9021119177397706,0.8982409026032018,0.8979277574268917,0.8994350526345978,0.8988094489080267,0.8912904104683059,0.8895395612884133