Spaces:
Build error
Build error
ready to eval phi-3.5
Browse files
notebooks/00e_Data Analysis_Fine_Tuned_RPP_MNT_2048.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
results/mac-results_rpp_with_mnt_2048_metrics.csv
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model,rpp,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_incomplete_translations,num_max_output_tokens,eval_time
|
2 |
+
Qwen/Qwen2-72B-Instruct,1.00,0.47892081926016034,18.953782447211417,0.18953782447211412,0.4521569211424892,0.0,0.09885260370697264,0.09885260370697264,0.4768835566987496,0,0,15.931156222418359
|
3 |
+
Qwen/Qwen2-72B-Instruct,1.02,0.47968366756320546,19.0137490702917,0.190137490702917,0.45197054564395384,0.0,0.08120035304501325,0.08120035304501325,0.47800479714301924,0,0,15.855251544571933
|
4 |
+
Qwen/Qwen2-72B-Instruct,1.04,0.476362681282195,18.52063321160408,0.1852063321160408,0.44894205112535357,0.0,0.10503089143865843,0.10503089143865843,0.4742108804113156,0,0,15.78640776699029
|
5 |
+
Qwen/Qwen2-72B-Instruct,1.06,0.46796363839506494,17.982473951038504,0.17982473951038513,0.441041000758417,0.0,0.07325684024713151,0.07325684024713151,0.466484924380041,0,0,15.872021182700795
|
6 |
+
Qwen/Qwen2-7B-Instruct,1.00,0.442240791493943,14.38814929350883,0.1438814929350883,0.4161653982436496,0.0,12.81288614298323,12.81288614298323,0.3256127432585341,6,2,2.2162400706090026
|
7 |
+
Qwen/Qwen2-7B-Instruct,1.02,0.4400998640836595,15.16172261831792,0.1516172261831792,0.41646468384393903,0.0,7.1562224183583405,7.1562224183583405,0.356523119707599,6,1,1.586054721977052
|
8 |
+
Qwen/Qwen2-7B-Instruct,1.04,0.4390136558190875,14.958631815014014,0.14958631815014015,0.41437075031278525,0.0,0.1853486319505737,0.1853486319505737,0.43553983411076963,1,0,1.0123565754633717
|
9 |
+
Qwen/Qwen2-7B-Instruct,1.06,0.43283215765150845,14.28087386760537,0.14280873867605376,0.4072536098321874,0.0,0.2030008826125331,0.2030008826125331,0.42908711780306746,2,0,1.002647837599294
|
10 |
+
Qwen/Qwen2-7B-Instruct,1.08,0.423560805217557,13.659683698817107,0.1365968369881711,0.3967743499621005,0.0,0.22153574580759047,0.22153574580759047,0.4195681312114654,0,0,1.000882612533098
|
11 |
+
Qwen/Qwen2-7B-Instruct,1.10,0.41350531365414334,12.922649874705083,0.12922649874705075,0.38793653105711157,0.0,0.17740511915269197,0.17740511915269197,0.410371282612889,0,0,1.0114739629302736
|
12 |
+
internlm/internlm2_5-7b-chat,1.00,0.4289996929258777,14.734881589173108,0.1473488158917311,0.4096656779566272,0.0,12.751103265666373,12.751103265666373,0.3161377411176559,0,2,3.063548102383054
|
13 |
+
internlm/internlm2_5-7b-chat,1.02,0.42662469043021944,14.583816688798015,0.1458381668879802,0.4073494324538721,0.0,9.824360105913504,9.824360105913504,0.3288814067035907,0,1,2.127978817299206
|
14 |
+
internlm/internlm2_5-7b-chat,1.04,0.4154585167056314,13.534659133050223,0.13534659133050217,0.3970519149664059,0.0,6.527802294792586,6.527802294792586,0.3410387160469389,0,1,2.136804942630185
|
15 |
+
internlm/internlm2_5-7b-chat,1.06,0.40395886478553783,12.346740971499404,0.12346740971499397,0.38739729431897413,0.0,6.533980582524272,6.533980582524272,0.3315547851383537,1,1,2.153574580759047
|
16 |
+
internlm/internlm2_5-7b-chat,1.08,0.3873176839854818,11.075674965706343,0.11075674965706349,0.3727703497077143,0.0,9.820829655781113,9.820829655781113,0.2985977687581126,1,1,2.148278905560459
|
17 |
+
internlm/internlm2_5-7b-chat,1.10,0.3769306874386757,10.305163787094209,0.10305163787094214,0.3636303538397795,0.0,6.525154457193292,6.525154457193292,0.30942993176825007,1,1,2.172109443954104
|
18 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.46111105085070175,17.87914973742753,0.17879149737427524,0.4340569906195885,0.0,0.088261253309797,0.088261253309797,0.4593579935085897,0,0,16.127978817299205
|
19 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.45715172190795766,17.436884594979905,0.17436884594979907,0.4309998159694175,0.0,0.09267431597528684,0.09267431597528684,0.45532756172969063,0,0,15.987643424536628
|
20 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.45604679603642545,17.440173470996626,0.17440173470996626,0.43028250668592427,0.0,0.13062665489849956,0.13062665489849956,0.4534907811633413,0,0,15.958517210944395
|
21 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.45204163612198556,16.89523258317781,0.16895232583177808,0.42594768159318286,0.0,0.12533097969991175,0.12533097969991175,0.44960959510411197,0,0,15.998234774933804
|
22 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.4442106511292453,16.16623784482793,0.16166237844827927,0.4196173445876259,0.0,0.18711385701676964,0.18711385701676964,0.4406628043373358,0,0,15.89143865842895
|
23 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.43798379261381615,15.601722576240661,0.1560172257624066,0.4135114396257313,0.0,0.08649602824360106,0.08649602824360106,0.4363516968290564,0,0,15.981465136804943
|
24 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.43363250166217604,15.209540658023398,0.1520954065802339,0.4087331128794971,0.0,5.798764342453663,5.798764342453663,0.36177551894565085,0,1,1.7625772285966461
|
25 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.4329054166518245,15.19102241646024,0.1519102241646024,0.4070425935096388,0.0,5.77846425419241,5.77846425419241,0.3613372514106729,0,1,1.7493380406001766
|
26 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.04,0.43204787009562073,15.051351661582961,0.15051351661582968,0.40610700867871297,0.0,0.11827007943512798,0.11827007943512798,0.42985292605837966,0,0,1.0467784642541924
|
27 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.06,0.42754293149125455,14.449130821290163,0.1444913082129016,0.4003685841336065,0.0,0.176522506619594,0.176522506619594,0.4243183675786234,0,0,1.0450132391879965
|
28 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.08,0.4206626658729054,13.933703757385222,0.13933703757385224,0.3962652599741041,0.0,0.21888790820829657,0.21888790820829657,0.41674375951677545,0,0,1.0423654015887025
|
29 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.10,0.41112112403991513,13.303738403756983,0.1330373840375698,0.3870517415572443,0.0,0.13857016769638128,0.13857016769638128,0.4086785702502848,0,0,1.0458958517210943
|
30 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.4068335357738006,13.565136550617618,0.13565136550617618,0.38651661218475886,0.0,0.1059135039717564,0.1059135039717564,0.40498051631383686,0,0,1.1871138570167696
|
31 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.4064914781094827,13.42987641622816,0.1342987641622816,0.3863631817026095,0.0,6.238305383936452,6.238305383936452,0.3357933166106685,0,1,1.8102383053839364
|
32 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.40548598969949756,13.295092218891954,0.13295092218891957,0.3851644729228478,0.0,0.1297440423654016,0.1297440423654016,0.403228526175785,0,0,1.1597528684907326
|
33 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.4014727027723293,13.10860799057166,0.1310860799057166,0.3808877762129881,0.0,0.20741394527802295,0.20741394527802295,0.3979249127866333,0,0,1.1650485436893203
|
34 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.3987018836449559,12.850537785783194,0.1285053778578319,0.3791139249625647,0.0,0.2903795233892321,0.2903795233892321,0.39380632270315097,0,0,1.1685789938217124
|
35 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.39322399484561765,12.361161644811926,0.12361161644811926,0.3735331110324942,0.0,0.1500441306266549,0.1500441306266549,0.3906969938466816,0,0,1.1562224183583407
|
scripts/eval-mac.sh
CHANGED
@@ -24,6 +24,8 @@ grep MemTotal /proc/meminfo
|
|
24 |
|
25 |
# ./scripts/eval-model.sh shenzhi-wang/Llama3.1-8B-Chinese-Chat
|
26 |
|
|
|
|
|
27 |
# ./scripts/eval-epochs.sh internlm internlm2_5-7b-chat
|
28 |
|
29 |
# ./scripts/eval-epochs.sh Qwen Qwen2-7B-Instruct
|
@@ -37,10 +39,10 @@ export START_REPETITION_PENALTY=1.0
|
|
37 |
export END_REPETITION_PENALTY=1.1
|
38 |
export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
|
39 |
|
40 |
-
./scripts/eval-rpp.sh internlm internlm2_5-7b-chat checkpoint-140
|
41 |
|
42 |
-
./scripts/eval-rpp.sh Qwen Qwen2-7B-Instruct checkpoint-105
|
43 |
|
44 |
-
./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
|
45 |
|
46 |
-
./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
|
|
|
24 |
|
25 |
# ./scripts/eval-model.sh shenzhi-wang/Llama3.1-8B-Chinese-Chat
|
26 |
|
27 |
+
./scripts/eval-model.sh microsoft/Phi-3.5-mini-instruct
|
28 |
+
|
29 |
# ./scripts/eval-epochs.sh internlm internlm2_5-7b-chat
|
30 |
|
31 |
# ./scripts/eval-epochs.sh Qwen Qwen2-7B-Instruct
|
|
|
39 |
export END_REPETITION_PENALTY=1.1
|
40 |
export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
|
41 |
|
42 |
+
# ./scripts/eval-rpp.sh internlm internlm2_5-7b-chat checkpoint-140
|
43 |
|
44 |
+
# ./scripts/eval-rpp.sh Qwen Qwen2-7B-Instruct checkpoint-105
|
45 |
|
46 |
+
# ./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
|
47 |
|
48 |
+
# ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
|