Spaces:
Build error
Build error
final results
Browse files- data/Qwen2-72B-Instruct_shots_metrics.csv +2 -0
- data/Qwen2-7B-Instruct_shots_metrics.csv +6 -1
- data/Qwen2.5-1.5B-Instruct_metrics.csv +11 -11
- data/Qwen2.5-1.5B-Instruct_shots_metrics.csv +7 -7
- data/Qwen2.5-7B-Instruct_metrics.csv +11 -11
- data/Qwen2.5-7B-Instruct_shots_metrics.csv +7 -7
- data/best_metrics.csv +16 -17
- data/best_results.csv +0 -0
- data/few-shots_metrics.csv +15 -8
- data/fine-tuning_metrics.csv +11 -11
- llm_toolkit/logical_reasoning_utils.py +15 -16
- logs/{Qwen2.5-72B-shots.txt → Qwen2-72B-shots.txt} +0 -0
- notebooks/00_Data Analysis.ipynb +2 -2
- notebooks/01a_internlm2_5-20b-chat_analysis.ipynb +2 -2
- notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb +2 -2
- notebooks/01a_internlm2_5-7b-chat_analysis.ipynb +2 -2
- notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb +2 -2
- notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb +2 -2
- notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb +2 -2
- notebooks/02c_Qwen2.5-3B-Instruct_analysis.ipynb +2 -2
- notebooks/02d_Qwen2.5-7B-Instruct_analysis.ipynb +2 -2
- notebooks/02g_Qwen2.5-72B-Instruct_analysis.ipynb +2 -2
- notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb +2 -2
- notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb +2 -2
- notebooks/04b_OpenAI-Models_analysis.ipynb +2 -2
data/Qwen2-72B-Instruct_shots_metrics.csv
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
|
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
3 |
+
5,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/shots-05,0.778,0.79531681283817,0.778,0.7722405723376975,0.9876666666666667
|
4 |
+
10,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/shots-10,0.775,0.7935761766767606,0.775,0.7740445924385057,0.9946666666666667
|
data/Qwen2-7B-Instruct_shots_metrics.csv
CHANGED
@@ -1,3 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666667
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666667
|
3 |
+
5,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-05,0.357,0.7558028637770273,0.357,0.4365296526050415,0.997
|
4 |
+
10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5593333333333333,0.7492096614172068,0.5593333333333333,0.5991418028711349,0.99
|
5 |
+
20,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-20,0.5686666666666667,0.7520662661534714,0.5686666666666667,0.606675877273536,0.9976666666666667
|
6 |
+
30,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-30,0.658,0.7560839971561838,0.658,0.6783328633107383,0.9886666666666667
|
7 |
+
40,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-40,0.685,0.748405672861726,0.685,0.6925462636609232,0.991
|
8 |
+
50,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-50,0.6303333333333333,0.7557444295881455,0.6303333333333333,0.6534318328499943,0.983
|
data/Qwen2.5-1.5B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.bfloat16_lf,0.
|
3 |
-
0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.bfloat16_lf,0.
|
4 |
-
0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.bfloat16_lf,0.
|
5 |
-
0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.bfloat16_lf,0.
|
6 |
-
0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.bfloat16_lf,0.
|
7 |
-
1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.bfloat16_lf,0.
|
8 |
-
1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.bfloat16_lf,0.
|
9 |
-
1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.bfloat16_lf,0.
|
10 |
-
1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.bfloat16_lf,0.
|
11 |
-
1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.bfloat16_lf,0.
|
12 |
-
2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.bfloat16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.bfloat16_lf,0.18066666666666667,0.5190896501490828,0.18066666666666667,0.2279835258033138,0.931
|
3 |
+
0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.bfloat16_lf,0.518,0.6492636460363231,0.518,0.5572067605634098,1.0
|
4 |
+
0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.bfloat16_lf,0.5773333333333334,0.6947418792907191,0.5773333333333334,0.6099122869891875,1.0
|
5 |
+
0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.bfloat16_lf,0.538,0.7167362916136871,0.538,0.5968520817286569,1.0
|
6 |
+
0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.bfloat16_lf,0.6583333333333333,0.7364753067653045,0.6583333333333333,0.6860876366125171,1.0
|
7 |
+
1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.bfloat16_lf,0.634,0.728416475102691,0.634,0.6585911813020522,1.0
|
8 |
+
1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.bfloat16_lf,0.604,0.739271718192585,0.604,0.650207257048292,1.0
|
9 |
+
1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.bfloat16_lf,0.627,0.7239095156237754,0.627,0.6595024812639674,1.0
|
10 |
+
1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.bfloat16_lf,0.6203333333333333,0.7299176022060372,0.6203333333333333,0.6547846347747823,1.0
|
11 |
+
1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.bfloat16_lf,0.591,0.7339511078925822,0.591,0.6350661217426716,1.0
|
12 |
+
2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.bfloat16_lf,0.5963333333333334,0.7286221052443161,0.5963333333333334,0.6381137491097367,1.0
|
data/Qwen2.5-1.5B-Instruct_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.
|
3 |
-
5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.
|
4 |
-
10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.
|
5 |
-
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.
|
6 |
-
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.
|
7 |
-
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.
|
8 |
-
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.18066666666666667,0.5190896501490828,0.18066666666666667,0.2279835258033138,0.931
|
3 |
+
5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.3466666666666667,0.5781128109800681,0.3466666666666667,0.37886593168708843,0.9756666666666667
|
4 |
+
10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.4523333333333333,0.5964896895382023,0.4523333333333333,0.46219676531721876,0.9933333333333333
|
5 |
+
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.243,0.5419108277814879,0.243,0.31071147199535726,0.8263333333333334
|
6 |
+
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23033333333333333,0.55368556787824,0.23033333333333333,0.3067125355762305,0.661
|
7 |
+
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.292,0.5667420801465655,0.292,0.375496356843247,0.5206666666666667
|
8 |
+
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.2876666666666667,0.5660207537890989,0.2876666666666667,0.36627420118815035,0.4603333333333333
|
data/Qwen2.5-7B-Instruct_metrics.csv
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.
|
3 |
-
0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.
|
4 |
-
0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.
|
5 |
-
0.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-105_torch.bfloat16_lf,0.
|
6 |
-
0.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-140_torch.bfloat16_lf,0.
|
7 |
-
1.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-175_torch.bfloat16_lf,0.
|
8 |
-
1.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-210_torch.bfloat16_lf,0.
|
9 |
-
1.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-245_torch.bfloat16_lf,0.
|
10 |
-
1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.
|
11 |
-
1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.
|
12 |
-
2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.
|
|
|
1 |
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.644,0.7200261355300325,0.644,0.6101052277961244,1.0
|
3 |
+
0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.745,0.7643041174791825,0.745,0.7482828029872421,0.998
|
4 |
+
0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.7446666666666667,0.7800215227839997,0.7446666666666667,0.7576550061479678,0.9996666666666667
|
5 |
+
0.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-105_torch.bfloat16_lf,0.7513333333333333,0.7996792149630704,0.7513333333333333,0.7693730206330721,0.9996666666666667
|
6 |
+
0.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-140_torch.bfloat16_lf,0.75,0.7923028105975739,0.75,0.7665531868559959,1.0
|
7 |
+
1.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-175_torch.bfloat16_lf,0.771,0.8005814962709542,0.771,0.7814602739241332,0.9993333333333333
|
8 |
+
1.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.79978900243777,0.7443333333333333,0.7660506505481828,1.0
|
9 |
+
1.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7486666666666667,0.7974562319123832,0.7486666666666667,0.7655275916268014,0.9993333333333333
|
10 |
+
1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7566666666666667,0.7939852407869384,0.7566666666666667,0.7689495073735431,0.9996666666666667
|
11 |
+
1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.755,0.7940575522966016,0.755,0.7681326415137147,0.9993333333333333
|
12 |
+
2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.756,0.7982464722401461,0.756,0.7704035278260453,0.9996666666666667
|
data/Qwen2.5-7B-Instruct_shots_metrics.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.
|
3 |
-
5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.
|
4 |
-
10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.
|
5 |
-
20,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-20,0.
|
6 |
-
30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.
|
7 |
-
40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.
|
8 |
-
50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.
|
|
|
1 |
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.644,0.7200261355300325,0.644,0.6101052277961244,1.0
|
3 |
+
5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.6346666666666667,0.7653343185471776,0.6346666666666667,0.6219419633691871,0.998
|
4 |
+
10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.678,0.7675951017673515,0.678,0.6790860659550377,0.9796666666666667
|
5 |
+
20,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-20,0.7353333333333333,0.7702034737275962,0.7353333333333333,0.7278047438569933,0.807
|
6 |
+
30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.7646666666666667,0.7787918401418651,0.7646666666666667,0.7527649874769439,0.805
|
7 |
+
40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.759,0.7736852689131295,0.759,0.7472252604775926,0.8546666666666667
|
8 |
+
50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.7586666666666667,0.7640431634617543,0.7586666666666667,0.7414332963557551,0.7563333333333333
|
data/best_metrics.csv
CHANGED
@@ -1,18 +1,17 @@
|
|
1 |
index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
-
1,
|
3 |
-
2,Llama3.1-
|
4 |
-
3,
|
5 |
-
4,
|
6 |
-
5,InternLM2.5-7B (0.8-epoch),InternLM2.5-7B (0.8-epoch),0.
|
7 |
-
6,InternLM2.5-
|
8 |
-
7,
|
9 |
-
8,Qwen2-
|
10 |
-
9,Qwen2-
|
11 |
-
10,Qwen2.5-
|
12 |
-
11,Qwen2.5-
|
13 |
-
12,
|
14 |
-
13,gpt-4o
|
15 |
-
14,
|
16 |
-
15,o1-
|
17 |
-
16,
|
18 |
-
17,Qwen2.5-72B (10-shot),Qwen2.5-72B (10-shot),0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.998
|
|
|
1 |
index,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
1,Llama3.1-8B (1.0-epoch),Llama3.1-8B (1.0-epoch),0.78,0.810582723471486,0.78,0.7924651054056209,1.0
|
3 |
+
2,Llama3.1-70B (1.0-epoch),Llama3.1-70B (1.0-epoch),0.7963333333333333,0.8248972880055918,0.7963333333333333,0.8076868978089201,1.0
|
4 |
+
3,Mistral-7B (1.4-epoch),Mistral-7B (1.4-epoch),0.75,0.7885868317699068,0.75,0.7648234347578796,1.0
|
5 |
+
4,InternLM2.5-7B (0.8-epoch),InternLM2.5-7B (0.8-epoch),0.7496666666666667,0.8041871978859686,0.7496666666666667,0.7660159670998776,1.0
|
6 |
+
5,InternLM2.5-7B-1M (0.8-epoch),InternLM2.5-7B-1M (0.8-epoch),0.803,0.8031411888150441,0.803,0.8028064320197301,1.0
|
7 |
+
6,InternLM2.5-20B (0.8-epoch),InternLM2.5-20B (0.8-epoch),0.795,0.817457691710893,0.795,0.8027552955647029,1.0
|
8 |
+
7,Qwen2-7B (0.4-epoch),Qwen2-7B (0.4-epoch),0.759,0.8005303465799652,0.759,0.7748745026535183,1.0
|
9 |
+
8,Qwen2-72B (1.8-epoch),Qwen2-72B (1.8-epoch),0.784,0.8354349234761956,0.784,0.804194683154365,1.0
|
10 |
+
9,Qwen2.5-3B (1.4-epoch),Qwen2.5-3B (1.4-epoch),0.7233333333333334,0.7720989063414209,0.7233333333333334,0.7410476466041488,1.0
|
11 |
+
10,Qwen2.5-7B (1.0-epoch),Qwen2.5-7B (1.0-epoch),0.771,0.8005814962709542,0.771,0.7814602739241332,0.9993333333333333
|
12 |
+
11,Qwen2.5-72B (0.8-epoch),Qwen2.5-72B (0.8-epoch),0.7846666666666666,0.8199033961265727,0.7846666666666666,0.7983932694517433,1.0
|
13 |
+
12,gpt-4o-mini (0-shot),gpt-4o-mini (0-shot),0.7176666666666667,0.785706730193659,0.7176666666666667,0.7296061848734905,1.0
|
14 |
+
13,gpt-4o (10-shot),gpt-4o (10-shot),0.7916666666666666,0.8227707658360168,0.7916666666666666,0.803614688453356,0.9996666666666667
|
15 |
+
14,o1-mini (50-shot),o1-mini (50-shot),0.75,0.7767849265833893,0.75,0.7590020698968893,1.0
|
16 |
+
15,o1-preview (50-shot),o1-preview (50-shot),0.7546666666666667,0.7979981023789272,0.7546666666666667,0.7708181822112403,0.9996666666666667
|
17 |
+
16,Qwen2.5-72B (10-shot),Qwen2.5-72B (10-shot),0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.998
|
|
data/best_results.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/few-shots_metrics.csv
CHANGED
@@ -61,8 +61,15 @@ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
|
61 |
50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7213333333333334,0.7546008508718184,0.7213333333333334,0.70308601382351,0.8846666666666667
|
62 |
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
63 |
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
65 |
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
|
|
|
|
66 |
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
67 |
5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333332
|
68 |
10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
|
@@ -70,13 +77,13 @@ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
|
70 |
30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
|
71 |
40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
|
72 |
50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
|
73 |
-
0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.
|
74 |
-
5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.
|
75 |
-
10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.
|
76 |
-
20,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-20,0.
|
77 |
-
30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.
|
78 |
-
40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.
|
79 |
-
50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.
|
80 |
0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
|
81 |
5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.8113333333333334,0.8112264644451684,0.8113333333333334,0.8039596846574816,0.9416666666666668
|
82 |
10,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-10,0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.9123333333333332
|
|
|
61 |
50,internlm2_5-7b-chat-1m,internlm/internlm2_5-7b-chat-1m/shots-50,0.7213333333333334,0.7546008508718184,0.7213333333333334,0.70308601382351,0.8846666666666667
|
62 |
0,internlm2_5-20b-chat,internlm/internlm2_5-20b-chat/shots-00,0.564,0.7745256693833624,0.564,0.6352190975436365,0.6726666666666666
|
63 |
0,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-00,0.683,0.7493103872717293,0.683,0.710140098232232,0.9996666666666668
|
64 |
+
5,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-05,0.357,0.7558028637770273,0.357,0.4365296526050415,0.997
|
65 |
+
10,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-10,0.5593333333333333,0.7492096614172068,0.5593333333333333,0.5991418028711349,0.99
|
66 |
+
20,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-20,0.5686666666666667,0.7520662661534714,0.5686666666666667,0.606675877273536,0.9976666666666668
|
67 |
+
30,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-30,0.658,0.7560839971561838,0.658,0.6783328633107383,0.9886666666666668
|
68 |
+
40,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-40,0.685,0.748405672861726,0.685,0.6925462636609232,0.991
|
69 |
+
50,Qwen2-7B-Instruct,Qwen/Qwen2-7B-Instruct/shots-50,0.6303333333333333,0.7557444295881455,0.6303333333333333,0.6534318328499943,0.983
|
70 |
0,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct_torch/shots-00,0.7516666666666667,0.7949378981748352,0.7516666666666667,0.7572499605227642,0.9773333333333334
|
71 |
+
5,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/shots-05,0.778,0.79531681283817,0.778,0.7722405723376975,0.9876666666666668
|
72 |
+
10,Qwen2-72B-Instruct,Qwen/Qwen2-72B-Instruct/shots-10,0.775,0.7935761766767606,0.775,0.7740445924385057,0.9946666666666668
|
73 |
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
74 |
5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333332
|
75 |
10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
|
|
|
77 |
30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
|
78 |
40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
|
79 |
50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
|
80 |
+
0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-00,0.644,0.7200261355300325,0.644,0.6101052277961244,1.0
|
81 |
+
5,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-05,0.6346666666666667,0.7653343185471776,0.6346666666666667,0.6219419633691871,0.998
|
82 |
+
10,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-10,0.678,0.7675951017673515,0.678,0.6790860659550377,0.9796666666666668
|
83 |
+
20,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-20,0.7353333333333333,0.7702034737275962,0.7353333333333333,0.7278047438569933,0.807
|
84 |
+
30,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-30,0.7646666666666667,0.7787918401418651,0.7646666666666667,0.7527649874769439,0.805
|
85 |
+
40,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-40,0.759,0.7736852689131295,0.759,0.7472252604775926,0.8546666666666667
|
86 |
+
50,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/shots-50,0.7586666666666667,0.7640431634617543,0.7586666666666667,0.7414332963557551,0.7563333333333333
|
87 |
0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-00,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
|
88 |
5,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-05,0.8113333333333334,0.8112264644451684,0.8113333333333334,0.8039596846574816,0.9416666666666668
|
89 |
10,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/shots-10,0.8103333333333333,0.8136844357537636,0.8103333333333333,0.8088046626262355,0.9123333333333332
|
data/fine-tuning_metrics.csv
CHANGED
@@ -98,17 +98,17 @@ epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
|
98 |
1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7156666666666667,0.7724266286892245,0.7156666666666667,0.7356331945937126,1.0
|
99 |
1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.6986666666666667,0.7734046031514225,0.6986666666666667,0.7262724373234384,1.0
|
100 |
2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.704,0.7725944595890188,0.704,0.7290337960305111,1.0
|
101 |
-
0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.
|
102 |
-
0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.
|
103 |
-
0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.
|
104 |
-
0.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-105_torch.bfloat16_lf,0.
|
105 |
-
0.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-140_torch.bfloat16_lf,0.
|
106 |
-
1.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-175_torch.bfloat16_lf,0.
|
107 |
-
1.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-210_torch.bfloat16_lf,0.
|
108 |
-
1.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-245_torch.bfloat16_lf,0.
|
109 |
-
1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.
|
110 |
-
1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.
|
111 |
-
2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.
|
112 |
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
|
113 |
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7736666666666666,0.8102875293385203,0.7736666666666666,0.7874095844134584,1.0
|
114 |
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.748,0.8094861650366822,0.748,0.7718522396481117,1.0
|
|
|
98 |
1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7156666666666667,0.7724266286892245,0.7156666666666667,0.7356331945937126,1.0
|
99 |
1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.6986666666666667,0.7734046031514225,0.6986666666666667,0.7262724373234384,1.0
|
100 |
2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.704,0.7725944595890188,0.704,0.7290337960305111,1.0
|
101 |
+
0.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct_torch.bfloat16_lf,0.644,0.7200261355300325,0.644,0.6101052277961244,1.0
|
102 |
+
0.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-35_torch.bfloat16_lf,0.745,0.7643041174791825,0.745,0.7482828029872421,0.998
|
103 |
+
0.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-70_torch.bfloat16_lf,0.7446666666666667,0.7800215227839997,0.7446666666666667,0.7576550061479678,0.9996666666666668
|
104 |
+
0.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-105_torch.bfloat16_lf,0.7513333333333333,0.7996792149630704,0.7513333333333333,0.7693730206330721,0.9996666666666668
|
105 |
+
0.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-140_torch.bfloat16_lf,0.75,0.7923028105975739,0.75,0.7665531868559959,1.0
|
106 |
+
1.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-175_torch.bfloat16_lf,0.771,0.8005814962709542,0.771,0.7814602739241332,0.9993333333333332
|
107 |
+
1.2,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-210_torch.bfloat16_lf,0.7443333333333333,0.79978900243777,0.7443333333333333,0.7660506505481828,1.0
|
108 |
+
1.4,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7486666666666667,0.7974562319123832,0.7486666666666667,0.7655275916268014,0.9993333333333332
|
109 |
+
1.6,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7566666666666667,0.7939852407869384,0.7566666666666667,0.7689495073735431,0.9996666666666668
|
110 |
+
1.8,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-315_torch.bfloat16_lf,0.755,0.7940575522966016,0.755,0.7681326415137147,0.9993333333333332
|
111 |
+
2.0,Qwen2.5-7B-Instruct,Qwen/Qwen2.5-7B-Instruct/checkpoint-350_torch.bfloat16_lf,0.756,0.7982464722401461,0.756,0.7704035278260453,0.9996666666666668
|
112 |
0.0,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct_torch.bfloat16_4bit_lf,0.7856666666666666,0.7942511546806512,0.7856666666666666,0.7699212943617263,0.994
|
113 |
0.2,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-35_torch.bfloat16_4bit_lf,0.7736666666666666,0.8102875293385203,0.7736666666666666,0.7874095844134584,1.0
|
114 |
0.4,Qwen2.5-72B-Instruct,Qwen/Qwen2.5-72B-Instruct/checkpoint-70_torch.bfloat16_4bit_lf,0.748,0.8094861650366822,0.748,0.7718522396481117,1.0
|
llm_toolkit/logical_reasoning_utils.py
CHANGED
@@ -518,27 +518,16 @@ def plot_metrics(perf_df, model_name, variant="epoch", offset=0.01):
|
|
518 |
)
|
519 |
perf_df = perf_df.iloc[:min_length]
|
520 |
|
521 |
-
# Plot accuracy and f1 on the same chart with different markers
|
522 |
ax.plot(
|
523 |
-
perf_df[variant], perf_df["
|
524 |
-
)
|
525 |
-
ax.plot(
|
526 |
-
perf_df[variant], perf_df["f1"], marker="s", label="F1 Score", color="b"
|
527 |
) # Square marker for F1 Score
|
|
|
|
|
|
|
528 |
|
529 |
# Add values on top of points
|
530 |
for i in range(min_length):
|
531 |
print(f"{perf_df[variant].iloc[i]}: {perf_df['run'].iloc[i]}")
|
532 |
-
ax.annotate(
|
533 |
-
f"{perf_df['accuracy'].iloc[i]*100:.2f}%",
|
534 |
-
(perf_df[variant].iloc[i], perf_df["accuracy"].iloc[i]),
|
535 |
-
ha="center",
|
536 |
-
va="bottom", # Move accuracy numbers below the points
|
537 |
-
xytext=(0, -15),
|
538 |
-
textcoords="offset points",
|
539 |
-
fontsize=10,
|
540 |
-
color="r",
|
541 |
-
)
|
542 |
ax.annotate(
|
543 |
f"{perf_df['f1'].iloc[i]*100:.2f}%",
|
544 |
(perf_df[variant].iloc[i], perf_df["f1"].iloc[i]),
|
@@ -549,6 +538,16 @@ def plot_metrics(perf_df, model_name, variant="epoch", offset=0.01):
|
|
549 |
fontsize=10,
|
550 |
color="b",
|
551 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
552 |
|
553 |
# Set y-axis limit
|
554 |
ylimits = ax.get_ylim()
|
@@ -560,7 +559,7 @@ def plot_metrics(perf_df, model_name, variant="epoch", offset=0.01):
|
|
560 |
if variant == "epoch"
|
561 |
else "Number of Shots"
|
562 |
)
|
563 |
-
ax.set_ylabel("
|
564 |
|
565 |
ax.xaxis.set_major_locator(MultipleLocator(0.2 if variant == "epoch" else 5))
|
566 |
ax.set_title(f"Performance Analysis Across {'Checkpoints' if variant == 'epoch' else 'Shots'} for the {model_name} Model")
|
|
|
518 |
)
|
519 |
perf_df = perf_df.iloc[:min_length]
|
520 |
|
|
|
521 |
ax.plot(
|
522 |
+
perf_df[variant], perf_df["f1"], marker="s", label="F1", color="b"
|
|
|
|
|
|
|
523 |
) # Square marker for F1 Score
|
524 |
+
ax.plot(
|
525 |
+
perf_df[variant], perf_df["ratio_valid_classifications"], marker="o", label="VCR", color="r"
|
526 |
+
)
|
527 |
|
528 |
# Add values on top of points
|
529 |
for i in range(min_length):
|
530 |
print(f"{perf_df[variant].iloc[i]}: {perf_df['run'].iloc[i]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
ax.annotate(
|
532 |
f"{perf_df['f1'].iloc[i]*100:.2f}%",
|
533 |
(perf_df[variant].iloc[i], perf_df["f1"].iloc[i]),
|
|
|
538 |
fontsize=10,
|
539 |
color="b",
|
540 |
)
|
541 |
+
ax.annotate(
|
542 |
+
f"{perf_df['ratio_valid_classifications'].iloc[i]*100:.2f}%",
|
543 |
+
(perf_df[variant].iloc[i], perf_df["ratio_valid_classifications"].iloc[i]),
|
544 |
+
ha="center",
|
545 |
+
va="bottom", # Move accuracy numbers below the points
|
546 |
+
xytext=(0, 5),
|
547 |
+
textcoords="offset points",
|
548 |
+
fontsize=10,
|
549 |
+
color="r",
|
550 |
+
)
|
551 |
|
552 |
# Set y-axis limit
|
553 |
ylimits = ax.get_ylim()
|
|
|
559 |
if variant == "epoch"
|
560 |
else "Number of Shots"
|
561 |
)
|
562 |
+
ax.set_ylabel("Valid Classification Ratio (VCR) and F1 Score")
|
563 |
|
564 |
ax.xaxis.set_major_locator(MultipleLocator(0.2 if variant == "epoch" else 5))
|
565 |
ax.set_title(f"Performance Analysis Across {'Checkpoints' if variant == 'epoch' else 'Shots'} for the {model_name} Model")
|
logs/{Qwen2.5-72B-shots.txt → Qwen2-72B-shots.txt}
RENAMED
File without changes
|
notebooks/00_Data Analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41c186e8985991dc385de77805508e5e588926f797cf30047be750fec3eef330
|
3 |
+
size 1240375
|
notebooks/01a_internlm2_5-20b-chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0cea5cdad0ca0abe4b744b55b5eb5d578f7e517323228aeb37281265a535d9d
|
3 |
+
size 6378310
|
notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e227a94216b212a5cfd4a4a9b71f546d3f1002669264a1a82205d09070a91945
|
3 |
+
size 2366870
|
notebooks/01a_internlm2_5-7b-chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:160bb2fe298b5f9dfd5f63f2e70e5f95849df97feb00a8da00fd1ab3e320c529
|
3 |
+
size 6092005
|
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f6addfbd7ca2686025b0a37ca511f29c4b6373d19965e5450edac30e62616ee
|
3 |
+
size 14736032
|
notebooks/02a_Qwen2-7B-Instruct_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f344c1583e389861aa33f01884078e2a1779e36e41b775505c3a669da84040b
|
3 |
+
size 3426960
|
notebooks/02b_Qwen2-72B-Instruct_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22e99daa352a629d6d8b591032b94f12a0d66c11e3b04f6744423e7d72fbf039
|
3 |
+
size 2425374
|
notebooks/02c_Qwen2.5-3B-Instruct_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb7915eae3b9005684a4ecc9c60fcd6880eee21ee4f21123ef8f2d43c4c1612a
|
3 |
+
size 7185712
|
notebooks/02d_Qwen2.5-7B-Instruct_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa59b9e345312b8ba34a7379c49c82e0f8d05c34f322b65cc754c320007748fa
|
3 |
+
size 7862529
|
notebooks/02g_Qwen2.5-72B-Instruct_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41817d8ed0faae7016ee54185d22b1a0ba9f766373ea2ea589306ef19bd25513
|
3 |
+
size 1637565
|
notebooks/03a_Llama3.1-8B-Chinese-Chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14fd3f1c5211d9c791f276f760c1171e29e2110a965c64eeb34ad4e5c680aa43
|
3 |
+
size 6542205
|
notebooks/03b_Llama3.1-70B-Chinese-Chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:163c79dd0f5dd1e1112fe00626f3bee46bd0a1df12ecc551579c01ac3c33d929
|
3 |
+
size 2035718
|
notebooks/04b_OpenAI-Models_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fae89e9c9fdd3d2c744c5aee8f4b06d303473cd439c050c660f0bdc9a5d6d62c
|
3 |
+
size 4815051
|