Bram Vanroy committed on
Commit
b1e6575
·
1 Parent(s): 08f6f54

fix retention of fields

Browse files
Files changed (2) hide show
  1. evals/models.json +45 -21
  2. generate_overview_json.py +5 -0
evals/models.json CHANGED
@@ -3,96 +3,120 @@
3
  "compute_dtype": "bfloat16",
4
  "model_name": "yhavinga/gpt-neo-1.3B-dutch",
5
  "num_parameters": 1315575808,
6
- "quantization": "8-bit"
 
 
7
  },
8
  "gpt-neo-125m-dutch": {
9
  "compute_dtype": "bfloat16",
10
  "model_name": "yhavinga/gpt-neo-125M-dutch",
11
  "num_parameters": 125198592,
12
- "quantization": "8-bit"
 
 
13
  },
14
  "gpt2-large-dutch": {
15
  "compute_dtype": "bfloat16",
16
  "model_name": "yhavinga/gpt2-large-dutch",
17
  "num_parameters": 774030080,
18
- "quantization": "8-bit"
 
 
19
  },
20
  "gpt2-medium-dutch": {
21
  "compute_dtype": "bfloat16",
22
  "model_name": "yhavinga/gpt2-medium-dutch",
23
  "num_parameters": 354823168,
24
- "quantization": "8-bit"
 
 
25
  },
26
  "llama-2-13b-chat-dutch": {
27
  "compute_dtype": "bfloat16",
28
  "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
29
  "num_parameters": 13015864320,
30
- "quantization": "8-bit"
 
 
31
  },
32
  "llama-2-13b-chat-hf": {
33
  "compute_dtype": "bfloat16",
34
  "model_name": "meta-llama/Llama-2-13b-chat-hf",
35
  "num_parameters": 13015864320,
36
- "quantization": "8-bit"
 
 
37
  },
38
  "llama-2-13b-hf": {
39
  "compute_dtype": "bfloat16",
40
  "model_name": "meta-llama/Llama-2-13b-hf",
41
  "num_parameters": 13015864320,
42
- "quantization": "8-bit"
 
 
43
  },
44
  "llama-2-7b-chat-hf": {
45
  "compute_dtype": "bfloat16",
46
  "model_name": "meta-llama/Llama-2-7b-chat-hf",
47
  "num_parameters": 6738415616,
48
- "quantization": "8-bit"
 
 
49
  },
50
  "llama-2-7b-hf": {
51
  "compute_dtype": "bfloat16",
52
  "model_name": "meta-llama/Llama-2-7b-hf",
53
  "num_parameters": 6738415616,
54
- "quantization": "8-bit"
 
 
55
  },
56
  "llama2-13b-ft-mc4_nl_cleaned_tiny": {
57
  "compute_dtype": "bfloat16",
58
  "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
59
  "num_parameters": 13015864320,
60
- "quantization": "8-bit"
 
 
61
  },
62
  "mistral-7b-v0.1": {
63
  "compute_dtype": "bfloat16",
64
  "model_name": "mistralai/Mistral-7B-v0.1",
65
  "num_parameters": 7241732096,
66
- "quantization": "8-bit"
67
- },
68
- "mixtral-8x7b-v0.1": {
69
- "compute_dtype": "auto",
70
- "model_name": "mistralai/Mixtral-8x7B-v0.1",
71
- "num_parameters": 46702792704,
72
- "quantization": null
73
  },
74
  "neural-chat-7b-v3-1": {
75
  "compute_dtype": "bfloat16",
76
  "model_name": "Intel/neural-chat-7b-v3-1",
77
  "num_parameters": 7241732096,
78
- "quantization": "8-bit"
 
 
79
  },
80
  "orca-2-13b": {
81
  "compute_dtype": "bfloat16",
82
  "model_name": "microsoft/Orca-2-13b",
83
  "num_parameters": 13015895040,
84
- "quantization": "8-bit"
 
 
85
  },
86
  "orca-2-7b": {
87
  "compute_dtype": "bfloat16",
88
  "model_name": "microsoft/Orca-2-7b",
89
  "num_parameters": 6738440192,
90
- "quantization": "8-bit"
 
 
91
  },
92
  "zephyr-7b-beta": {
93
  "compute_dtype": "bfloat16",
94
  "model_name": "HuggingFaceH4/zephyr-7b-beta",
95
  "num_parameters": 7241732096,
96
- "quantization": "8-bit"
 
 
97
  }
98
  }
 
3
  "compute_dtype": "bfloat16",
4
  "model_name": "yhavinga/gpt-neo-1.3B-dutch",
5
  "num_parameters": 1315575808,
6
+ "quantization": "8-bit",
7
+ "model_type": "pretrained",
8
+ "dutch_coverage": "pretrained"
9
  },
10
  "gpt-neo-125m-dutch": {
11
  "compute_dtype": "bfloat16",
12
  "model_name": "yhavinga/gpt-neo-125M-dutch",
13
  "num_parameters": 125198592,
14
+ "quantization": "8-bit",
15
+ "model_type": "pretrained",
16
+ "dutch_coverage": "pretrained"
17
  },
18
  "gpt2-large-dutch": {
19
  "compute_dtype": "bfloat16",
20
  "model_name": "yhavinga/gpt2-large-dutch",
21
  "num_parameters": 774030080,
22
+ "quantization": "8-bit",
23
+ "model_type": "pretrained",
24
+ "dutch_coverage": "pretrained"
25
  },
26
  "gpt2-medium-dutch": {
27
  "compute_dtype": "bfloat16",
28
  "model_name": "yhavinga/gpt2-medium-dutch",
29
  "num_parameters": 354823168,
30
+ "quantization": "8-bit",
31
+ "model_type": "pretrained",
32
+ "dutch_coverage": "pretrained"
33
  },
34
  "llama-2-13b-chat-dutch": {
35
  "compute_dtype": "bfloat16",
36
  "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
37
  "num_parameters": 13015864320,
38
+ "quantization": "8-bit",
39
+ "model_type": "instruction-tuned",
40
+ "dutch_coverage": "fine-tuned"
41
  },
42
  "llama-2-13b-chat-hf": {
43
  "compute_dtype": "bfloat16",
44
  "model_name": "meta-llama/Llama-2-13b-chat-hf",
45
  "num_parameters": 13015864320,
46
+ "quantization": "8-bit",
47
+ "model_type": "instruction-tuned",
48
+ "dutch_coverage": "none"
49
  },
50
  "llama-2-13b-hf": {
51
  "compute_dtype": "bfloat16",
52
  "model_name": "meta-llama/Llama-2-13b-hf",
53
  "num_parameters": 13015864320,
54
+ "quantization": "8-bit",
55
+ "model_type": "pretrained",
56
+ "dutch_coverage": "none"
57
  },
58
  "llama-2-7b-chat-hf": {
59
  "compute_dtype": "bfloat16",
60
  "model_name": "meta-llama/Llama-2-7b-chat-hf",
61
  "num_parameters": 6738415616,
62
+ "quantization": "8-bit",
63
+ "model_type": "instruction-tuned",
64
+ "dutch_coverage": "none"
65
  },
66
  "llama-2-7b-hf": {
67
  "compute_dtype": "bfloat16",
68
  "model_name": "meta-llama/Llama-2-7b-hf",
69
  "num_parameters": 6738415616,
70
+ "quantization": "8-bit",
71
+ "model_type": "pretrained",
72
+ "dutch_coverage": "none"
73
  },
74
  "llama2-13b-ft-mc4_nl_cleaned_tiny": {
75
  "compute_dtype": "bfloat16",
76
  "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
77
  "num_parameters": 13015864320,
78
+ "quantization": "8-bit",
79
+ "model_type": "fine-tuned",
80
+ "dutch_coverage": "fine-tuned"
81
  },
82
  "mistral-7b-v0.1": {
83
  "compute_dtype": "bfloat16",
84
  "model_name": "mistralai/Mistral-7B-v0.1",
85
  "num_parameters": 7241732096,
86
+ "quantization": "8-bit",
87
+ "model_type": "pretrained",
88
+ "dutch_coverage": "none"
 
 
 
 
89
  },
90
  "neural-chat-7b-v3-1": {
91
  "compute_dtype": "bfloat16",
92
  "model_name": "Intel/neural-chat-7b-v3-1",
93
  "num_parameters": 7241732096,
94
+ "quantization": "8-bit",
95
+ "model_type": "RL-tuned",
96
+ "dutch_coverage": "none"
97
  },
98
  "orca-2-13b": {
99
  "compute_dtype": "bfloat16",
100
  "model_name": "microsoft/Orca-2-13b",
101
  "num_parameters": 13015895040,
102
+ "quantization": "8-bit",
103
+ "model_type": "fine-tuned",
104
+ "dutch_coverage": "none"
105
  },
106
  "orca-2-7b": {
107
  "compute_dtype": "bfloat16",
108
  "model_name": "microsoft/Orca-2-7b",
109
  "num_parameters": 6738440192,
110
+ "quantization": "8-bit",
111
+ "model_type": "fine-tuned",
112
+ "dutch_coverage": "none"
113
  },
114
  "zephyr-7b-beta": {
115
  "compute_dtype": "bfloat16",
116
  "model_name": "HuggingFaceH4/zephyr-7b-beta",
117
  "num_parameters": 7241732096,
118
+ "quantization": "8-bit",
119
+ "model_type": "RL-tuned",
120
+ "dutch_coverage": "none"
121
  }
122
  }
generate_overview_json.py CHANGED
@@ -37,6 +37,11 @@ def main():
37
  "num_parameters": results[short_name]["num_parameters"]
38
  if short_name in results and "num_parameters" in results[short_name]
39
  else get_num_parameters(model_args["pretrained"]),
 
 
 
 
 
40
  }
41
 
42
  if "load_in_8bit" in model_args:
 
37
  "num_parameters": results[short_name]["num_parameters"]
38
  if short_name in results and "num_parameters" in results[short_name]
39
  else get_num_parameters(model_args["pretrained"]),
40
+ "model_type": results[short_name]["model_type"]
41
+ if short_name in results and "model_type" in results[short_name]
42
+ else "not-given",
43
+ "dutch_coverage": results[short_name]["dutch_coverage"] if short_name in results and "dutch_coverage" in results[short_name]
44
+ else "not-given",
45
  }
46
 
47
  if "load_in_8bit" in model_args: