Bram Vanroy committed on
Commit
2686c5b
1 Parent(s): c4c7f48

add models.json

Browse files
Files changed (1) hide show
  1. evals/models.json +142 -68
evals/models.json CHANGED
@@ -1,70 +1,144 @@
1
  {
2
- "bloom-7b1": {
3
- "model_name": "pretrained=bigscience/bloom-7b1",
4
- "args": "pretrained=bigscience/bloom-7b1"
5
- },
6
- "gpt-neo-1.3b-dutch": {
7
- "model_name": "yhavinga/gpt-neo-1.3B-dutch",
8
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
9
- },
10
- "gpt-neo-125m-dutch": {
11
- "model_name": "yhavinga/gpt-neo-125M-dutch",
12
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
13
- },
14
- "gpt2-large-dutch": {
15
- "model_name": "yhavinga/gpt2-large-dutch",
16
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
17
- },
18
- "gpt2-medium-dutch": {
19
- "model_name": "yhavinga/gpt2-medium-dutch",
20
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
21
- },
22
- "llama-2-13b-chat-dutch": {
23
- "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
24
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
25
- },
26
- "llama-2-13b-chat-hf": {
27
- "model_name": "meta-llama/Llama-2-13b-chat-hf",
28
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
29
- },
30
- "llama-2-13b-hf": {
31
- "model_name": "meta-llama/Llama-2-13b-hf",
32
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
33
- },
34
- "llama-2-7b-chat-hf": {
35
- "model_name": "meta-llama/Llama-2-7b-chat-hf",
36
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
37
- },
38
- "llama-2-7b-hf": {
39
- "model_name": "meta-llama/Llama-2-7b-hf",
40
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
41
- },
42
- "llama-7b": {
43
- "model_name": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
44
- "args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B"
45
- },
46
- "llama2-13b-ft-mc4_nl_cleaned_tiny": {
47
- "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
48
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
49
- },
50
- "mistral-7b-v0.1": {
51
- "model_name": "mistralai/Mistral-7B-v0.1",
52
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
53
- },
54
- "neural-chat-7b-v3-1": {
55
- "model_name": "Intel/neural-chat-7b-v3-1",
56
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
57
- },
58
- "orca-2-13b": {
59
- "model_name": "microsoft/Orca-2-13b",
60
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
61
- },
62
- "orca-2-7b": {
63
- "model_name": "microsoft/Orca-2-7b",
64
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
65
- },
66
- "zephyr-7b-beta": {
67
- "model_name": "HuggingFaceH4/zephyr-7b-beta",
68
- "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True"
69
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  }
 
1
  {
2
+ "Llama-2-13b-chat-dutch": {
3
+ "compute_dtype": "bfloat16",
4
+ "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
5
+ "num_parameters": 13015864320,
6
+ "quantization": "8-bit"
7
+ },
8
+ "Llama-2-13b-chat-hf": {
9
+ "compute_dtype": "bfloat16",
10
+ "model_name": "meta-llama/Llama-2-13b-chat-hf",
11
+ "num_parameters": 13015864320,
12
+ "quantization": "8-bit"
13
+ },
14
+ "Llama-2-13b-hf": {
15
+ "compute_dtype": "bfloat16",
16
+ "model_name": "meta-llama/Llama-2-13b-hf",
17
+ "num_parameters": 13015864320,
18
+ "quantization": "8-bit"
19
+ },
20
+ "Llama-2-7b-chat-hf": {
21
+ "compute_dtype": "bfloat16",
22
+ "model_name": "meta-llama/Llama-2-7b-chat-hf",
23
+ "num_parameters": 6738415616,
24
+ "quantization": "8-bit"
25
+ },
26
+ "Llama-2-7b-hf": {
27
+ "compute_dtype": "bfloat16",
28
+ "model_name": "meta-llama/Llama-2-7b-hf",
29
+ "num_parameters": 6738415616,
30
+ "quantization": "8-bit"
31
+ },
32
+ "Mistral-7B-v0.1": {
33
+ "compute_dtype": "bfloat16",
34
+ "model_name": "mistralai/Mistral-7B-v0.1",
35
+ "num_parameters": 7241732096,
36
+ "quantization": "8-bit"
37
+ },
38
+ "Orca-2-13b": {
39
+ "compute_dtype": "bfloat16",
40
+ "model_name": "microsoft/Orca-2-13b",
41
+ "num_parameters": 13015895040,
42
+ "quantization": "8-bit"
43
+ },
44
+ "Orca-2-7b": {
45
+ "compute_dtype": "bfloat16",
46
+ "model_name": "microsoft/Orca-2-7b",
47
+ "num_parameters": 6738440192,
48
+ "quantization": "8-bit"
49
+ },
50
+ "bloom-7b1": {
51
+ "args": "pretrained=bigscience/bloom-7b1",
52
+ "model_name": "pretrained=bigscience/bloom-7b1"
53
+ },
54
+ "gpt-neo-1.3B-dutch": {
55
+ "compute_dtype": "bfloat16",
56
+ "model_name": "yhavinga/gpt-neo-1.3B-dutch",
57
+ "num_parameters": 1315575808,
58
+ "quantization": "8-bit"
59
+ },
60
+ "gpt-neo-1.3b-dutch": {
61
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
62
+ "model_name": "yhavinga/gpt-neo-1.3B-dutch"
63
+ },
64
+ "gpt-neo-125M-dutch": {
65
+ "compute_dtype": "bfloat16",
66
+ "model_name": "yhavinga/gpt-neo-125M-dutch",
67
+ "num_parameters": 125198592,
68
+ "quantization": "8-bit"
69
+ },
70
+ "gpt-neo-125m-dutch": {
71
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
72
+ "model_name": "yhavinga/gpt-neo-125M-dutch"
73
+ },
74
+ "gpt2-large-dutch": {
75
+ "compute_dtype": "bfloat16",
76
+ "model_name": "yhavinga/gpt2-large-dutch",
77
+ "num_parameters": 774030080,
78
+ "quantization": "8-bit"
79
+ },
80
+ "gpt2-medium-dutch": {
81
+ "compute_dtype": "bfloat16",
82
+ "model_name": "yhavinga/gpt2-medium-dutch",
83
+ "num_parameters": 354823168,
84
+ "quantization": "8-bit"
85
+ },
86
+ "llama-2-13b-chat-dutch": {
87
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
88
+ "model_name": "BramVanroy/Llama-2-13b-chat-dutch"
89
+ },
90
+ "llama-2-13b-chat-hf": {
91
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
92
+ "model_name": "meta-llama/Llama-2-13b-chat-hf"
93
+ },
94
+ "llama-2-13b-hf": {
95
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
96
+ "model_name": "meta-llama/Llama-2-13b-hf"
97
+ },
98
+ "llama-2-7b-chat-hf": {
99
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
100
+ "model_name": "meta-llama/Llama-2-7b-chat-hf"
101
+ },
102
+ "llama-2-7b-hf": {
103
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
104
+ "model_name": "meta-llama/Llama-2-7b-hf"
105
+ },
106
+ "llama-7b": {
107
+ "args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B",
108
+ "model_name": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B"
109
+ },
110
+ "llama2-13b-ft-mc4": {
111
+ "compute_dtype": "bfloat16",
112
+ "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
113
+ "num_parameters": 13015864320,
114
+ "quantization": "8-bit"
115
+ },
116
+ "llama2-13b-ft-mc4_nl_cleaned_tiny": {
117
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
118
+ "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny"
119
+ },
120
+ "mistral-7b-v0.1": {
121
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
122
+ "model_name": "mistralai/Mistral-7B-v0.1"
123
+ },
124
+ "neural-chat-7b-v3-1": {
125
+ "compute_dtype": "bfloat16",
126
+ "model_name": "Intel/neural-chat-7b-v3-1",
127
+ "num_parameters": 7241732096,
128
+ "quantization": "8-bit"
129
+ },
130
+ "orca-2-13b": {
131
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
132
+ "model_name": "microsoft/Orca-2-13b"
133
+ },
134
+ "orca-2-7b": {
135
+ "args": "use_accelerate=True,device_map_option=auto,dtype=bfloat16,load_in_8bit=True",
136
+ "model_name": "microsoft/Orca-2-7b"
137
+ },
138
+ "zephyr-7b-beta": {
139
+ "compute_dtype": "bfloat16",
140
+ "model_name": "HuggingFaceH4/zephyr-7b-beta",
141
+ "num_parameters": 7241732096,
142
+ "quantization": "8-bit"
143
+ }
144
  }