Commit 5e89f20 (parent: e3e4276)
hunterhector committed: Update README.md

Files changed (1):
  1. README.md (+100, -101)
README.md CHANGED
@@ -27,152 +27,151 @@ datasets:
   - nickrosh/Evol-Instruct-Code-80k-v1
   - open-phi/textbooks
   - open-phi/programming_books_llama
-
+  - LLM360/CrystalCoderDatasets
 model-index:
 - name: CrystalChat
   results:
   - task:
-      type: text-generation # Required. Example: automatic-speech-recognition
+      type: text-generation
     dataset:
-      type: openai_humanneval # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: OpenAI HumanEval # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: openai_humanneval
+      name: OpenAI HumanEval
     metrics:
-    - name: pass@1 (t=0.01)
-      type: pass@1 # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 31.707 # Required. Example: 41.148
-    - name: pass@10 (t=0.8)
-      type: pass@10
-      value: 65.755
+    - name: pass@1 (t=0.01)
+      type: pass@1
+      value: 31.707
+    - name: pass@10 (t=0.8)
+      type: pass@10
+      value: 65.755
   - task:
-      type: text-generation # Required. Example: automatic-speech-recognition
+      type: text-generation
     dataset:
-      type: mbpp # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: Mostly Basic Python Problems (mbpp) # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: mbpp
+      name: Mostly Basic Python Problems (mbpp)
     metrics:
-    - name: pass@1 (t=0.01)
-      type: pass@1 # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 39.4 # Required. Example: 41.148
-    - name: pass@10 (t=0.8)
-      type: pass@10
-      value: 59.895
-
+    - name: pass@1 (t=0.01)
+      type: pass@1
+      value: 39.4
+    - name: pass@10 (t=0.8)
+      type: pass@10
+      value: 59.895
   - task:
-      type: multiple-choice # Required. Example: automatic-speech-recognition
+      type: multiple-choice
     dataset:
-      type: race # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: RACE # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: race
+      name: RACE
     metrics:
-    - name: accuracy
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 41.148 # Required. Example: 41.148
+    - name: accuracy
+      type: accuracy
+      value: 41.148
   - task:
-      type: multiple-choice # Required. Example: automatic-speech-recognition
+      type: multiple-choice
     dataset:
-      type: mmlu # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: Measuring Massive Multitask Language Understanding (MMLU) # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: mmlu
+      name: Measuring Massive Multitask Language Understanding (MMLU)
     metrics:
-    - name: accuracy
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 52.789 # Required. Example: 41.148
+    - name: accuracy
+      type: accuracy
+      value: 52.789
   - task:
-      type: multiple-choice # Required. Example: automatic-speech-recognition
+      type: multiple-choice
     dataset:
-      type: truthful_qa # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: Truthful QA # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: truthful_qa
+      name: Truthful QA
     metrics:
-    - name: accuracy
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 47.29 # Required. Example: 41.148
+    - name: accuracy
+      type: accuracy
+      value: 47.29
   - task:
-      type: multiple-choice # Required. Example: automatic-speech-recognition
+      type: multiple-choice
     dataset:
-      type: winogrande # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: Winogrande # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: winogrande
+      name: Winogrande
     metrics:
-    - name: accuracy (5 shot)
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 70.639 # Required. Example: 41.148
-    - name: accuracy (0 shot)
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 68.114 # Required. Example: 41.148
+    - name: accuracy (5 shot)
+      type: accuracy
+      value: 70.639
+    - name: accuracy (0 shot)
+      type: accuracy
+      value: 68.114
   - task:
-      type: multiple-choice # Required. Example: automatic-speech-recognition
+      type: multiple-choice
     dataset:
-      type: copa # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: COPA # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: copa
+      name: COPA
     metrics:
-    - name: accuracy
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 85 # Required. Example: 41.148
+    - name: accuracy
+      type: accuracy
+      value: 85
   - task:
-      type: text-classification # Required. Example: automatic-speech-recognition
+      type: text-classification
     dataset:
-      type: boolq # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: Boolq # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: boolq
+      name: Boolq
     metrics:
-    - name: accuracy
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 82.783 # Required. Example: 41.148
+    - name: accuracy
+      type: accuracy
+      value: 82.783
   - task:
-      type: question-answering # Required. Example: automatic-speech-recognition
+      type: question-answering
     dataset:
-      type: openbookqa # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: Openbook QA # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: openbookqa
+      name: Openbook QA
     metrics:
-    - name: accuracy
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 42 # Required. Example: 41.148
+    - name: accuracy
+      type: accuracy
+      value: 42
   - task:
-      type: multiple-choice # Required. Example: automatic-speech-recognition
+      type: multiple-choice
     dataset:
-      type: hellaSwag # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: HellaSwag # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: hellaSwag
+      name: HellaSwag
     metrics:
-    - name: accuracy (10-shot)
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 76.12 # Required. Example: 41.148
-    - name: accuracy (0-shot)
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 73.312 # Required. Example: 41.148
+    - name: accuracy (10-shot)
+      type: accuracy
+      value: 76.12
+    - name: accuracy (0-shot)
+      type: accuracy
+      value: 73.312
   - task:
-      type: question-answering # Required. Example: automatic-speech-recognition
+      type: question-answering
     dataset:
-      type: piqa # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: PIQA # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: piqa
+      name: PIQA
     metrics:
-    - name: accuracy
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 77.856 # Required. Example: 41.148
+    - name: accuracy
+      type: accuracy
+      value: 77.856
   - task:
-      type: question-answering # Required. Example: automatic-speech-recognition
+      type: question-answering
     dataset:
-      type: ai2_arc # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: ARC (Easy) # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: ai2_arc
+      name: ARC (Easy)
     metrics:
-    - name: accuracy
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 70.328 # Required. Example: 41.148
+    - name: accuracy
+      type: accuracy
+      value: 70.328
   - task:
-      type: question-answering # Required. Example: automatic-speech-recognition
+      type: question-answering
     dataset:
-      type: ai2_arc # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: ARC (Challenge) # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: ai2_arc
+      name: ARC (Challenge)
     metrics:
-    - name: accuracy (25-shot)
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 51.706 # Required. Example: 41.148
-    - name: accuracy (0-shot)
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 44.625 # Required. Example: 41.148
+    - name: accuracy (25-shot)
+      type: accuracy
+      value: 51.706
+    - name: accuracy (0-shot)
+      type: accuracy
+      value: 44.625
   - task:
-      type: text-generation # Required. Example: automatic-speech-recognition
+      type: text-generation
     dataset:
-      type: gsm8k # Required. Example: common_voice. Use dataset id from https://hf.co/datasets
-      name: GSM8K (Grade School Math 8K) # Required. A pretty name for the dataset. Example: Common Voice (French)
+      type: gsm8k
+      name: GSM8K (Grade School Math 8K)
     metrics:
-    - name: Accuracy (5 shot)
-      type: accuracy # Required. Example: wer. Use metric id from https://hf.co/metrics
-      value: 28.052 # Required. Example: 41.148
+    - name: Accuracy (5 shot)
+      type: accuracy
+      value: 28.052
 ---
 
  # CrystalChat
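
The YAML edited above is the Hub's `model-index` metadata, which drives the "Eval Results" widget on the model page. As a quick sanity check of the updated card, here is a minimal sketch of reading those results back with `huggingface_hub`; the repo id `LLM360/CrystalChat` is an assumption and not stated in this diff.

```python
# Sketch: read the eval results declared in the card's model-index block.
# The repo id below is an assumption; point it at wherever this card is hosted.
from huggingface_hub import ModelCard

card = ModelCard.load("LLM360/CrystalChat")
for result in card.data.eval_results or []:
    metric = result.metric_name or result.metric_type
    print(f"{result.dataset_name} [{result.task_type}]: {metric} = {result.metric_value}")
```

The HumanEval and MBPP entries report pass@1 and pass@10 at different sampling temperatures. For reference, a sketch of the standard unbiased pass@k estimator from the HumanEval paper (Chen et al., 2021); this is illustrative only and not taken from this repository's evaluation code.

```python
import math

def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k for one problem: n samples generated, c of them pass the tests."""
    if n - c < k:
        return 1.0  # every size-k subset must contain at least one passing sample
    return 1.0 - math.comb(n - c, k) / math.comb(n, k)

# Example: 200 samples per problem, 70 passing -> estimated pass@10 for that problem.
print(round(pass_at_k(200, 70, 10), 3))
```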