chiliu
committed on
Commit
•
e54792e
1
Parent(s):
5a5269f
add reference
Browse files- README.md +13 -33
- adapter_config.json +21 -0
README.md
CHANGED
@@ -186,39 +186,19 @@ LlamaForCausalLM(
|
|
186 |
)
|
187 |
```
|
188 |
|
189 |
-
##
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
| arc_easy/acc | **0.71** | 0.69 |
|
203 |
-
| arc_easy/acc_norm | 0.65 | 0.65 |
|
204 |
-
| boolq/acc | **0.72** | 0.66 |
|
205 |
-
| hellaswag/acc | **0.49** | 0.43 |
|
206 |
-
| hellaswag/acc_norm | 0.66 | **0.67** |
|
207 |
-
| openbookqa/acc | 0.26 | **0.27** |
|
208 |
-
| openbookqa/acc_norm | 0.40 | 0.40 |
|
209 |
-
| piqa/acc | **0.76** | 0.75 |
|
210 |
-
| piqa/acc_norm | 0.76 | 0.76 |
|
211 |
-
| record/em | 0.88 | 0.88 |
|
212 |
-
| record/f1 | 0.88 | **0.89** |
|
213 |
-
| rte/acc | 0.55 | **0.58** |
|
214 |
-
| truthfulqa_mc/mc1 | **0.27** | 0.22 |
|
215 |
-
| truthfulqa_mc/mc2 | **0.37** | 0.35 |
|
216 |
-
| wic/acc | **0.49** | 0.48 |
|
217 |
-
| winogrande/acc | **0.63** | 0.62 |
|
218 |
-
| Average | **0.53** | 0.52 |
|
219 |
-
|
220 |
-
|
221 |
-
We removed the task CB and WSC from our benchmark, as our model performs suspiciously well on these two tasks. We hypothesize that there could be a benchmark data contamination in the training set.
|
222 |
|
223 |
## Disclaimer
|
224 |
|
|
|
186 |
)
|
187 |
```
|
188 |
|
189 |
+
## Citation
|
190 |
+
|
191 |
+
If this work is helpful, please kindly cite as:
|
192 |
+
|
193 |
+
```bibtex
|
194 |
+
@Misc{mamba-gpt-3b-v2,
|
195 |
+
title = {Mamba-GPT-3b-v2},
|
196 |
+
author = {chiliu},
|
197 |
+
howpublished = {\url{https://huggingface.co/CobraMamba/mamba-gpt-3b-v2}},
|
198 |
+
year = {2023}
|
199 |
+
}
|
200 |
+
```
|
201 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
## Disclaimer
|
204 |
|
adapter_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"auto_mapping": null,
|
3 |
+
"base_model_name_or_path": "openlm-research/open_llama_3b_v2",
|
4 |
+
"bias": "none",
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"layers_pattern": null,
|
9 |
+
"layers_to_transform": null,
|
10 |
+
"lora_alpha": 16,
|
11 |
+
"lora_dropout": 0.1,
|
12 |
+
"modules_to_save": null,
|
13 |
+
"peft_type": "LORA",
|
14 |
+
"r": 256,
|
15 |
+
"revision": null,
|
16 |
+
"target_modules": [
|
17 |
+
"q_proj",
|
18 |
+
"v_proj"
|
19 |
+
],
|
20 |
+
"task_type": "CAUSAL_LM"
|
21 |
+
}
|