adds chat template and updates README
Browse files
- README.md +8 -2
- tokenizer_config.json +2 -1
README.md
CHANGED
@@ -12,6 +12,9 @@ co2_eq_emissions:
|
|
12 |
training_type: "pre-training"
|
13 |
geographical_location: "Washington, USA"
|
14 |
hardware_used: "8x NVIDIA H100 HBM"
|
|
|
|
|
|
|
15 |
---
|
16 |
# ClimateGPT-70B
|
17 |
|
@@ -34,7 +37,7 @@ Explore the model lineage [here](https://huggingface.co/spaces/EQTYLab/lineage-e
|
|
34 |
- **Context length:** 4K tokens
|
35 |
- **Input:** Text-only data
|
36 |
- **Output:** Model generates text only
|
37 |
-
- **Paper:** [
|
38 |
- **Website:** [eci.io](https://eci.io)
|
39 |
|
40 |
## Uses
|
@@ -73,7 +76,7 @@ The model was trained using ChatML so the following format should be followed wh
|
|
73 |
|
74 |
## Evaluation
|
75 |
|
76 |
-
Detailed evaluation results are presented in our [paper](https://
|
77 |
|
78 |
## Environmental Impact
|
79 |
- **Hardware Type:** 8x NVIDIA H100 HBM
|
@@ -93,5 +96,8 @@ If you find ClimateGPT useful in your work, please cite it with:
|
|
93 |
title={ClimateGPT: Towards AI Synthesizing Interdisciplinary Research on Climate Change},
|
94 |
author={David Thulke and Yingbo Gao and Petrus Pelser and Rein Brune and Rricha Jalota and Floris Fok and Michael Ramos and Ian van Wyk and Abdallah Nasir and Hayden Goldstein and Taylor Tragemann and Katie Nguyen and Ariana Fowler and Andrew Stanco and Jon Gabriel and Jordan Taylor and Dean Moro and Evgenii Tsymbalov and Juliette de Waal and Evgeny Matusov and Mudar Yaghi and Mohammad Shihadah and Hermann Ney and Christian Dugast and Jonathan Dotan and Daniel Erasmus},
|
95 |
year={2024},
|
|
|
|
|
|
|
96 |
}
|
97 |
```
|
|
|
12 |
training_type: "pre-training"
|
13 |
geographical_location: "Washington, USA"
|
14 |
hardware_used: "8x NVIDIA H100 HBM"
|
15 |
+
license: other
|
16 |
+
license_name: climategpt-community-license
|
17 |
+
license_link: https://huggingface.co/eci-io/climategpt-70b/blob/main/LICENSE.txt
|
18 |
---
|
19 |
# ClimateGPT-70B
|
20 |
|
|
|
37 |
- **Context length:** 4K tokens
|
38 |
- **Input:** Text-only data
|
39 |
- **Output:** Model generates text only
|
40 |
+
- **Paper:** [arXiv:2401.09646](https://arxiv.org/abs/2401.09646)
|
41 |
- **Website:** [eci.io](https://eci.io)
|
42 |
|
43 |
## Uses
|
|
|
76 |
|
77 |
## Evaluation
|
78 |
|
79 |
+
Detailed evaluation results are presented in our [paper](https://arxiv.org/abs/2401.09646) and on our model card website: [eci.io/model-card](https://eci.io/model-card)
|
80 |
|
81 |
## Environmental Impact
|
82 |
- **Hardware Type:** 8x NVIDIA H100 HBM
|
|
|
96 |
title={ClimateGPT: Towards AI Synthesizing Interdisciplinary Research on Climate Change},
|
97 |
author={David Thulke and Yingbo Gao and Petrus Pelser and Rein Brune and Rricha Jalota and Floris Fok and Michael Ramos and Ian van Wyk and Abdallah Nasir and Hayden Goldstein and Taylor Tragemann and Katie Nguyen and Ariana Fowler and Andrew Stanco and Jon Gabriel and Jordan Taylor and Dean Moro and Evgenii Tsymbalov and Juliette de Waal and Evgeny Matusov and Mudar Yaghi and Mohammad Shihadah and Hermann Ney and Christian Dugast and Jonathan Dotan and Daniel Erasmus},
|
98 |
year={2024},
|
99 |
+
eprint={2401.09646},
|
100 |
+
archivePrefix={arXiv},
|
101 |
+
primaryClass={cs.LG}
|
102 |
}
|
103 |
```
|
tokenizer_config.json
CHANGED
@@ -145,5 +145,6 @@
|
|
145 |
"spaces_between_special_tokens": false,
|
146 |
"tokenizer_class": "LlamaTokenizer",
|
147 |
"unk_token": "<unk>",
|
148 |
-
"use_default_system_prompt": false
|
|
|
149 |
}
|
|
|
145 |
"spaces_between_special_tokens": false,
|
146 |
"tokenizer_class": "LlamaTokenizer",
|
147 |
"unk_token": "<unk>",
|
148 |
+
"use_default_system_prompt": false,
|
149 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
|
150 |
}
|