Sandiago21
committed on
Commit
·
3a554d5
1
Parent(s):
3fd3337
update notebook and README.md
Browse files
- README.md +23 -25
- notebooks/HuggingFace-Inference-Falcon.ipynb +3 -3
README.md
CHANGED
@@ -5,8 +5,8 @@ language:
|
|
5 |
library_name: transformers
|
6 |
pipeline_tag: text-generation
|
7 |
tags:
|
8 |
-
-
|
9 |
-
-
|
10 |
- prompt answering
|
11 |
- peft
|
12 |
---
|
@@ -88,9 +88,11 @@ Use the code below to get started with the model.
|
|
88 |
```python
|
89 |
import torch
|
90 |
from peft import PeftConfig, PeftModel
|
91 |
-
from transformers import GenerationConfig, AutoTokenizer, AutoModelForCausalLM
|
92 |
|
93 |
-
MODEL_NAME = "
|
|
|
|
|
94 |
|
95 |
compute_dtype = getattr(torch, "float16")
|
96 |
|
@@ -112,13 +114,13 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
112 |
|
113 |
model = PeftModel.from_pretrained(model, MODEL_NAME)
|
114 |
|
115 |
-
generation_config =
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
|
123 |
model.eval()
|
124 |
if torch.__version__ >= "2":
|
@@ -144,7 +146,7 @@ with torch.no_grad():
|
|
144 |
response = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
|
145 |
print(response)
|
146 |
|
147 |
-
>>> The capital city of Greece is Athens and it borders
|
148 |
```
|
149 |
|
150 |
2. You can also directly call the model from HuggingFace using the following code snippet:
|
@@ -152,7 +154,7 @@ print(response)
|
|
152 |
```python
|
153 |
import torch
|
154 |
from peft import PeftConfig, PeftModel
|
155 |
-
from transformers import GenerationConfig, AutoTokenizer, AutoModelForCausalLM
|
156 |
|
157 |
MODEL_NAME = "Sandiago21/falcon-7b-prompt-answering"
|
158 |
BASE_MODEL = "tiiuae/falcon-7b"
|
@@ -166,8 +168,6 @@ bnb_config = BitsAndBytesConfig(
|
|
166 |
bnb_4bit_use_double_quant=True,
|
167 |
)
|
168 |
|
169 |
-
MODEL_NAME = "Sandiago21/falcon-7b-prompt-answering"
|
170 |
-
|
171 |
model = AutoModelForCausalLM.from_pretrained(
|
172 |
BASE_MODEL,
|
173 |
quantization_config=bnb_config,
|
@@ -179,13 +179,13 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
179 |
|
180 |
model = PeftModel.from_pretrained(model, MODEL_NAME)
|
181 |
|
182 |
-
generation_config =
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
|
190 |
model.eval()
|
191 |
if torch.__version__ >= "2":
|
@@ -212,7 +212,7 @@ with torch.no_grad():
|
|
212 |
response = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
|
213 |
print(response)
|
214 |
|
215 |
-
>>> The capital city of Greece is Athens and it borders
|
216 |
```
|
217 |
|
218 |
## Training Details
|
@@ -245,12 +245,10 @@ The following hyperparameters were used during training:
|
|
245 |
|
246 |
The tiiuae/falcon-7b model was finetuned on conversations and question answering data.
|
247 |
|
248 |
-
|
249 |
### Training Procedure
|
250 |
|
251 |
The tiiuae/falcon-7b model was further trained and finetuned on question answering and prompt data for 1 epoch (approximately 10 hours of training on a single GPU).
|
252 |
|
253 |
-
|
254 |
## Model Architecture and Objective
|
255 |
|
256 |
The model is based on the tiiuae/falcon-7b model, with adapters finetuned on top of the base model on conversations and question answering data.
|
|
|
5 |
library_name: transformers
|
6 |
pipeline_tag: text-generation
|
7 |
tags:
|
8 |
+
- falcon
|
9 |
+
- falcon-7b
|
10 |
- prompt answering
|
11 |
- peft
|
12 |
---
|
|
|
88 |
```python
|
89 |
import torch
|
90 |
from peft import PeftConfig, PeftModel
|
91 |
+
from transformers import GenerationConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
92 |
|
93 |
+
MODEL_NAME = "."
|
94 |
+
|
95 |
+
config = PeftConfig.from_pretrained(MODEL_NAME)
|
96 |
|
97 |
compute_dtype = getattr(torch, "float16")
|
98 |
|
|
|
114 |
|
115 |
model = PeftModel.from_pretrained(model, MODEL_NAME)
|
116 |
|
117 |
+
generation_config = model.generation_config
|
118 |
+
generation_config.top_p = 0.7
|
119 |
+
generation_config.num_return_sequences = 1
|
120 |
+
generation_config.max_new_tokens = 32
|
121 |
+
generation_config.use_cache = False
|
122 |
+
generation_config.pad_token_id = tokenizer.eos_token_id
|
123 |
+
generation_config.eos_token_id = tokenizer.eos_token_id
|
124 |
|
125 |
model.eval()
|
126 |
if torch.__version__ >= "2":
|
|
|
146 |
response = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
|
147 |
print(response)
|
148 |
|
149 |
+
>>> The capital city of Greece is Athens and it borders Albania, Bulgaria, Macedonia, and Turkey.
|
150 |
```
|
151 |
|
152 |
2. You can also directly call the model from HuggingFace using the following code snippet:
|
|
|
154 |
```python
|
155 |
import torch
|
156 |
from peft import PeftConfig, PeftModel
|
157 |
+
from transformers import GenerationConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
158 |
|
159 |
MODEL_NAME = "Sandiago21/falcon-7b-prompt-answering"
|
160 |
BASE_MODEL = "tiiuae/falcon-7b"
|
|
|
168 |
bnb_4bit_use_double_quant=True,
|
169 |
)
|
170 |
|
|
|
|
|
171 |
model = AutoModelForCausalLM.from_pretrained(
|
172 |
BASE_MODEL,
|
173 |
quantization_config=bnb_config,
|
|
|
179 |
|
180 |
model = PeftModel.from_pretrained(model, MODEL_NAME)
|
181 |
|
182 |
+
generation_config = model.generation_config
|
183 |
+
generation_config.top_p = 0.7
|
184 |
+
generation_config.num_return_sequences = 1
|
185 |
+
generation_config.max_new_tokens = 32
|
186 |
+
generation_config.use_cache = False
|
187 |
+
generation_config.pad_token_id = tokenizer.eos_token_id
|
188 |
+
generation_config.eos_token_id = tokenizer.eos_token_id
|
189 |
|
190 |
model.eval()
|
191 |
if torch.__version__ >= "2":
|
|
|
212 |
response = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
|
213 |
print(response)
|
214 |
|
215 |
+
>>> The capital city of Greece is Athens and it borders Albania, Bulgaria, Macedonia, and Turkey.
|
216 |
```
|
217 |
|
218 |
## Training Details
|
|
|
245 |
|
246 |
The tiiuae/falcon-7b model was finetuned on conversations and question answering data.
|
247 |
|
|
|
248 |
### Training Procedure
|
249 |
|
250 |
The tiiuae/falcon-7b model was further trained and finetuned on question answering and prompt data for 1 epoch (approximately 10 hours of training on a single GPU).
|
251 |
|
|
|
252 |
## Model Architecture and Objective
|
253 |
|
254 |
The model is based on the tiiuae/falcon-7b model, with adapters finetuned on top of the base model on conversations and question answering data.
|
notebooks/HuggingFace-Inference-Falcon.ipynb
CHANGED
@@ -101,7 +101,7 @@
|
|
101 |
{
|
102 |
"cell_type": "code",
|
103 |
"execution_count": 4,
|
104 |
-
"id": "
|
105 |
"metadata": {},
|
106 |
"outputs": [
|
107 |
{
|
@@ -164,7 +164,7 @@
|
|
164 |
{
|
165 |
"cell_type": "code",
|
166 |
"execution_count": 6,
|
167 |
-
"id": "
|
168 |
"metadata": {},
|
169 |
"outputs": [],
|
170 |
"source": [
|
@@ -665,7 +665,7 @@
|
|
665 |
{
|
666 |
"cell_type": "code",
|
667 |
"execution_count": null,
|
668 |
-
"id": "
|
669 |
"metadata": {},
|
670 |
"outputs": [],
|
671 |
"source": []
|
|
|
101 |
{
|
102 |
"cell_type": "code",
|
103 |
"execution_count": 4,
|
104 |
+
"id": "fd681dd1",
|
105 |
"metadata": {},
|
106 |
"outputs": [
|
107 |
{
|
|
|
164 |
{
|
165 |
"cell_type": "code",
|
166 |
"execution_count": 6,
|
167 |
+
"id": "78a786cc",
|
168 |
"metadata": {},
|
169 |
"outputs": [],
|
170 |
"source": [
|
|
|
665 |
{
|
666 |
"cell_type": "code",
|
667 |
"execution_count": null,
|
668 |
+
"id": "b061a441",
|
669 |
"metadata": {},
|
670 |
"outputs": [],
|
671 |
"source": []
|