Update README.md
README.md
CHANGED
@@ -19,7 +19,7 @@ The following `bitsandbytes` quantization config was used during training:
 
 - PEFT 0.4.0.dev0
 
-
+### Import model
 ```python
 import torch
 from peft import PeftModel, PeftConfig
@@ -32,7 +32,9 @@ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 
 # Load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
+```
 
+### Model Inference
 
 ```python
 input_text = "participant safety -->: "
@@ -42,3 +44,4 @@ with torch.cuda.amp.autocast():
 output_tokens = model.generate(**batch, max_new_tokens=50)
 
 print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
+```
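Taken together, the hunks add an "Import model" heading, close the first code block, and introduce a "Model Inference" heading with a properly closed second block. For reference, here is a minimal end-to-end sketch of what the updated sections describe, reconstructed from the fragments visible in the diff; the `peft_model_id` value and the base-model loading via `AutoModelForCausalLM`/`AutoTokenizer` are assumptions, since those README lines fall outside the diff context:

```python
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical adapter repo id; replace with the id given in the README.
peft_model_id = "<user>/<peft-adapter-id>"

# Resolve the base model from the adapter config (assumed step, not shown in the diff).
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
model = PeftModel.from_pretrained(model, peft_model_id)

# Model inference, mirroring the README snippet.
input_text = "participant safety -->: "
batch = tokenizer(input_text, return_tensors="pt")

with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
```

If the base model is placed on a GPU, the tokenized batch would also need to be moved to the same device before calling `generate`.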