Update README.md
README.md CHANGED
@@ -6,6 +6,30 @@ language:
@@ -16,40 +40,22 @@ alpaca_prompt = """Hapo chini kuna maelezo ya kazi, pamoja na maelezo ya ziada y

Before: the old usage example, which defined a Swahili Alpaca-style prompt template and a dataset-formatting helper, and ran inference with plain `transformers`.

```python
# Swahili Alpaca-style template. In English: "Below is a description of a task, along with
# additional information that provides more context. Write a response that appropriately
# completes the request."  "Maelezo" = instruction, "Jibu" = answer.
alpaca_prompt = """Hapo chini kuna maelezo ya kazi, pamoja na maelezo ya ziada yanayotoa muktadha zaidi. Andika jibu ambalo linakamilisha ombi hilo ipasavyo.

### Maelezo:
...

### Jibu:
{}"""  # the middle (input) section of the template is unchanged and not shown in this diff

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass


# pip install accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model = AutoModelForCausalLM.from_pretrained(
    "sartifyllc/sartify_gemma2-2B-16bit",
    device_map="auto",
    torch_dtype=torch.bfloat16
)

input_text = "Je moja jumlisha moja ni ngapi?"  # "What is one plus one?"
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))
```
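The removed snippet references a `tokenizer` it never loads and defines `formatting_prompts_func` without showing how it is applied. As a rough illustration only (the tokenizer load, the `yahma/alpaca-cleaned` dataset, and the `datasets.map` call are assumptions for the sketch, not part of this README), the helper would typically be wired up like this:

```python
from datasets import load_dataset
from transformers import AutoTokenizer

# Assumption: load the tokenizer that the snippet above relies on for its EOS token
tokenizer = AutoTokenizer.from_pretrained("sartifyllc/sartify_gemma2-2B-16bit")
EOS_TOKEN = tokenizer.eos_token

# Assumption: any dataset with "instruction"/"input"/"output" columns works; this one is a placeholder
dataset = load_dataset("yahma/alpaca-cleaned", split="train")

# Map the formatting helper defined above over the dataset in batches,
# producing a "text" column of EOS-terminated Alpaca-style prompts
dataset = dataset.map(formatting_prompts_func, batched=True)
print(dataset[0]["text"])
```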
After: the new usage example, which installs Unsloth, loads the model with `FastLanguageModel`, reuses the same prompt template, and streams a generation.

```python
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes


from unsloth import FastLanguageModel
import torch

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model_name = "sartifyllc/sartify_gemma2-2B-16bit"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    trust_remote_code = True,
    # load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Same Swahili Alpaca-style template as above (middle section not shown in this diff)
alpaca_prompt = """Hapo chini kuna maelezo ya kazi, pamoja na maelezo ya ziada yanayotoa muktadha zaidi. Andika jibu ambalo linakamilisha ombi hilo ipasavyo.

### Maelezo:
...

### Jibu:
{}"""

FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# alpaca_prompt = Copied from above
inputs = tokenizer(
    [
        alpaca_prompt.format(
            # instruction: "Just repeat everything I say in English in Swahili; say nothing else."
            "Rudia tu kila kitu ninachosema kwa Kiingereza kwa Kiswahili wala usiseme chochote kingine.", # instruction
            "Who is the president of Tanzania?", # input
            "", # output - leave this blank for generation!
        )
    ], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)
```
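If you want the completion as a string instead of streaming it to the console, you can skip the `TextStreamer`, decode the generated ids, and keep only the text after the `### Jibu:` header. This is a minimal sketch under the same setup as the example above; the variable names are illustrative:

```python
# Generate without a streamer so the token ids are returned
output_ids = model.generate(**inputs, max_new_tokens = 128, use_cache = True)

# Decode the full sequence (prompt plus completion) and drop special tokens
decoded = tokenizer.batch_decode(output_ids, skip_special_tokens = True)[0]

# Keep only the model's answer, i.e. everything after the "### Jibu:" header
answer = decoded.split("### Jibu:")[-1].strip()
print(answer)
```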