Update README.md
Browse files
README.md
CHANGED
@@ -42,8 +42,8 @@ copy <your_ryzen_ai-sw_install_path>\RyzenAI-SW\example\transformers\models\llam
|
|
42 |
set XLNX_VART_FIRMWARE=<your_firmware_install_path>\voe-4.0-win_amd64\1x4.xclbin
|
43 |
set NUM_OF_DPU_RUNNERS=1
|
44 |
|
45 |
-
# save the sample script below as a UTF-8 file named llama3.1-test.py and run:
|
46 |
-
python llama3.1-test.py
|
47 |
```
|
48 |
|
49 |
### Sample Script
|
@@ -56,38 +56,62 @@ from transformers import AutoTokenizer, set_seed
|
|
56 |
import qlinear
|
57 |
import logging
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
|
84 |
if __name__ == "__main__":
|
85 |
-
p = psutil.Process()
|
86 |
-
p.cpu_affinity([0, 1, 2, 3])
|
87 |
-
torch.set_num_threads(4)
|
88 |
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
terminators = [
|
92 |
tokenizer.eos_token_id,
|
93 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
@@ -102,31 +126,10 @@ if __name__ == "__main__":
|
|
102 |
m.device = "aie"
|
103 |
m.quantize_weights()
|
104 |
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
print("user: " + message_list[i])
|
110 |
-
|
111 |
-
input = tokenizer.apply_chat_template(
|
112 |
-
messages,
|
113 |
-
add_generation_prompt=True,
|
114 |
-
return_tensors="pt",
|
115 |
-
return_dict=True
|
116 |
-
)
|
117 |
-
|
118 |
-
outputs = model.generate(input['input_ids'],
|
119 |
-
max_new_tokens=600,
|
120 |
-
eos_token_id=terminators,
|
121 |
-
attention_mask=input['attention_mask'],
|
122 |
-
do_sample=True,
|
123 |
-
temperature=0.6,
|
124 |
-
top_p=0.9)
|
125 |
-
|
126 |
-
response = outputs[0][input['input_ids'].shape[-1]:]
|
127 |
-
response_message = tokenizer.decode(response, skip_special_tokens=True)
|
128 |
-
print("assistant: " + response_message)
|
129 |
-
messages.append({"role": "system", "content": response_message})
|
130 |
|
131 |
```
|
132 |
|
|
|
42 |
set XLNX_VART_FIRMWARE=<your_firmware_install_path>\voe-4.0-win_amd64\1x4.xclbin
|
43 |
set NUM_OF_DPU_RUNNERS=1
|
44 |
|
45 |
+
# save the sample script below as a UTF-8 file named llama3.1-8b_translate-test.py and run:
|
46 |
+
python llama3.1-8b_translate-test.py
|
47 |
```
|
48 |
|
49 |
### Sample Script
|
|
|
56 |
import qlinear
|
57 |
import logging
|
58 |
|
59 |
+
|
60 |
+
def translation(instruction, input):
|
61 |
+
system = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
62 |
+
You are a highly skilled professional translator. You are a native speaker of English, Japanese, French and Mandarin. Translate the given text accurately, taking into account the context and specific instructions provided. Steps may include hints enclosed in square brackets [] with the key and value separated by a colon:. If no additional instructions or context are provided, use your expertise to consider what the most appropriate context is and provide a natural translation that aligns with that context. When translating, strive to faithfully reflect the meaning and tone of the original text, pay attention to cultural nuances and differences in language usage, and ensure that the translation is grammatically correct and easy to read. For technical terms and proper nouns, either leave them in the original language or use appropriate translations as necessary. Take a deep breath, calm down, and start translating.<|eot_id|><|start_header_id|>user<|end_header_id|>"""
|
63 |
+
|
64 |
+
prompt = f"""{system}
|
65 |
+
### Instruction:
|
66 |
+
{instruction}
|
67 |
+
|
68 |
+
### Input:
|
69 |
+
{input}
|
70 |
+
|
71 |
+
### Response:
|
72 |
+
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
73 |
+
"""
|
74 |
+
|
75 |
+
tokenized_input = tokenizer(prompt, return_tensors="pt",
|
76 |
+
padding=True, max_length=1600, truncation=True)
|
77 |
+
|
78 |
+
terminators = [
|
79 |
+
tokenizer.eos_token_id,
|
80 |
+
]
|
81 |
+
|
82 |
+
outputs = model.generate(tokenized_input['input_ids'],
|
83 |
+
max_new_tokens=600,
|
84 |
+
eos_token_id=terminators,
|
85 |
+
attention_mask=tokenized_input['attention_mask'],
|
86 |
+
do_sample=True,
|
87 |
+
temperature=0.3,
|
88 |
+
top_p=0.5)
|
89 |
+
response = outputs[0][tokenized_input['input_ids'].shape[-1]:]
|
90 |
+
response_message = tokenizer.decode(response, skip_special_tokens=True)
|
91 |
+
return response_message
|
92 |
|
93 |
|
94 |
if __name__ == "__main__":
|
|
|
|
|
|
|
95 |
|
96 |
+
set_seed(123)
|
97 |
+
p = psutil.Process()
|
98 |
+
p.cpu_affinity([0, 1, 2, 3])
|
99 |
+
torch.set_num_threads(4)
|
100 |
+
|
101 |
+
tokenizer = AutoTokenizer.from_pretrained("llama3.1-8b_translate-amd-npu")
|
102 |
+
tokenizer.pad_token_id = tokenizer.add_special_tokens({'pad_token': '<|finetune_right_pad_id|>'})
|
103 |
+
ckpt = "llama3.1_8b_translate_w_bit_4_awq_amd.pt"
|
104 |
+
|
105 |
+
model = torch.load(ckpt)
|
106 |
+
model.eval()
|
107 |
+
model = model.to(torch.bfloat16)
|
108 |
+
|
109 |
+
for n, m in model.named_modules():
|
110 |
+
if isinstance(m, qlinear.QLinearPerGrp):
|
111 |
+
print(f"Preparing weights of layer : {n}")
|
112 |
+
m.device = "aie"
|
113 |
+
m.quantize_weights()
|
114 |
+
|
115 |
terminators = [
|
116 |
tokenizer.eos_token_id,
|
117 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
|
|
126 |
m.device = "aie"
|
127 |
m.quantize_weights()
|
128 |
|
129 |
+
|
130 |
+
print("Translate Japanese to English.", "1月1日は日本の祝日です。その日は日曜日で、5日ぶりに雨が降りました")
|
131 |
+
print("Translate English to Japanese.", "It’s raining cats and dogs.")
|
132 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
```
|
135 |
|