Update README.md
Browse files
README.md
CHANGED
@@ -18,4 +18,58 @@ The model supports translation that converts text with China words to text with
|
|
18 |
|
19 |
#### This Model
|
20 |
|
21 |
-
This model was fine-tuned from [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) using instruction fine-tuning. The dataset was collected from [MBZUAI/Bactrian-X](https://huggingface.co/datasets/MBZUAI/Bactrian-X) and automatically labeled with [繁化姬](https://zhconvert.org).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
#### This Model
|
20 |
|
21 |
+
This model was fine-tuned from [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) using instruction fine-tuning. The dataset was collected from [MBZUAI/Bactrian-X](https://huggingface.co/datasets/MBZUAI/Bactrian-X) and automatically labeled with [繁化姬](https://zhconvert.org).
|
22 |
+
|
23 |
+
#### How to use
|
24 |
+
|
25 |
+
```python
|
26 |
+
import torch
|
27 |
+
from transformers import pipeline
|
28 |
+
|
29 |
+
SYSTEM_PROMPT = """\
|
30 |
+
對於輸入內容的中文文字,請將中國用語轉成台灣的用語,其他非中文文字或非中國用語都維持不變。
|
31 |
+
|
32 |
+
範例:
|
33 |
+
Input: ```這個視頻的質量真高啊```
|
34 |
+
Output: ```這個影片的品質真高啊```\
|
35 |
+
"""
|
36 |
+
|
37 |
+
text_trad = "這個軟件的質量真高啊"
|
38 |
+
|
39 |
+
pipeline = pipeline(
|
40 |
+
"text-generation",
|
41 |
+
model="feabries/TaiwanWordTranslator-v0.1",
|
42 |
+
torch_dtype=torch.bfloat16,
|
43 |
+
device_map="auto",
|
44 |
+
)
|
45 |
+
|
46 |
+
prompt = "Input: ```{}```".format(text_trad)
|
47 |
+
messages = [{
|
48 |
+
"role": "system",
|
49 |
+
"content": SYSTEM_PROMPT,
|
50 |
+
}, {
|
51 |
+
"role": "user",
|
52 |
+
"content": prompt,
|
53 |
+
}]
|
54 |
+
input_text = pipeline.tokenizer.apply_chat_template(
|
55 |
+
messages,
|
56 |
+
tokenize=False,
|
57 |
+
add_generation_prompt=True,
|
58 |
+
)
|
59 |
+
outputs = pipeline(
|
60 |
+
input_text,
|
61 |
+
do_sample=False,
|
62 |
+
max_new_tokens=2048,
|
63 |
+
)
|
64 |
+
print(outputs[0]["generated_text"])
|
65 |
+
# <|system|>
|
66 |
+
# 對於輸入內容的中文文字,請將中國用語轉成台灣的用語,其他非中文文字或非中國用語都維持不變。
|
67 |
+
#
|
68 |
+
# 範例:
|
69 |
+
# Input: ```這個視頻的質量真高啊```
|
70 |
+
# Output: ```這個影片的品質真高啊```</s>
|
71 |
+
# <|user|>
|
72 |
+
# Input: ```這個軟件的質量真高啊```</s>
|
73 |
+
# <|assistant|>
|
74 |
+
# Output: ```這個軟體的品質真高啊```
|
75 |
+
```
|