from utils.simple_bleu import simple_score
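# simple_score is a project-local helper, assumed to live in utils/simple_bleu.py
# next to this script (it is not a PyPI package); it returns a BLEU-style
# similarity between two strings.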
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
device = "cuda" # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained("maywell/Synatra-7B-v0.3-Translation", torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("maywell/Synatra-7B-v0.3-Translation")
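
# Note: device_map="auto" already dispatches the model's weights to the
# available device(s), so no explicit model.to(device) call is needed.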


def translate_ko2en(text):
    messages = [
        # System prompt: "Translate the given sentence into English."
        {"role": "system", "content": "주어진 문장을 영어로 번역해라."},
        {"role": "user", "content": text},
    ]
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = encodeds.to(device)
    generated_ids = model.generate(model_inputs, max_new_tokens=2048, do_sample=True)
    output = tokenizer.batch_decode(generated_ids)[0]
    # Drop the trailing end-of-turn token, then keep only what follows the last
    # remaining <|im_end|>, i.e. the assistant's reply after the prompt turns.
    if output.endswith("<|im_end|>"):
        output = output[: -len("<|im_end|>")]
    output = output.split('<|im_end|>')[-1]
    return output


def translate_en2ko(text):
    messages = [
        # System prompt: "Translate the given sentence into Korean."
        {"role": "system", "content": "주어진 문장을 한국어로 번역해라."},
        {"role": "user", "content": text},
    ]
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = encodeds.to(device)
    generated_ids = model.generate(model_inputs, max_new_tokens=2048, do_sample=True)
    output = tokenizer.batch_decode(generated_ids)[0]
    if output.endswith("<|im_end|>"):
        output = output[: -len("<|im_end|>")]
    output = output.split('<|im_end|>')[-1]
    return output
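

# The two translate_* functions above differ only in their system prompt. A
# minimal shared code path could look like the sketch below; _translate is a
# hypothetical helper, not part of the original script, and nothing here calls it.
def _translate(text, system_prompt):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
    generated_ids = model.generate(model_inputs, max_new_tokens=2048, do_sample=True)
    output = tokenizer.batch_decode(generated_ids)[0]
    if output.endswith("<|im_end|>"):
        output = output[: -len("<|im_end|>")]
    return output.split('<|im_end|>')[-1]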


def main():
    # Round-trip evaluation: Korean input -> English -> back to Korean, then
    # score the back-translation against the original input.
    while True:
        text = input('>')
        en_text = translate_ko2en(text)
        ko_text = translate_en2ko(en_text)
        print('------en_text--------')
        print(en_text)
        print('------ko_text--------')
        print(ko_text)
        print('score', simple_score(text, ko_text))


if __name__ == "__main__":
    main()
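
# Note: do_sample=True makes each translation stochastic, so the round-trip
# score varies between runs; passing do_sample=False to model.generate gives
# deterministic greedy decoding if reproducible scores are needed.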