EricaCorral's picture
Marian didn't work, rolled back version to autotokenizer
d4f6c5c
raw
history blame
No virus
1.48 kB
from pypinyin import pinyin
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from LAC import LAC
import gradio as gr
import torch
# The tokenizer and the translation model are loaded from the same
# "Helsinki-NLP/opus-mt-zh-en" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
# ``eval()`` returns the module itself, so the model is switched to
# inference mode in the same expression that loads it.
model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en").eval()
# LAC instance created with mode="seg"; its ``run`` method is used below to
# split the input text into pieces.
lac = LAC(mode="seg")
def make_request(chinese_text):
    """Translate one Chinese string with the module-level model and tokenizer.

    Args:
        chinese_text: The Chinese text to translate.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated tokens for
        the single-item batch, with special tokens skipped.
    """
    # Inference only: no gradients are needed.
    with torch.no_grad():
        # Call the tokenizer directly instead of the deprecated
        # ``prepare_seq2seq_batch`` helper. ``padding="longest"`` and
        # ``truncation=True`` reproduce that helper's defaults, so the
        # encoded batch is unchanged.
        encoded_zh = tokenizer(
            [chinese_text],
            return_tensors="pt",
            padding="longest",
            truncation=True,
        )
        generated_tokens = model.generate(**encoded_zh)
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
def generatepinyin(input):
    """Return the pinyin of ``input`` as a single string.

    Every entry returned by ``pinyin`` contributes its first element,
    preceded by one space, so a non-empty result starts with a space.
    An empty ``pinyin`` result yields the empty string.
    """
    first_readings = (candidates[0] for candidates in pinyin(input))
    return "".join(" " + reading for reading in first_readings)
def generate_response(Chinese_to_translate):
    """Build translation and pinyin rows for the full text and its pieces.

    The first row describes the whole input; one further row follows for
    each piece returned by ``lac.run`` on the same input.

    Args:
        Chinese_to_translate: The Chinese text entered by the user.

    Returns:
        A list of ``[text, make_request(text), generatepinyin(text)]`` rows.
    """

    def _row(text):
        # One output row: the text, its translation result, and its pinyin.
        return [text, make_request(text), generatepinyin(text)]

    # The whole-input row is built before segmentation runs.
    rows = [_row(Chinese_to_translate)]
    rows.extend(_row(segment) for segment in lac.run(Chinese_to_translate))
    return rows
# Gradio UI: one multi-line text box in, the rows from generate_response
# rendered through the "text" output.
# NOTE(review): ``gr.inputs.Textbox`` is the legacy component namespace;
# newer Gradio releases expose ``gr.Textbox`` instead. Confirm the pinned
# Gradio version before changing it.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.inputs.Textbox(lines=5, placeholder="Enter text in Chinese"),
    outputs="text",
    title="Chinese to English",
    description="Chinese to English with Helsinki Research's Chinese to English model. Makes for extremely FAST translations.",
)
# Start serving the interface as soon as the script runs.
iface.launch()