EricaCorral's picture
Improve performance with Marian framework?
983993d
raw
history blame
1.42 kB
from pypinyin import pinyin
from transformers import MarianMTModel, MarianTokenizer
from LAC import LAC
import gradio as gr
import torch
# Load the shared resources once at import time so every request reuses them.
# The tokenizer and the translation model come from the same checkpoint.
tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
# The model is only used for generation, so switch it to evaluation mode.
model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-zh-en").eval()
# LAC in "seg" mode is used below to split the input text into segments.
lac = LAC(mode="seg")
def make_request(chinese_text):
    """Translate a piece of Chinese text to English.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        chinese_text: The Chinese text to translate, as a single string.

    Returns:
        The decoded English translation with special tokens removed.
    """
    encoded = tokenizer(chinese_text, return_tensors="pt", padding=True)
    # Inference only: no gradients are needed for generation.
    with torch.no_grad():
        generated_tokens = model.generate(**encoded)
    # generate() yields a batch of sequences, while decode() handles a single
    # sequence of token ids; a single input string gives a batch of one, so
    # decode its only row instead of passing the whole batch.
    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
def generatepinyin(input):
    """Return the pinyin transcription of ``input`` as one string.

    Every entry produced by ``pinyin()`` contributes its first item, and each
    item is preceded by a single space (so a non-empty result starts with a
    space). An empty result from ``pinyin()`` gives an empty string.
    """
    return "".join(" " + reading[0] for reading in pinyin(input))
def generate_response(Chinese_to_translate):
    """Build translation and pinyin rows for the full text and each segment.

    Args:
        Chinese_to_translate: The Chinese text entered by the user.

    Returns:
        A list of ``[text, translation, pinyin]`` rows. The first row covers
        the whole input; the following rows cover each segment returned by
        ``lac.run`` in order.
    """

    def _row(text):
        # One output row: the source text, its translation, and its pinyin.
        return [text, make_request(text), generatepinyin(text)]

    rows = [_row(Chinese_to_translate)]
    rows.extend(_row(segment) for segment in lac.run(Chinese_to_translate))
    return rows
# Multi-line text box where the user enters the Chinese text.
chinese_input = gr.inputs.Textbox(lines=5, placeholder="Enter text in Chinese")

# Wire generate_response into a simple text-in / text-out web interface.
iface = gr.Interface(
    fn=generate_response,
    inputs=chinese_input,
    outputs="text",
    title="Chinese to English",
    description="Chinese to English with Helsinki Research's Chinese to English model. Makes for extremely FAST translations.",
)

# Start serving the interface as soon as the script runs.
iface.launch()