# english2darija / main.py
# Essa20001's picture
# Upload 9 files
# 74ac292 verified
# raw
# history blame contribute delete
# 904 Bytes
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer
import streamlit as st
# Streamlit app: translate an English sentence with a seq2seq checkpoint.
# Streamlit re-executes this whole script on every widget interaction, so
# expensive work (loading the model) is cached and everything else is cheap.

# Used both as the word-count cutoff for the input and as ``max_length`` for
# tokenization and generation.
MAX_LENGTH = 184
# Prefix prepended to every input before it is tokenized.
SPECIAL_WORD = "[TODARIJA]"

# Page configuration goes first so that no other Streamlit element (including
# the spinner a cached loader may show) is emitted before it.
st.set_page_config("English to darija ")


@st.cache_resource
def _load_translator():
    """Load the model and tokenizer from the ``"ckpt"`` checkpoint once.

    Cached with ``st.cache_resource`` so script reruns reuse the same objects
    instead of reloading the checkpoint on every button click.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        AutoModelForSeq2SeqLM.from_pretrained("ckpt"),
        T5Tokenizer.from_pretrained("ckpt"),
    )


model, tokenizer = _load_translator()

st.title('English to Darija Translation machine by fine-tuning T5 model on Darija Open Dataset')
sentence = st.text_input("input your english text")
button = st.button("translate to Darija")

if button:
    if not sentence.strip():
        # Nothing to translate: do not send a bare prefix to the model.
        st.warning("Please enter some English text to translate.")
    else:
        # NOTE(review): lowercasing happens after the prefix is attached, so
        # the prefix itself is lowercased as well. Kept exactly as in the
        # original; confirm this matches how the checkpoint was trained.
        prompt = f"{SPECIAL_WORD} {sentence}".lower()
        word_count = len(prompt.split())
        if word_count < MAX_LENGTH - 1:
            inputs = tokenizer(
                prompt,
                max_length=MAX_LENGTH,
                truncation=True,
                return_tensors="pt",
            )
            outputs = model.generate(**inputs, max_length=MAX_LENGTH)
            decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            st.text(decoded_output)
        else:
            # Previously over-long input was dropped silently; tell the user.
            st.warning(
                f"Input is too long ({word_count} words including the prefix); "
                f"please keep it under {MAX_LENGTH - 1} words."
            )