"""Streamlit app that translates English text to Darija.

A seq2seq model and its tokenizer are loaded from the local ``ckpt``
directory. When the user clicks the button, the input is prefixed with
``SPECIAL_WORD``, lower-cased, and passed to ``model.generate``; the
decoded result is shown on the page.
"""

from transformers import AutoModelForSeq2SeqLM, T5Tokenizer
import streamlit as st

# Upper bound used both for tokenizer truncation and for generation length.
MAX_LENGTH = 184
# Prefix prepended to every input sentence before it is sent to the model.
SPECIAL_WORD = "[TODARIJA]"

# Streamlit re-executes this script on every user interaction, so an uncached
# load would re-read the checkpoint each time. `st.cache_resource` is only
# available on newer Streamlit releases; on older ones fall back to a no-op
# decorator, which keeps the original (uncached) behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_model_and_tokenizer():
    """Load the model and tokenizer from the ``ckpt`` directory.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained("ckpt")
    loaded_tokenizer = T5Tokenizer.from_pretrained("ckpt")
    return loaded_model, loaded_tokenizer


# The page config is set before the model is loaded so that it remains the
# first Streamlit command even if the cached loader renders a spinner.
st.set_page_config("English to darija ")

model, tokenizer = _load_model_and_tokenizer()

st.title('English to Darija Translation machine by fine-tuning T5 model on Darija Open Dataset')
sentence = st.text_input("input your english text")
button = st.button("translate to Darija")

if button:
    if not sentence.strip():
        # Nothing to translate: avoid sending the bare prefix to the model.
        st.warning("Please enter some English text to translate.")
    else:
        # The prefix is attached before lower-casing, so it is lower-cased too.
        # NOTE(review): kept as in the original; presumably this matches the
        # preprocessing used during fine-tuning -- confirm before changing.
        sentence = (SPECIAL_WORD + " " + sentence).lower()
        # Word count (whitespace split) includes the prefix word.
        length = len(sentence.split())
        if length < MAX_LENGTH - 1:
            inputs = tokenizer(
                sentence,
                max_length=MAX_LENGTH,
                truncation=True,
                return_tensors="pt",
            )
            outputs = model.generate(**inputs, max_length=MAX_LENGTH)
            # A single sentence was encoded, so take the first decoded item.
            decoded_output = tokenizer.batch_decode(
                outputs, skip_special_tokens=True
            )[0]
            st.text(decoded_output)
        else:
            # Previously this case produced no output at all.
            st.warning(
                f"Input is too long ({length} words including the prefix); "
                f"please use fewer than {MAX_LENGTH - 1} words."
            )