# Spaces:
# Runtime error
# Runtime error
import streamlit as st | |
from ai4bharat.transliteration import XlitEngine | |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
import torch | |
from IndicTransTokenizer import IndicProcessor | |
# Initialize the transliteration engine and model
@st.cache_resource
def _load_resources():
    """Build the transliteration engine, translation model and tokenizer once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the engine and the seq2seq checkpoint would
    be rebuilt on every button click; ``st.cache_resource`` keeps one copy
    alive across reruns.

    Returns:
        A ``(engine, model, tokenizer)`` tuple.
    """
    # NOTE(review): cached resources are shared by all sessions of the app;
    # confirm these objects are safe to share if concurrent users are expected.
    checkpoint = "ai4bharat/indictrans2-indic-en-1B"
    engine = XlitEngine(["gu", 'en'], beam_width=10, src_script_type="en")
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, trust_remote_code=True)
    tok = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
    return engine, seq2seq, tok


# Module-level names read by the helper functions in this file.
e, model, tokenizer = _load_resources()
# Built on every run rather than cached: presumably cheap to construct, and it
# may carry per-batch state between preprocess and postprocess — TODO confirm.
ip = IndicProcessor(inference=True)
def english_to_gujarati(text):
    """Return the ``'gu'`` entry of the transliteration engine's output for *text*.

    The sentence is passed to the module-level engine ``e`` via
    ``translit_sentence``; the result is indexed by the ``'gu'`` language key.
    """
    transliterations = e.translit_sentence(text)
    return transliterations['gu']
def translate_question(english_question):
    """Transliterate the input, then translate it from Gujarati to English.

    Steps: transliterate with ``english_to_gujarati``, preprocess the
    single-item batch (``guj_Gujr`` -> ``eng_Latn``), tokenize, generate with
    5-beam search, decode, and post-process.

    Args:
        english_question: The text entered by the user.

    Returns:
        Whatever ``ip.postprocess_batch`` returns for the single-item batch.
    """
    source_sentences = [english_to_gujarati(english_question)]
    prepared = ip.preprocess_batch(
        source_sentences, src_lang="guj_Gujr", tgt_lang="eng_Latn"
    )
    encoded = tokenizer(
        prepared,
        padding="longest",
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )

    # Generation only: run under inference mode.
    with torch.inference_mode():
        generated = model.generate(
            **encoded, num_beams=5, num_return_sequences=1, max_length=256
        )

    # Decode inside the tokenizer's target-side context.
    with tokenizer.as_target_tokenizer():
        decoded = tokenizer.batch_decode(
            generated, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )

    return ip.postprocess_batch(decoded, lang="eng_Latn")
# Streamlit UI
st.title("English to Gujarati Translation")
st.write("Enter your question in English:")
english_question = st.text_input("Question:")
if st.button("Translate"):
    # Treat whitespace-only input like empty input so a blank question does
    # not trigger a full transliteration + generation pass.
    question = english_question.strip()
    if question:
        translated = translate_question(question)
        st.write("Transliterated and Translated question:", translated)
    else:
        st.write("Please enter a question.")