Spaces:
Runtime error
Runtime error
File size: 1,707 Bytes
699ec42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import streamlit as st
from ai4bharat.transliteration import XlitEngine
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
from IndicTransTokenizer import IndicProcessor
# Initialize the transliteration engine and model.
_MODEL_NAME = "ai4bharat/indictrans2-indic-en-1B"


@st.cache_resource
def _load_resources():
    """Build the transliteration engine, model, tokenizer and processor once.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, each button click would construct the
    engine and reload the checkpoint again. ``st.cache_resource`` keeps one
    shared instance of the returned objects for later reruns.

    Returns:
        A tuple ``(xlit_engine, model, tokenizer, indic_processor)``.
    """
    xlit_engine = XlitEngine(["gu", 'en'], beam_width=10, src_script_type="en")
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(
        _MODEL_NAME, trust_remote_code=True
    )
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(
        _MODEL_NAME, trust_remote_code=True
    )
    indic_processor = IndicProcessor(inference=True)
    return xlit_engine, seq2seq_model, seq2seq_tokenizer, indic_processor


# Same module-level names as before, so the functions below are unaffected.
e, model, tokenizer, ip = _load_resources()
def english_to_gujarati(text):
    """Transliterate ``text`` with the module-level engine ``e``.

    Args:
        text: Sentence to transliterate (presumably Roman-script input;
            confirm against how the engine is configured).

    Returns:
        The ``'gu'`` entry of the mapping returned by
        ``e.translit_sentence``.
    """
    per_language = e.translit_sentence(text)
    return per_language['gu']
def translate_question(english_question):
    """Transliterate the question, then run it through the seq2seq model.

    Pipeline: ``english_to_gujarati`` -> ``ip.preprocess_batch`` ->
    ``tokenizer`` -> ``model.generate`` -> ``tokenizer.batch_decode`` ->
    ``ip.postprocess_batch``.

    Args:
        english_question: The text typed by the user.

    Returns:
        Whatever ``ip.postprocess_batch`` returns for the single-sentence
        batch (presumably a one-element list of strings; confirm against
        the IndicProcessor documentation).
    """
    # The processor and tokenizer work on batches, so wrap the one sentence.
    source_sentences = [english_to_gujarati(english_question)]

    prepared = ip.preprocess_batch(
        source_sentences, src_lang="guj_Gujr", tgt_lang="eng_Latn"
    )
    encoded = tokenizer(
        prepared,
        padding="longest",
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )

    # Generation only: no gradients are needed.
    with torch.inference_mode():
        generated_ids = model.generate(
            **encoded, num_beams=5, num_return_sequences=1, max_length=256
        )

    # Decode while the tokenizer is switched to its target-side mode.
    with tokenizer.as_target_tokenizer():
        decoded = tokenizer.batch_decode(
            generated_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

    return ip.postprocess_batch(decoded, lang="eng_Latn")
# Streamlit UI: one text box and one button; the result is written below them.
st.title("English to Gujarati Translation")
st.write("Enter your question in English:")
english_question = st.text_input("Question:")

# st.button must be called on every run so the button is rendered.
translate_clicked = st.button("Translate")
if translate_clicked:
    if not english_question:
        # Nothing typed: prompt the user instead of calling the model.
        st.write("Please enter a question.")
    else:
        translated = translate_question(english_question)
        st.write("Transliterated and Translated question:", translated)
|