# Def_04 Docx file to translated_Docx file
import math
import os
import sys
from time import sleep

import docx
import gradio as gr
import nltk
import torch
from docx import Document
from nltk.tokenize import LineTokenizer
from nltk.tokenize import sent_tokenize
from transformers import MarianMTModel, MarianTokenizer

nltk.download('punkt')


def getText(filename):
    """Return the text of every paragraph in a .docx file, joined by newlines.

    Args:
        filename: Path of the .docx file, passed straight to ``docx.Document``.

    Returns:
        A single string with one document paragraph per line.
    """
    doc = docx.Document(filename)
    return '\n'.join(para.text for para in doc.paragraphs)


# Def_01 applying process bar to function
def print_progress_bar(index, total, label):
    """Redraw a 50-character text progress bar on the current stdout line.

    Args:
        index: Number of items completed so far.
        total: Total number of items; 0 is drawn as a full bar.
        label: Text printed after the percentage.
    """
    n_bar = 50  # Progress bar width
    # Guard against ZeroDivisionError when there is nothing to process.
    progress = index / total if total else 1.0
    sys.stdout.write('\r')
    sys.stdout.write(f"[{'=' * int(n_bar * progress):{n_bar}s}] {int(100 * progress)}% {label}")
    sys.stdout.flush()


# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    dev = "cuda"
else:
    dev = "cpu"
device = torch.device(dev)

mname = "Helsinki-NLP/opus-mt-en-hi"
tokenizer = MarianTokenizer.from_pretrained(mname)
model = MarianMTModel.from_pretrained(mname)
model.to(device)


def _translate_sentences(sentences, batch_size=8):
    """Translate a list of sentences with the module-level model, in batches."""
    translated = []
    for start in range(0, len(sentences), batch_size):
        sent_batch = sentences[start:start + batch_size]
        # Inputs longer than 500 tokens are truncated by the tokenizer.
        model_inputs = tokenizer(
            sent_batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=500,
        ).to(device)
        with torch.no_grad():
            generated = model.generate(**model_inputs)
        translated.extend(
            tokenizer.decode(t, skip_special_tokens=True) for t in generated
        )
    return translated


def btTranslator(docxfile):
    """Translate a .docx file paragraph by paragraph and save the result.

    The document text is split into non-blank lines, each line is split into
    sentences, the sentences are translated in batches, and every translated
    line is written as one paragraph of a new document named
    ``Translated_<original file name>`` in the current working directory.

    Args:
        docxfile: Path of the .docx file to translate.

    Returns:
        A tuple ``(translated_paragraphs, output_path)``: the list of
        translated paragraph strings and the path of the saved document.
    """
    # LineTokenizer drops blank lines by default, so only lines with text
    # are sent to the model.
    paragraphs = LineTokenizer().tokenize(getText(docxfile))
    total = len(paragraphs)
    files = Document()
    translated_paragraphs = []

    for index, paragraph in enumerate(paragraphs):
        print_progress_bar(index, total, "Percentage Bar")
        sentences = sent_tokenize(paragraph)
        translated_text = " ".join(_translate_sentences(sentences))
        translated_paragraphs.append(translated_text)
        # add_paragraph takes the paragraph text as one string, so write the
        # joined text rather than the list of sentences.
        files.add_paragraph(translated_text)
    # Final redraw so the bar ends at 100% instead of one step short.
    print_progress_bar(total, total, "Percentage Bar")

    # Name the output after the input file name only, whatever directory the
    # input lives in.
    output_path = f"Translated_{os.path.basename(docxfile)}"
    # Document.save() returns nothing; hand the path itself to the caller so
    # the 'file' output has something to serve.
    files.save(output_path)
    return translated_paragraphs, output_path


interface = gr.Interface(
    fn=btTranslator,
    inputs=gr.inputs.Textbox(lines=1),
    outputs=['text', 'file'],
    show_progress=True,
)
interface.launch(debug=True)