#**************** IMPORT PACKAGES ********************
# Gradio app: upload a PDF, extract its text, and return it as synthesized speech.
import os
import re
import tempfile
from zipfile import ZipFile

import gradio as gr
import nltk
import numpy as np
import pdf2image
import pdfkit
import pytesseract as pt
import yake
from fpdf import FPDF
from gtts import gTTS
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from pdfminer.high_level import extract_text
from summarizer import Summarizer, TransformerSummarizer
from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig
from transformers import pipelines

# Tokenizer data used by the nltk tokenizers imported above.
nltk.download('punkt')

# Legal-BERT extractive summarizer, configured from the Hugging Face hub.
# NOTE(review): nothing in this part of the file calls `bert_legal_model`;
# it is kept because other code may rely on it being built at import time.
model_name = 'nlpaueb/legal-bert-base-uncased'

# The setup of huggingface.co
custom_config = AutoConfig.from_pretrained(model_name)
custom_config.output_hidden_states = True
custom_tokenizer = AutoTokenizer.from_pretrained(model_name)
custom_model = AutoModel.from_pretrained(model_name, config=custom_config)
bert_legal_model = Summarizer(custom_model=custom_model, custom_tokenizer=custom_tokenizer)


def pdf_to_text(file_obj):
    """Convert an uploaded PDF into spoken English audio.

    Args:
        file_obj: Uploaded-file object whose ``name`` attribute is the path
            of the PDF on disk (this is what the "file" input passes in).

    Returns:
        Path of the generated audio file, suitable for the "audio" output.

    Raises:
        ValueError: If no text could be extracted from the PDF (for example
            a scanned, image-only document).
    """
    text = extract_text(file_obj.name)

    # gTTS refuses empty input with an opaque assertion; fail early with a
    # message that tells the user what is actually wrong.
    if not text or not text.strip():
        raise ValueError("No extractable text found in the uploaded PDF.")

    speech = gTTS(text=text, lang='en', slow=False)

    # Use a unique file per request: a fixed name in the working directory
    # would let concurrent requests overwrite each other's audio. gTTS writes
    # MP3 data, so the suffix reflects the real format.
    # The file is deliberately not deleted here because the UI still has to
    # read it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_path = audio_file.name
    speech.save(audio_path)
    return audio_path


iface = gr.Interface(fn=pdf_to_text, inputs="file", outputs="audio")

if __name__ == "__main__":
    iface.launch(share=True)