#**************** IMPORT PACKAGES ********************
import json
import os
import re
import tempfile
from zipfile import ZipFile

import gradio as gr
import nltk
import numpy as np
import pdf2image
import pdfkit
import pytesseract as pt
import yake
from fpdf import FPDF
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from summarizer import Summarizer, TransformerSummarizer
from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig
from transformers import pipelines

# Fetch the 'punkt' tokenizer data for nltk at import time.
nltk.download('punkt')

model_name = 'nlpaueb/legal-bert-base-uncased'

# The setup of huggingface.co
custom_config = AutoConfig.from_pretrained(model_name)
# Hidden states are requested so the Summarizer wrapper can read them.
custom_config.output_hidden_states = True
custom_tokenizer = AutoTokenizer.from_pretrained(model_name)
custom_model = AutoModel.from_pretrained(model_name, config=custom_config)
bert_legal_model = Summarizer(custom_model=custom_model, custom_tokenizer=custom_tokenizer)


def zip_to_json(file_obj):
    """Write a JSON manifest describing an uploaded zip archive.

    The archive is opened read-only and its member listing is written to a
    ``<archive stem>.json`` file inside a freshly created temporary
    directory. That directory is created with ``tempfile.mkdtemp`` on
    purpose: unlike ``tempfile.TemporaryDirectory`` used as a context
    manager, it is not removed when this function returns, so the returned
    path still exists when Gradio serves it as the "file" output.

    NOTE(review): the manifest layout (archive name plus member names and
    uncompressed sizes) is a minimal reading of the function name; extend it
    once the intended JSON content is settled.

    Args:
        file_obj: The uploaded archive as handed over by the Gradio "file"
            input: either a filesystem path (``str`` / ``os.PathLike``) or
            an object exposing the path as ``.name`` (which of the two
            presumably depends on the Gradio version in use).

    Returns:
        Path (``str``) of the JSON file that was written.

    Raises:
        ValueError: If no file was uploaded (``file_obj`` is ``None``).
        zipfile.BadZipFile: If the upload is not a valid zip archive.
    """
    if file_obj is None:
        raise ValueError("no file was uploaded")

    # Path-like inputs are used directly; anything else is expected to be a
    # file wrapper carrying its location in ``.name``. The order matters:
    # pathlib.Path also has a ``.name``, but that is only the basename.
    if isinstance(file_obj, (str, os.PathLike)):
        archive_path = os.fspath(file_obj)
    else:
        archive_path = os.fspath(file_obj.name)

    with ZipFile(archive_path) as archive:
        members = [
            {"name": info.filename, "size": info.file_size}
            for info in archive.infolist()
            if not info.is_dir()
        ]

    archive_name = os.path.basename(archive_path)
    manifest = {"archive": archive_name, "files": members}

    out_dir = tempfile.mkdtemp()
    stem = os.path.splitext(archive_name)[0] or "archive"
    json_path = os.path.join(out_dir, stem + ".json")
    with open(json_path, "w", encoding="utf-8") as handle:
        json.dump(manifest, handle, ensure_ascii=False, indent=2)

    return json_path


#def pdf(file_name):
#    path = folder_name
#    return path
#pageObject.extractText()

iface = gr.Interface(fn=zip_to_json, inputs="file", outputs="file")

if __name__ == "__main__":
    iface.launch(share=True)