# Spaces: Sleeping
#**************** IMPORT PACKAGES ******************** | |
import gradio as gr | |
import numpy as np | |
import pytesseract as pt | |
import pdf2image | |
import os | |
import tempfile | |
from fpdf import FPDF | |
import re | |
import nltk | |
from nltk.tokenize import sent_tokenize | |
from nltk.tokenize import word_tokenize | |
import os | |
import pdfkit | |
import yake | |
from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig | |
from summarizer import Summarizer,TransformerSummarizer | |
from transformers import pipelines | |
# Fetch the NLTK "punkt" resource (imported tokenizers above rely on NLTK data).
nltk.download('punkt')

# Hugging Face checkpoint identifier used for config, tokenizer and model.
model_name = 'nlpaueb/legal-bert-base-uncased'

# Load the checkpoint's configuration with hidden states enabled; the keyword
# override yields the same config as setting the attribute after loading.
custom_config = AutoConfig.from_pretrained(
    model_name,
    output_hidden_states=True,
)
custom_tokenizer = AutoTokenizer.from_pretrained(model_name)
custom_model = AutoModel.from_pretrained(
    model_name,
    config=custom_config,
)

# Summarizer built on the custom model/tokenizer pair loaded above.
bert_legal_model = Summarizer(
    custom_model=custom_model,
    custom_tokenizer=custom_tokenizer,
)
from zipfile import ZipFile | |
def zip_to_json(file_obj):
    """Overwrite the given file with placeholder text and return its path.

    Args:
        file_obj: A filesystem path (``str``, ``bytes`` or ``os.PathLike``),
            or an object that exposes the path through a ``name`` attribute.
            NOTE(review): presumably the "file" input component passes an
            upload wrapper with ``.name`` on some gradio versions and a plain
            path on others -- confirm against the installed version.

    Returns:
        The path that was written, so it can be handed to a consumer that
        expects a file location. The previous version returned the file
        handle, which is already closed once the ``with`` block exits and is
        therefore of no use to the caller.
    """
    if isinstance(file_obj, (str, bytes, os.PathLike)):
        path = os.fspath(file_obj)
    else:
        # Upload wrappers carry the on-disk location in ``.name``; anything
        # else (e.g. an integer file descriptor) is passed to open() as-is.
        path = getattr(file_obj, "name", file_obj)

    with open(path, "w", encoding="utf-8") as fh:
        fh.write('<content>')
    return path
#def pdf(file_name): | |
# path = folder_name | |
# return path | |
#pageObject.extractText() | |
# Gradio UI: one file input routed through zip_to_json, one file output.
iface = gr.Interface(
    fn=zip_to_json,
    inputs="file",
    outputs="file",
)

# Start the app only when this module is executed as a script.
if __name__ == "__main__":
    iface.launch(share=True)