arithescientist committed on
Commit
037452a
·
1 Parent(s): d084402

Startup

Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pytesseract as pt
3
+ import pdf2image
4
+ from fpdf import FPDF
5
+ import re
6
+ import nltk
7
+ from nltk.tokenize import sent_tokenize
8
+ from nltk.tokenize import word_tokenize
9
+ import os
10
+ import pdfkit
11
+ import yake
12
+ from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig
13
+ from summarizer import Summarizer,TransformerSummarizer
14
+ from transformers import pipelines
15
# Fetch the NLTK 'punkt' resource when the module is loaded.
nltk.download('punkt')

# Hugging Face checkpoint that backs the summarizer.
model_name = 'nlpaueb/legal-bert-base-uncased'

# Load the checkpoint's config, switch on hidden-state output, then build
# the tokenizer and the model from that same checkpoint.
custom_config = AutoConfig.from_pretrained(model_name)
custom_config.output_hidden_states = True
custom_tokenizer = AutoTokenizer.from_pretrained(model_name)
custom_model = AutoModel.from_pretrained(model_name, config=custom_config)

# Summarizer wired to the custom model and tokenizer loaded above.
bert_legal_model = Summarizer(
    custom_model=custom_model,
    custom_tokenizer=custom_tokenizer,
)
27
+
28
+
29
def get_response(input_text):
    """Summarize ``input_text`` with the module-level ``bert_legal_model``.

    Args:
        input_text: Text to summarize.

    Returns:
        The value returned by ``bert_legal_model`` when called with
        ``min_length=8`` and ``ratio=0.05``.
    """
    # The original call never closed its parenthesis, which made the whole
    # module a SyntaxError; the call is now complete.
    output_text = bert_legal_model(input_text, min_length=8, ratio=0.05)
    return output_text
33
+
34
+
35
+
36
# ``gr`` was referenced without ever being imported, which raises NameError
# as soon as the module runs; import Gradio here so the interface can be built.
import gradio as gr

# Text-in / text-out interface that routes user input through get_response.
iface = gr.Interface(
    get_response,
    "text",
    "text",
)

if __name__ == "__main__":
    iface.launch(share=True)