lincolnlegalbart

Sleeping

App Files Files Community

arithescientist committed on Oct 11, 2024

Commit

c3c2470

verified ·

1 Parent(s): fc2a37d

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -23

app.py CHANGED Viewed

@@ -8,27 +8,34 @@ from pdfminer.high_level import extract_text
 from docx import Document
 from reportlab.lib.pagesizes import letter
 from reportlab.pdfgen import canvas
 import spacy
-# Load spaCy English model
-nlp = spacy.load("en_core_web_sm")
-# Load the LegalBERT model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("nlpaueb/legal-bert-base-uncased")
 model = AutoModel.from_pretrained("nlpaueb/legal-bert-base-uncased")
 # Convert DOCX to PDF using ReportLab
 def docx_to_pdf(docx_file, output_pdf="converted_doc.pdf"):
     """Render the paragraphs of a DOCX file into a plain-text PDF.

     Each paragraph of the document is written as one line of 12pt
     Helvetica text, starting 40pt from the left edge at y=750. When the
     text cursor drops below the bottom margin a new page is started, so
     documents longer than one page are no longer truncated.

     Long paragraphs are not wrapped; each is emitted as a single line.

     Args:
         docx_file: Path or file-like object passed to ``Document``.
         output_pdf: Path of the PDF file to write.

     Returns:
         The value of ``output_pdf``.
     """
     left_margin = 40
     top_y = 750
     bottom_y = 40

     doc = Document(docx_file)
     pdf = canvas.Canvas(output_pdf, pagesize=letter)

     def start_page():
         # showPage() resets the canvas graphics state, so the font has to be
         # selected again before a text object is created on each page.
         pdf.setFont("Helvetica", 12)
         return pdf.beginText(left_margin, top_y)

     text_object = start_page()
     for para in doc.paragraphs:
         if text_object.getY() < bottom_y:
             # Flush the full page and continue on a fresh one.
             pdf.drawText(text_object)
             pdf.showPage()
             text_object = start_page()
         text_object.textLine(para.text)

     pdf.drawText(text_object)
     pdf.save()
     return output_pdf
@@ -73,9 +80,9 @@ def pdf_to_text(text, PDF, num_sentences=5):
             pass  # Use the text input provided by the user
         else:
             return None, "Please provide input text or upload a file.", None
         summary = extractive_summarization(text, num_sentences)
         # Generate a PDF of the summary
         pdf = FPDF()
         pdf.add_page()
@@ -83,14 +90,14 @@ def pdf_to_text(text, PDF, num_sentences=5):
         pdf.multi_cell(190, 10, txt=summary, align='L')
         pdf_output_path = "legal_summary.pdf"
         pdf.output(pdf_output_path)
         # Generate an audio file of the summary
         audio_output_path = "legal_summary.wav"
         tts = gTTS(text=summary, lang='en', slow=False)
         tts.save(audio_output_path)
         return audio_output_path, summary, pdf_output_path
     except Exception as e:
         return None, f"An error occurred: {str(e)}", None
@@ -104,35 +111,35 @@ def process_sample_document(num_sentences=5):
 with gr.Blocks() as iface:
     with gr.Row():
         process_sample_button = gr.Button("Summarize Marbury v. Madison Case (Pre-Uploaded)")
     text_input = gr.Textbox(label="Input Text")
     file_input = gr.File(label="Upload PDF or DOCX")
     slider = gr.Slider(minimum=1, maximum=20, step=1, value=5, label="Number of Summary Sentences")
     audio_output = gr.Audio(label="Generated Audio")
     summary_output = gr.Textbox(label="Generated Summary")
     pdf_output = gr.File(label="Summary PDF")
     # Update the function calls to match new parameters
     process_sample_button.click(
-        fn=process_sample_document,
-        inputs=slider,
         outputs=[audio_output, summary_output, pdf_output]
     )
     # Use submit event for the text input and file input
     def on_submit(text, file, num_sentences):
         """Forward the textbox text, uploaded file and sentence count to pdf_to_text.

         Returns whatever pdf_to_text returns, unchanged.
         """
         outputs = pdf_to_text(text, file, num_sentences)
         return outputs
     text_input.submit(
-        fn=on_submit,
-        inputs=[text_input, file_input, slider],
         outputs=[audio_output, summary_output, pdf_output]
     )
     file_input.change(
-        fn=on_submit,
-        inputs=[text_input, file_input, slider],
         outputs=[audio_output, summary_output, pdf_output]
     )
 if __name__ == "__main__":
     iface.launch()

 from docx import Document
 from reportlab.lib.pagesizes import letter
 from reportlab.pdfgen import canvas
+# Import spaCy and handle model loading
 import spacy
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:
+    # Download the model if not found
+    from spacy.cli import download
+    download("en_core_web_sm")
+    nlp = spacy.load("en_core_web_sm")
+# Load the LegalBERT model and tokenizer with use_fast=False
+tokenizer = AutoTokenizer.from_pretrained("nlpaueb/legal-bert-base-uncased", use_fast=False)
 model = AutoModel.from_pretrained("nlpaueb/legal-bert-base-uncased")
 # Convert DOCX to PDF using ReportLab
 def docx_to_pdf(docx_file, output_pdf="converted_doc.pdf"):
     """Render the paragraphs of a DOCX file into a plain-text PDF.

     Each paragraph of the document is written as one line of 12pt
     Helvetica text, starting 40pt from the left edge at y=750. When the
     text cursor drops below the bottom margin a new page is started, so
     documents longer than one page are no longer truncated.

     Long paragraphs are not wrapped; each is emitted as a single line.

     Args:
         docx_file: Path or file-like object passed to ``Document``.
         output_pdf: Path of the PDF file to write.

     Returns:
         The value of ``output_pdf``.
     """
     left_margin = 40
     top_y = 750
     bottom_y = 40

     doc = Document(docx_file)
     pdf = canvas.Canvas(output_pdf, pagesize=letter)

     def start_page():
         # showPage() resets the canvas graphics state, so the font has to be
         # selected again before a text object is created on each page.
         pdf.setFont("Helvetica", 12)
         return pdf.beginText(left_margin, top_y)

     text_object = start_page()
     for para in doc.paragraphs:
         if text_object.getY() < bottom_y:
             # Flush the full page and continue on a fresh one.
             pdf.drawText(text_object)
             pdf.showPage()
             text_object = start_page()
         text_object.textLine(para.text)

     pdf.drawText(text_object)
     pdf.save()
     return output_pdf
             pass  # Use the text input provided by the user
         else:
             return None, "Please provide input text or upload a file.", None
         summary = extractive_summarization(text, num_sentences)
         # Generate a PDF of the summary
         pdf = FPDF()
         pdf.add_page()
         pdf.multi_cell(190, 10, txt=summary, align='L')
         pdf_output_path = "legal_summary.pdf"
         pdf.output(pdf_output_path)
         # Generate an audio file of the summary
         audio_output_path = "legal_summary.wav"
         tts = gTTS(text=summary, lang='en', slow=False)
         tts.save(audio_output_path)
         return audio_output_path, summary, pdf_output_path
     except Exception as e:
         return None, f"An error occurred: {str(e)}", None
 with gr.Blocks() as iface:
     with gr.Row():
         process_sample_button = gr.Button("Summarize Marbury v. Madison Case (Pre-Uploaded)")
     text_input = gr.Textbox(label="Input Text")
     file_input = gr.File(label="Upload PDF or DOCX")
     slider = gr.Slider(minimum=1, maximum=20, step=1, value=5, label="Number of Summary Sentences")
     audio_output = gr.Audio(label="Generated Audio")
     summary_output = gr.Textbox(label="Generated Summary")
     pdf_output = gr.File(label="Summary PDF")
     # Update the function calls to match new parameters
     process_sample_button.click(
+        fn=process_sample_document,
+        inputs=slider,
         outputs=[audio_output, summary_output, pdf_output]
     )
     # Use submit event for the text input and file input
     def on_submit(text, file, num_sentences):
         """Forward the textbox text, uploaded file and sentence count to pdf_to_text.

         Returns whatever pdf_to_text returns, unchanged.
         """
         outputs = pdf_to_text(text, file, num_sentences)
         return outputs
     text_input.submit(
+        fn=on_submit,
+        inputs=[text_input, file_input, slider],
         outputs=[audio_output, summary_output, pdf_output]
     )
     file_input.change(
+        fn=on_submit,
+        inputs=[text_input, file_input, slider],
         outputs=[audio_output, summary_output, pdf_output]
     )
 if __name__ == "__main__":
     iface.launch()