Update app.py
#6
by
Addaci
- opened
app.py
CHANGED
@@ -1,79 +1,113 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import
|
3 |
|
4 |
-
# Load model and tokenizer
|
5 |
-
model = T5ForConditionalGeneration.from_pretrained("google/
|
6 |
-
tokenizer = T5Tokenizer.from_pretrained("google/
|
7 |
|
8 |
-
#
|
9 |
-
def
|
10 |
-
|
|
|
11 |
outputs = model.generate(
|
12 |
-
|
13 |
-
max_new_tokens=max_new_tokens,
|
14 |
-
temperature=temperature
|
15 |
-
do_sample=True
|
16 |
)
|
17 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
prompt = "summarize: " + text
|
22 |
inputs = tokenizer(prompt, return_tensors="pt")
|
23 |
outputs = model.generate(
|
24 |
-
|
25 |
-
max_new_tokens=max_new_tokens,
|
26 |
-
temperature=temperature
|
27 |
-
do_sample=True
|
28 |
)
|
29 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
prompt = f"question: {question} context: {context}"
|
34 |
inputs = tokenizer(prompt, return_tensors="pt")
|
35 |
outputs = model.generate(
|
36 |
-
|
37 |
-
max_new_tokens=max_new_tokens,
|
38 |
-
temperature=temperature
|
39 |
-
do_sample=True
|
40 |
)
|
41 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
42 |
|
43 |
-
# Gradio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
with gr.Blocks() as demo:
|
45 |
-
|
46 |
-
gr.
|
47 |
|
48 |
-
with gr.Row():
|
49 |
-
gr.Markdown('[Admiralty Court Legal Glossary](http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary)')
|
50 |
-
gr.Markdown('[HCA 13/70 Ground Truth](https://github.com/Addaci/HCA/blob/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt)')
|
51 |
-
|
52 |
-
# Tabs for different functionalities
|
53 |
with gr.Tab("Correct Raw HTR"):
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
gr.Button("Correct HTR")
|
59 |
-
gr.Button("Clear")
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
with gr.Tab("Summarize Legal Text"):
|
62 |
-
|
63 |
-
|
64 |
max_new_tokens_summarize = gr.Slider(10, 512, value=256, label="Max New Tokens")
|
65 |
temperature_summarize = gr.Slider(0.1, 1.0, value=0.5, label="Temperature")
|
66 |
-
gr.Button("Summarize Text")
|
67 |
-
gr.Button("Clear")
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
with gr.Tab("Answer Legal Question"):
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
gr.Button("Get Answer")
|
76 |
-
gr.Button("Clear")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
-
# Launch the demo
|
79 |
demo.launch()
|
|
|
1 |
import gradio as gr
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration

# Load model and tokenizer for mT5-small
# NOTE(review): `pipeline` is imported but never used in this file — confirm
# it is not needed elsewhere before removing. Loading happens at import time,
# so starting the app downloads/loads the model before the UI is built.
model = T5ForConditionalGeneration.from_pretrained("google/mt5-small")
tokenizer = T5Tokenizer.from_pretrained("google/mt5-small")
|
7 |
|
8 |
+
# Define task-specific prompts
def correct_htr_text(input_text, max_new_tokens, temperature):
    """Correct obvious HTR transcription errors while preserving C17th spelling.

    Args:
        input_text: Raw handwritten-text-recognition output to correct.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature (higher = more varied output).

    Returns:
        The model's corrected transcription as a plain string.
    """
    prompt = f"Correct the following handwritten transcription for obvious errors while preserving C17th spelling: {input_text}"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs.input_ids,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        # BUG FIX: without do_sample=True, generate() decodes greedily and
        # silently ignores `temperature`, so the UI slider had no effect.
        # (The pre-change version of this file set do_sample=True.)
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
18 |
|
19 |
+
def summarize_legal_text(input_text, max_new_tokens, temperature):
    """Summarize a passage of legal text with the mT5 model.

    Args:
        input_text: The legal text to summarize.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature (higher = more varied output).

    Returns:
        The generated summary as a plain string.
    """
    prompt = f"Summarize this legal text: {input_text}"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs.input_ids,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        # BUG FIX: temperature is ignored under greedy decoding; enable
        # sampling so the temperature slider actually influences output.
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
28 |
|
29 |
+
def answer_legal_question(input_text, question, max_new_tokens, temperature):
    """Answer a question grounded in the supplied legal text.

    Args:
        input_text: The legal text providing context for the answer.
        question: The question to answer.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature (higher = more varied output).

    Returns:
        The generated answer as a plain string.
    """
    prompt = f"Answer this question based on the legal text: '{question}' Text: {input_text}"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs.input_ids,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        # BUG FIX: temperature is ignored under greedy decoding; enable
        # sampling so the temperature slider actually influences output.
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
38 |
|
39 |
+
# Define Gradio interface functions
|
40 |
+
def correct_htr_interface(text, max_new_tokens, temperature):
    """Gradio callback: delegate HTR correction to correct_htr_text()."""
    corrected = correct_htr_text(text, max_new_tokens, temperature)
    return corrected
|
42 |
+
|
43 |
+
def summarize_interface(text, max_new_tokens, temperature):
    """Gradio callback: delegate summarization to summarize_legal_text()."""
    summary = summarize_legal_text(text, max_new_tokens, temperature)
    return summary
|
45 |
+
|
46 |
+
def question_interface(text, question, max_new_tokens, temperature):
    """Gradio callback: delegate question answering to answer_legal_question()."""
    answer = answer_legal_question(text, question, max_new_tokens, temperature)
    return answer
|
48 |
+
|
49 |
+
def clear_all():
    """Return a pair of empty strings to reset an input/output textbox pair."""
    empty = ""
    return empty, empty
|
51 |
+
|
52 |
+
# External clickable buttons
|
53 |
+
def clickable_buttons():
    """Build an HTML snippet with two external reference links styled as buttons.

    Returns:
        str: HTML for the Admiralty Court glossary and HCA 13/70 ground-truth
        links, intended to be rendered with gr.HTML() at the top of the app.
    """
    # Inline-styled so no external CSS is needed inside the Gradio page.
    button_html = """
    <div style="display: flex; justify-content: space-between; margin-bottom: 10px;">
        <a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary"
           style="border: 1px solid black; padding: 5px; text-align: center; width: 48%; background-color: #f0f0f0;">
           Admiralty Court Legal Glossary</a>
        <a href="https://github.com/Addaci/HCA/blob/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt"
           style="border: 1px solid black; padding: 5px; text-align: center; width: 48%; background-color: #f0f0f0;">
           HCA 13/70 Ground Truth</a>
    </div>
    """
    return button_html
|
65 |
+
|
66 |
+
# Interface layout
def _clear_question_tab():
    """Reset all three components of the question tab (text, question, answer)."""
    return "", "", ""

with gr.Blocks() as demo:
    # NOTE(review): the title says "Flan-T5" but the model loaded above is
    # google/mt5-small — confirm which model is intended and align the title.
    gr.HTML("<h1>Flan-T5 Legal Assistant</h1>")
    gr.HTML(clickable_buttons())

    with gr.Tab("Correct Raw HTR"):
        input_text = gr.Textbox(lines=10, label="Textbox")
        output_text = gr.Textbox(label="Textbox")
        max_new_tokens = gr.Slider(10, 512, value=128, label="Max New Tokens")
        temperature = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
        correct_button = gr.Button("Correct HTR")
        clear_button = gr.Button("Clear")

        correct_button.click(fn=correct_htr_interface,
                             inputs=[input_text, max_new_tokens, temperature],
                             outputs=output_text)
        clear_button.click(fn=clear_all, outputs=[input_text, output_text])

    with gr.Tab("Summarize Legal Text"):
        input_text_summarize = gr.Textbox(lines=10, label="Textbox")
        output_text_summarize = gr.Textbox(label="Textbox")
        max_new_tokens_summarize = gr.Slider(10, 512, value=256, label="Max New Tokens")
        temperature_summarize = gr.Slider(0.1, 1.0, value=0.5, label="Temperature")
        summarize_button = gr.Button("Summarize Text")
        clear_button_summarize = gr.Button("Clear")

        summarize_button.click(fn=summarize_interface,
                               inputs=[input_text_summarize, max_new_tokens_summarize, temperature_summarize],
                               outputs=output_text_summarize)
        clear_button_summarize.click(fn=clear_all, outputs=[input_text_summarize, output_text_summarize])

    with gr.Tab("Answer Legal Question"):
        input_text_question = gr.Textbox(lines=10, label="Textbox")
        question = gr.Textbox(label="Textbox")
        output_text_question = gr.Textbox(label="Textbox")
        max_new_tokens_question = gr.Slider(10, 512, value=128, label="Max New Tokens")
        temperature_question = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
        question_button = gr.Button("Get Answer")
        clear_button_question = gr.Button("Clear")

        question_button.click(fn=question_interface,
                              inputs=[input_text_question, question, max_new_tokens_question, temperature_question],
                              outputs=output_text_question)
        # BUG FIX: this tab clears THREE components, but clear_all() returns
        # only two values — Gradio raises a return-count mismatch. Use the
        # dedicated three-value helper instead.
        clear_button_question.click(fn=_clear_question_tab,
                                    outputs=[input_text_question, question, output_text_question])

    # BUG FIX: removed a stray `gr.Button("Clear", elem_id="clear_button")
    # .click(clear_all)` that declared no outputs — clear_all's return values
    # had nowhere to go, so the button did nothing useful and produced a
    # value/output mismatch at click time.

demo.launch()
|