|
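# @install_packages transformers  (environment directive, not Python code: the `transformers` package must be installed before running this script)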
|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
|
|
# Checkpoint identifier handed to `from_pretrained`; the tokenizer and the
# seq2seq model are both loaded from it once, when this module is executed,
# and are shared by every task function below.
model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
|
|
def correct_htr(raw_htr_text):
    """Correct raw handwritten-text-recognition (HTR) output with the model.

    The text is passed to the module-level ``tokenizer`` and ``model``
    unchanged (no task prefix is added).

    Args:
        raw_htr_text: Raw HTR transcription. ``None`` and blank or
            whitespace-only strings are treated as "no input".

    Returns:
        The decoded first generated sequence with special tokens removed,
        or ``""`` when there is no input text.
    """
    # Nothing to correct: skip the model call rather than returning whatever
    # the model generates for an empty prompt.
    if not raw_htr_text or not raw_htr_text.strip():
        return ""

    inputs = tokenizer(raw_htr_text, return_tensors="pt")

    # Give generation an explicit upper bound. Without one, generate() falls
    # back to the generation config's default length, which (unless the
    # checkpoint overrides it) is far too short for a multi-line correction.
    # Generation still stops early at the end-of-sequence token.
    outputs = model.generate(**inputs, max_new_tokens=512)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
def summarize_text(legal_text):
    """Summarize a legal text with the module-level seq2seq model.

    The prompt sent to the model is ``"summarize: "`` followed by the text.

    Args:
        legal_text: Text to summarize. ``None`` and blank or whitespace-only
            strings are treated as "no input".

    Returns:
        The decoded first generated sequence with special tokens removed,
        or ``""`` when there is no input text.
    """
    # Nothing to summarize: avoid running the model on the bare task prefix.
    if not legal_text or not legal_text.strip():
        return ""

    inputs = tokenizer("summarize: " + legal_text, return_tensors="pt")

    # Explicit upper bound on the summary length; without it generate() uses
    # the generation config's default length, which (unless the checkpoint
    # overrides it) can cut a summary off mid-sentence. Generation still
    # stops early at the end-of-sequence token.
    outputs = model.generate(**inputs, max_new_tokens=256)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
def answer_question(legal_text, question):
    """Answer a question about a legal text with the module-level model.

    The prompt sent to the model is
    ``"question: <question> context: <legal_text>"``.

    Args:
        legal_text: Context passage the answer should come from.
        question: Question to answer about ``legal_text``.

    Returns:
        The decoded first generated sequence with special tokens removed,
        or ``""`` when either argument is ``None``, empty, or only
        whitespace.
    """
    # A question without context (or context without a question) cannot be
    # answered meaningfully, so skip the model call entirely.
    if not legal_text or not legal_text.strip():
        return ""
    if not question or not question.strip():
        return ""

    prompt = f"question: {question} context: {legal_text}"
    inputs = tokenizer(prompt, return_tensors="pt")

    # Explicit upper bound on the answer length; without it generate() uses
    # the generation config's default length, which (unless the checkpoint
    # overrides it) can truncate longer answers. Generation still stops
    # early at the end-of-sequence token.
    outputs = model.generate(**inputs, max_new_tokens=128)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
# `gr.Interface` wraps exactly one function, so passing a list of functions
# (and a nested list of inputs) does not work. Each task instead gets its own
# tab inside a single Blocks app, with its own inputs, output, and button.
with gr.Blocks(title="mT5 Legal Assistant") as iface:
    gr.Markdown("# mT5 Legal Assistant")
    gr.Markdown(
        "Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases."
    )

    with gr.Tab("Correct HTR"):
        htr_input = gr.Textbox(lines=5, placeholder="Enter raw HTR text here...")
        htr_output = gr.Textbox(lines=5, placeholder="Corrected HTR text")
        gr.Button("Correct").click(
            fn=correct_htr,
            inputs=htr_input,
            outputs=htr_output,
        )

    with gr.Tab("Summarize"):
        summary_input = gr.Textbox(lines=10, placeholder="Enter legal text to summarize...")
        summary_output = gr.Textbox(lines=5, placeholder="Summary of legal text")
        gr.Button("Summarize").click(
            fn=summarize_text,
            inputs=summary_input,
            outputs=summary_output,
        )

    with gr.Tab("Answer Question"):
        # Component order matches answer_question(legal_text, question).
        qa_context = gr.Textbox(lines=10, placeholder="Enter legal text...")
        qa_question = gr.Textbox(lines=2, placeholder="Enter your question...")
        qa_output = gr.Textbox(lines=5, placeholder="Answer to your question")
        gr.Button("Answer").click(
            fn=answer_question,
            inputs=[qa_context, qa_question],
            outputs=qa_output,
        )

# Start the web server as soon as the script runs, as the original did.
iface.launch()