Addaci committed on
Commit
fb6b907
1 Parent(s): 77122ee

Update app.py (change to MT5Tokenizer; add button boxes and bold to button text)

Browse files
Files changed (1) hide show
  1. app.py +37 -20
app.py CHANGED
@@ -1,40 +1,55 @@
1
  import os
2
  import gradio as gr
3
- from transformers import T5Tokenizer, T5ForConditionalGeneration
4
 
5
  # Load your fine-tuned mT5 model
6
  model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
7
- tokenizer = T5Tokenizer.from_pretrained(model_name)
8
- model = T5ForConditionalGeneration.from_pretrained(model_name)
9
 
10
  def correct_htr(raw_htr_text):
11
- inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
12
- outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
13
- corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
14
- return corrected_text
 
 
 
15
 
16
  def summarize_text(legal_text):
17
- inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
18
- outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
19
- summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
20
- return summary
 
 
 
21
 
22
  def answer_question(legal_text, question):
23
- formatted_input = f"question: {question} context: {legal_text}"
24
- inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
25
- outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
26
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
27
- return answer
 
 
 
28
 
29
- # Create the Gradio Blocks interface
30
  with gr.Blocks() as demo:
31
  gr.Markdown("# mT5 Legal Assistant")
32
  gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
33
 
34
- # Adding external link buttons at the top
35
  with gr.Row():
36
- gr.HTML('<a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank"><button>Admiralty Court Legal Glossary</button></a>')
37
- gr.HTML('<a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank"><button>HCA 13/70 Ground Truth (1654-55)</button></a>')
 
 
 
 
 
 
38
 
39
  with gr.Tab("Correct HTR"):
40
  gr.Markdown("### Correct Raw HTR Text")
@@ -70,4 +85,6 @@ with gr.Blocks() as demo:
70
  # Launch the Gradio interface
71
  demo.launch()
72
 
 
 
73
 
 
1
  import os
2
  import gradio as gr
3
+ from transformers import MT5Tokenizer, MT5ForConditionalGeneration
4
 
5
  # Load your fine-tuned mT5 model
  # `tokenizer` and `model` are module-level globals used by correct_htr,
  # summarize_text and answer_question; both are fetched by name with
  # from_pretrained(model_name) when this module is executed.
6
  model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
7
+ tokenizer = MT5Tokenizer.from_pretrained(model_name)
8
+ model = MT5ForConditionalGeneration.from_pretrained(model_name)
9
 
10
  def correct_htr(raw_htr_text):
11
+ try:
12
+ inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
13
+ outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
14
+ corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
15
+ return corrected_text
16
+ except Exception as e:
17
+ return str(e)
18
 
19
  def summarize_text(legal_text):
20
+ try:
21
+ inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
22
+ outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
23
+ summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
24
+ return summary
25
+ except Exception as e:
26
+ return str(e)
27
 
28
  def answer_question(legal_text, question):
29
+ try:
30
+ formatted_input = f"question: {question} context: {legal_text}"
31
+ inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
32
+ outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
33
+ answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
34
+ return answer
35
+ except Exception as e:
36
+ return str(e)
37
 
38
+ # Create the Gradio Blocks interface with boxed clickable buttons and bold text
39
  with gr.Blocks() as demo:
40
  gr.Markdown("# mT5 Legal Assistant")
41
  gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
42
 
43
+ # Adding external link buttons with a box around them and bold text
44
  with gr.Row():
45
+ gr.HTML('''<div style="border: 2px solid black; padding: 10px; display: inline-block;">
46
+ <a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank">
47
+ <button style="font-weight:bold;">Admiralty Court Legal Glossary</button>
48
+ </a>
49
+ <a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank">
50
+ <button style="font-weight:bold;">HCA 13/70 Ground Truth (1654-55)</button>
51
+ </a>
52
+ </div>''')
53
 
54
  with gr.Tab("Correct HTR"):
55
  gr.Markdown("### Correct Raw HTR Text")
 
85
  # Launch the Gradio interface
86
  demo.launch()
87
 
88
+
89
+
90