chinhon committed
Commit
b3118cd
1 Parent(s): 9967432

Upload app.py

Files changed (1)
app.py  +105  -0
app.py ADDED
@@ -0,0 +1,105 @@
+import gradio as gr
+import re
+
+from transformers import (
+    AutoTokenizer,
+    AutoModelForSeq2SeqLM
+)
+
+def clean_text(text):
+    text = text.encode("ascii", errors="ignore").decode(
+        "ascii"
+    )  # remove non-ascii, Chinese characters
+    text = re.sub(r"\n", " ", text)
+    text = re.sub(r"\n\n", " ", text)
+    text = re.sub(r"\t", " ", text)
+    text = text.strip(" ")
+    text = re.sub(
+        " +", " ", text
+    ).strip()  # get rid of multiple spaces and replace with a single
+    return text
+
+
+modchoice_1 = "chinhon/bart-large-cnn-summarizer_03"
+
+def summarizer1(text):
+    input_text = clean_text(text)
+
+    tokenizer_1 = AutoTokenizer.from_pretrained(modchoice_1)
+
+    model_1 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_1)
+
+    with tokenizer_1.as_target_tokenizer():
+        batch = tokenizer_1(
+            input_text, truncation=True, padding="longest", return_tensors="pt"
+        )
+
+    raw_1 = model_1.generate(**batch)
+
+    summary_1 = tokenizer_1.batch_decode(raw_1, skip_special_tokens=True)
+
+    summed_1 = summary_1[0]
+
+    lines1 = summed_1.split(". ")
+
+    for i in range(len(lines1)):
+        lines1[i] = "* " + lines1[i]
+
+    summ_bullet1 = "\n".join(lines1)
+
+    return summ_bullet1
+
+
+summary1 = gr.Interface(
+    fn=summarizer1, inputs=gr.inputs.Textbox(), outputs=gr.outputs.Textbox(label="")
+)
+
+
+modchoice_2 = (
+    "chinhon/pegasus-newsroom-summarizer_02"
+)
+
+def summarizer2(text):
+    input_text = clean_text(text)
+
+    tokenizer_2 = AutoTokenizer.from_pretrained(modchoice_2)
+
+    model_2 = AutoModelForSeq2SeqLM.from_pretrained(modchoice_2)
+
+    with tokenizer_2.as_target_tokenizer():
+        batch = tokenizer_2(
+            input_text, truncation=True, padding="longest", return_tensors="pt"
+        )
+
+    raw_2 = model_2.generate(**batch)
+
+    summary_2 = tokenizer_2.batch_decode(raw_2, skip_special_tokens=True)
+
+    summed_2 = summary_2[0]
+
+    lines2 = summed_2.split(". ")
+
+    for i in range(len(lines2)):
+        lines2[i] = "* " + lines2[i]
+
+    summ_bullet2 = "\n".join(lines2)
+
+    return summ_bullet2
+
+summary2 = gr.Interface(
+    fn=summarizer2, inputs=gr.inputs.Textbox(), outputs=gr.outputs.Textbox(label="")
+)
+
+
+gradio_ui = gr.Parallel(
+    summary1,
+    summary2,
+    title="Compare 2 AI Summarizers",
+    inputs=gr.inputs.Textbox(
+        lines=20,
+        label="Paste your news story here, and choose from 2 suggested summaries",
+    ),
+    theme="huggingface",
+)
+
+gradio_ui.launch(enable_queue=True)
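
Note on the code above: summarizer1 and summarizer2 re-download and re-instantiate their tokenizer and model on every call, and both repeat the same tokenize/generate/bullet-format steps. Below is a minimal sketch of a possible follow-up refactor, not part of this commit: it loads each checkpoint once at import time and routes both Gradio functions through one shared helper. The summarize_with helper and the MODELS/LOADED names are illustrative assumptions, the sketch reuses the clean_text function defined in app.py above, and it tokenizes the input without as_target_tokenizer(), which is meant for preparing target sequences rather than inputs.

# Sketch only (not part of this commit): load each checkpoint once at import
# time instead of inside every request, and share one summarization helper.
# Assumes the clean_text() function defined in app.py above is in scope.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODELS = {  # illustrative keys for the two checkpoints used above
    "bart": "chinhon/bart-large-cnn-summarizer_03",
    "pegasus": "chinhon/pegasus-newsroom-summarizer_02",
}

# Module-level cache of (tokenizer, model) pairs, built once at startup.
LOADED = {
    name: (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )
    for name, checkpoint in MODELS.items()
}

def summarize_with(name, text):
    # Hypothetical shared helper: clean, tokenize, generate, bullet-format.
    tokenizer, model = LOADED[name]
    batch = tokenizer(
        clean_text(text), truncation=True, padding="longest", return_tensors="pt"
    )
    output_ids = model.generate(**batch)
    summary = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
    return "\n".join("* " + line for line in summary.split(". "))

def summarizer1(text):
    return summarize_with("bart", text)

def summarizer2(text):
    return summarize_with("pegasus", text)

Loading the models at import time trades a slower startup for much lower per-request latency, which generally suits a Space that keeps the process alive between requests.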