jgyasu committed on
Commit
960f419
·
verified ·
1 Parent(s): 7baf701

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -221
app.py CHANGED
@@ -29,232 +29,29 @@ from termcolor import colored
29
  import nltk
30
  from nltk.translate.bleu_score import sentence_bleu
31
  from transformers import BertTokenizer, BertModel
32
- import graphviz
33
  import gradio as gr
34
  from tree import generate_plot
35
  from paraphraser import generate_paraphrase
 
 
36
 
37
  nltk.download('stopwords')
38
 
39
 
40
- # Function to Find the Longest Common Substring Words Subsequence
41
- def longest_common_subss(original_sentence, paraphrased_sentences):
42
- stop_words = set(stopwords.words('english'))
43
- original_sentence_lower = original_sentence.lower()
44
- paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
45
- paraphrased_sentences_no_stopwords = []
46
-
47
- for sentence in paraphrased_sentences_lower:
48
- words = re.findall(r'\b\w+\b', sentence)
49
- filtered_sentence = ' '.join([word for word in words if word not in stop_words])
50
- paraphrased_sentences_no_stopwords.append(filtered_sentence)
51
-
52
- results = []
53
- for sentence in paraphrased_sentences_no_stopwords:
54
- common_words = set(original_sentence_lower.split()) & set(sentence.split())
55
- for word in common_words:
56
- sentence = sentence.replace(word, colored(word, 'green'))
57
- results.append({
58
- "Original Sentence": original_sentence_lower,
59
- "Paraphrased Sentence": sentence,
60
- "Substrings Word Pair": common_words
61
- })
62
- return results
63
-
64
- # Function to Find Common Substring Word between each paraphrase sentences
65
- def common_substring_word(original_sentence, paraphrased_sentences):
66
- stop_words = set(stopwords.words('english'))
67
- original_sentence_lower = original_sentence.lower()
68
- paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
69
- paraphrased_sentences_no_stopwords = []
70
-
71
- for sentence in paraphrased_sentences_lower:
72
- words = re.findall(r'\b\w+\b', sentence)
73
- filtered_sentence = ' '.join([word for word in words if word not in stop_words])
74
- paraphrased_sentences_no_stopwords.append(filtered_sentence)
75
-
76
- results = []
77
- for idx, sentence in enumerate(paraphrased_sentences_no_stopwords):
78
- common_words = set(original_sentence_lower.split()) & set(sentence.split())
79
- common_substrings = ', '.join(sorted(common_words))
80
- for word in common_words:
81
- sentence = sentence.replace(word, colored(word, 'green'))
82
- results.append({
83
- f"Paraphrased Sentence {idx+1}": sentence,
84
- "Common Substrings": common_substrings
85
- })
86
- return results
87
-
88
-
89
- import re
90
- from nltk.corpus import stopwords
91
-
92
- def find_common_subsequences(sentence, str_list):
93
- stop_words = set(stopwords.words('english'))
94
- sentence = sentence.lower()
95
-
96
- str_list = [s.lower() for s in str_list]
97
-
98
- def is_present(lcs, str_list):
99
- for string in str_list:
100
- if lcs not in string:
101
- return False
102
- return True
103
-
104
- def remove_stop_words_and_special_chars(sentence):
105
- sentence = re.sub(r'[^\w\s]', '', sentence)
106
- words = sentence.split()
107
- filtered_words = [word for word in words if word.lower() not in stop_words]
108
- return " ".join(filtered_words)
109
-
110
- sentence = remove_stop_words_and_special_chars(sentence)
111
- str_list = [remove_stop_words_and_special_chars(s) for s in str_list]
112
-
113
- words = sentence.split(" ")
114
- common_grams = []
115
- added_phrases = set()
116
-
117
- def is_covered(subseq, added_phrases):
118
- for phrase in added_phrases:
119
- if subseq in phrase:
120
- return True
121
- return False
122
-
123
- for i in range(len(words) - 4):
124
- penta = " ".join(words[i:i+5])
125
- if is_present(penta, str_list):
126
- common_grams.append(penta)
127
- added_phrases.add(penta)
128
-
129
- for i in range(len(words) - 3):
130
- quad = " ".join(words[i:i+4])
131
- if is_present(quad, str_list) and not is_covered(quad, added_phrases):
132
- common_grams.append(quad)
133
- added_phrases.add(quad)
134
-
135
- for i in range(len(words) - 2):
136
- tri = " ".join(words[i:i+3])
137
- if is_present(tri, str_list) and not is_covered(tri, added_phrases):
138
- common_grams.append(tri)
139
- added_phrases.add(tri)
140
-
141
- for i in range(len(words) - 1):
142
- bi = " ".join(words[i:i+2])
143
- if is_present(bi, str_list) and not is_covered(bi, added_phrases):
144
- common_grams.append(bi)
145
- added_phrases.add(bi)
146
-
147
- for i in range(len(words)):
148
- uni = words[i]
149
- if is_present(uni, str_list) and not is_covered(uni, added_phrases):
150
- common_grams.append(uni)
151
- added_phrases.add(uni)
152
-
153
- return common_grams
154
-
155
- def llm_output(prompt):
156
- return prompt, prompt
157
-
158
- def highlight_phrases_with_colors(sentences, phrases):
159
- color_map = {}
160
- color_index = 0
161
- highlighted_html = []
162
- idx = 1
163
- for sentence in sentences:
164
- sentence_with_idx = f"{idx}. {sentence}"
165
- idx += 1
166
- highlighted_sentence = sentence_with_idx
167
- phrase_count = 0
168
- words = re.findall(r'\b\w+\b', sentence)
169
- word_index = 1
170
- for phrase in phrases:
171
- if phrase not in color_map:
172
- color_map[phrase] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
173
- color_index += 1
174
- escaped_phrase = re.escape(phrase)
175
- pattern = rf'\b{escaped_phrase}\b'
176
- highlighted_sentence, num_replacements = re.subn(
177
- pattern,
178
- lambda m, count=phrase_count, color=color_map[phrase], index=word_index: (
179
- f'<span style="background-color: {color}; font-weight: bold;'
180
- f' padding: 2px 4px; border-radius: 2px; position: relative;">'
181
- f'<span style="background-color: black; color: white; border-radius: 50%;'
182
- f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
183
- f'{m.group(0)}'
184
- f'</span>'
185
- ),
186
- highlighted_sentence,
187
- flags=re.IGNORECASE
188
- )
189
- if num_replacements > 0:
190
- phrase_count += 1
191
- word_index += 1
192
- highlighted_html.append(highlighted_sentence)
193
- final_html = "<br><br>".join(highlighted_html)
194
- return f'''
195
- <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 2px;">
196
- <h3 style="margin-top: 0; font-size: 1em; color: #111827;">Paraphrased And Highlighted Text</h3>
197
- <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 2px;">{final_html}</div>
198
- </div>
199
- '''
200
-
201
- import re
202
-
203
- def highlight_phrases_with_colors_single_sentence(sentence, phrases):
204
- color_map = {}
205
- color_index = 0
206
- highlighted_sentence = sentence
207
- phrase_count = 0
208
- words = re.findall(r'\b\w+\b', sentence)
209
- word_index = 1
210
-
211
- for phrase in phrases:
212
- if phrase not in color_map:
213
- color_map[phrase] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
214
- color_index += 1
215
- escaped_phrase = re.escape(phrase)
216
- pattern = rf'\b{escaped_phrase}\b'
217
- highlighted_sentence, num_replacements = re.subn(
218
- pattern,
219
- lambda m, count=phrase_count, color=color_map[phrase], index=word_index: (
220
- f'<span style="background-color: {color}; font-weight: bold;'
221
- f' padding: 2px 4px; border-radius: 2px; position: relative;">'
222
- f'<span style="background-color: black; color: white; border-radius: 50%;'
223
- f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
224
- f'{m.group(0)}'
225
- f'</span>'
226
- ),
227
- highlighted_sentence,
228
- flags=re.IGNORECASE
229
- )
230
- if num_replacements > 0:
231
- phrase_count += 1
232
- word_index += 1
233
-
234
- final_html = highlighted_sentence
235
- return f'''
236
- <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 2px;">
237
- <h3 style="margin-top: 0; font-size: 1em; color: #111827;">Selected Sentence</h3>
238
- <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 2px;">{final_html}</div>
239
- </div>
240
- '''
241
-
242
-
243
  # Function for the Gradio interface
244
  def model(prompt):
245
- generated, sentence = llm_output(prompt)
246
- res = generate_paraphrase(sentence)
247
- common_subs = longest_common_subss(sentence, res)
248
- common_grams = find_common_subsequences(sentence, res)
249
- for i in range(len(common_subs)):
250
- common_subs[i]["Paraphrased Sentence"] = res[i]
251
- generated_highlighted = highlight_phrases_with_colors_single_sentence(generated, common_grams)
252
- result = highlight_phrases_with_colors(res, common_grams)
253
  tree = generate_plot(sentence)
254
- return generated, generated_highlighted, result, tree
255
 
256
- with gr.Blocks(theme = gr.themes.Monochrome()) as demo:
257
- gr.Markdown("# Paraphrases the Text and Highlights the Non-melting Points")
 
258
 
259
  with gr.Row():
260
  user_input = gr.Textbox(label="User Prompt")
@@ -263,21 +60,22 @@ with gr.Blocks(theme = gr.themes.Monochrome()) as demo:
263
  submit_button = gr.Button("Submit")
264
  clear_button = gr.Button("Clear")
265
 
266
- with gr.Row():
267
- ai_output = gr.Textbox(label="AI-generated Text (Llama3)")
268
-
269
  with gr.Row():
270
  selected_sentence = gr.HTML()
271
 
272
  with gr.Row():
273
  html_output = gr.HTML()
274
 
 
 
 
275
  with gr.Row():
276
  tree = gr.Plot()
277
 
278
- submit_button.click(model, inputs=user_input, outputs=[ai_output, selected_sentence, html_output, tree])
279
  clear_button.click(lambda: "", inputs=None, outputs=user_input)
280
- clear_button.click(lambda: "", inputs=None, outputs=[ai_output, selected_sentence, html_output, tree])
281
 
282
  # Launch the demo
283
- demo.launch(share=True)
 
 
29
  import nltk
30
  from nltk.translate.bleu_score import sentence_bleu
31
  from transformers import BertTokenizer, BertModel
 
32
  import gradio as gr
33
  from tree import generate_plot
34
  from paraphraser import generate_paraphrase
35
+ from lcs import find_common_subsequences
36
+ from highlighter import highlight_common_words
37
 
38
  nltk.download('stopwords')
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # Function for the Gradio interface
42
  def model(prompt):
43
+ sentence = prompt
44
+ paraphrased_sentences = generate_paraphrase(sentence)
45
+ common_grams = find_common_subsequences(sentence, paraphrased_sentences)
46
+ highlighted_user_prompt = highlight_common_words(common_grams, [sentence]) # Pass the sentence as a list
47
+ highlighted_paraphrased_sentences = highlight_common_words(common_grams, paraphrased_sentences) # Fix parameter order
48
+ discarded_sentences = []
 
 
49
  tree = generate_plot(sentence)
50
+ return highlighted_user_prompt, highlighted_paraphrased_sentences, discarded_sentences, tree
51
 
52
+
53
+ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
54
+ gr.Markdown("# **AIISC Watermarking Model**")
55
 
56
  with gr.Row():
57
  user_input = gr.Textbox(label="User Prompt")
 
60
  submit_button = gr.Button("Submit")
61
  clear_button = gr.Button("Clear")
62
 
 
 
 
63
  with gr.Row():
64
  selected_sentence = gr.HTML()
65
 
66
  with gr.Row():
67
  html_output = gr.HTML()
68
 
69
+ with gr.Row():
70
+ discarded_sentences = gr.Textbox(label="Discarded Sentences")
71
+
72
  with gr.Row():
73
  tree = gr.Plot()
74
 
75
+ submit_button.click(model, inputs=user_input, outputs=[selected_sentence, html_output, discarded_sentences, tree])
76
  clear_button.click(lambda: "", inputs=None, outputs=user_input)
77
+ clear_button.click(lambda: "", inputs=None, outputs=[selected_sentence, html_output, discarded_sentences, tree])
78
 
79
  # Launch the demo
80
+ demo.launch(share=True)
81
+