jgyasu committed on
Commit
b265c4f
1 Parent(s): 02d0f22

Create app.py

Files changed (1)
  1. app.py +522 -0
app.py ADDED
@@ -0,0 +1,522 @@
# -*- coding: utf-8 -*-
"""watermark_intern.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1SyerXj0c3UyLSYmdL4TBBzWhwvMJ3JwJ
"""

import gradio as gr

# import streamlit as st
from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM
import plotly.graph_objects as go
from transformers import pipeline
import re
import time
import requests
from PIL import Image
import itertools
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.colors import ListedColormap, rgb2hex
import ipywidgets as widgets
from IPython.display import display, HTML
import pandas as pd
from pprint import pprint
from tenacity import retry
from tqdm import tqdm
# import tiktoken
import scipy.stats
import torch
from transformers import GPT2LMHeadModel
import seaborn as sns
# from colorama import Fore, Style
# import openai
import random
from nltk.corpus import stopwords
from termcolor import colored
import nltk
from nltk.translate.bleu_score import sentence_bleu
from transformers import BertTokenizer, BertModel

nltk.download('stopwords')

# Function to Initialize the Model
def init_model():
    para_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
    para_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
    return para_tokenizer, para_model

# Function to Paraphrase the Text
def paraphrase(question, para_tokenizer, para_model, num_beams=5, num_beam_groups=5, num_return_sequences=5, repetition_penalty=10.0, diversity_penalty=3.0, no_repeat_ngram_size=2, temperature=0.7, max_length=64):
    input_ids = para_tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids
    outputs = para_model.generate(
        input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )
    res = para_tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return res

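# Illustrative usage of the two functions above (a sketch, not part of the original
# notebook): the paraphraser uses diverse beam search (num_beams / num_beam_groups /
# diversity_penalty), so it returns num_return_sequences distinct candidate
# paraphrases for a single input sentence.
#
# para_tokenizer, para_model = init_model()
# candidates = paraphrase("The cat sat on the mat.", para_tokenizer, para_model)
# len(candidates)  # -> 5 strings with the default num_return_sequences=5
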
# Function to find the longest common subsequence of words between the original and the paraphrased sentences
def longest_common_subss(original_sentence, paraphrased_sentences):
    stop_words = set(stopwords.words('english'))
    original_sentence_lower = original_sentence.lower()
    paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
    paraphrased_sentences_no_stopwords = []

    for sentence in paraphrased_sentences_lower:
        words = re.findall(r'\b\w+\b', sentence)
        filtered_sentence = ' '.join([word for word in words if word not in stop_words])
        paraphrased_sentences_no_stopwords.append(filtered_sentence)

    results = []
    for sentence in paraphrased_sentences_no_stopwords:
        common_words = set(original_sentence_lower.split()) & set(sentence.split())
        for word in common_words:
            sentence = sentence.replace(word, colored(word, 'green'))
        results.append({
            "Original Sentence": original_sentence_lower,
            "Paraphrased Sentence": sentence,
            "Substrings Word Pair": common_words
        })
    return results

# Function to find the common substring words between the original sentence and each paraphrased sentence
def common_substring_word(original_sentence, paraphrased_sentences):
    stop_words = set(stopwords.words('english'))
    original_sentence_lower = original_sentence.lower()
    paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
    paraphrased_sentences_no_stopwords = []

    for sentence in paraphrased_sentences_lower:
        words = re.findall(r'\b\w+\b', sentence)
        filtered_sentence = ' '.join([word for word in words if word not in stop_words])
        paraphrased_sentences_no_stopwords.append(filtered_sentence)

    results = []
    for idx, sentence in enumerate(paraphrased_sentences_no_stopwords):
        common_words = set(original_sentence_lower.split()) & set(sentence.split())
        common_substrings = ', '.join(sorted(common_words))
        for word in common_words:
            sentence = sentence.replace(word, colored(word, 'green'))
        results.append({
            f"Paraphrased Sentence {idx+1}": sentence,
            "Common Substrings": common_substrings
        })
    return results

# Function to watermark a word chosen at random between the LCS points (random sampling)
def random_sampling(original_sentence, paraphrased_sentences):
    stop_words = set(stopwords.words('english'))
    original_sentence_lower = original_sentence.lower()
    paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
    paraphrased_sentences_no_stopwords = []

    for sentence in paraphrased_sentences_lower:
        words = re.findall(r'\b\w+\b', sentence)
        filtered_sentence = ' '.join([word for word in words if word not in stop_words])
        paraphrased_sentences_no_stopwords.append(filtered_sentence)

    results = []
    for idx, sentence in enumerate(paraphrased_sentences_no_stopwords):
        common_words = set(original_sentence_lower.split()) & set(sentence.split())
        common_substrings = ', '.join(sorted(common_words))

        words_to_replace = [word for word in sentence.split() if word not in common_words]
        if words_to_replace:
            word_to_mark = random.choice(words_to_replace)
            sentence = sentence.replace(word_to_mark, colored(word_to_mark, 'red'))

        for word in common_words:
            sentence = sentence.replace(word, colored(word, 'green'))

        results.append({
            f"Paraphrased Sentence {idx+1}": sentence,
            "Common Substrings": common_substrings
        })
    return results

# Function for Inverse Transform Sampling
def inverse_transform_sampling(original_sentence, paraphrased_sentences):
    stop_words = set(stopwords.words('english'))
    original_sentence_lower = original_sentence.lower()
    paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
    paraphrased_sentences_no_stopwords = []

    for sentence in paraphrased_sentences_lower:
        words = re.findall(r'\b\w+\b', sentence)
        filtered_sentence = ' '.join([word for word in words if word not in stop_words])
        paraphrased_sentences_no_stopwords.append(filtered_sentence)

    results = []
    for idx, sentence in enumerate(paraphrased_sentences_no_stopwords):
        common_words = set(original_sentence_lower.split()) & set(sentence.split())
        common_substrings = ', '.join(sorted(common_words))

        words_to_replace = [word for word in sentence.split() if word not in common_words]
        if words_to_replace:
            probabilities = [1 / len(words_to_replace)] * len(words_to_replace)
            chosen_word = random.choices(words_to_replace, weights=probabilities)[0]
            sentence = sentence.replace(chosen_word, colored(chosen_word, 'magenta'))

        for word in common_words:
            sentence = sentence.replace(word, colored(word, 'green'))

        results.append({
            f"Paraphrased Sentence {idx+1}": sentence,
            "Common Substrings": common_substrings
        })
    return results

# Function for Contextual Sampling
def contextual_sampling(original_sentence, paraphrased_sentences):
    stop_words = set(stopwords.words('english'))
    original_sentence_lower = original_sentence.lower()
    paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
    paraphrased_sentences_no_stopwords = []

    for sentence in paraphrased_sentences_lower:
        words = re.findall(r'\b\w+\b', sentence)
        filtered_sentence = ' '.join([word for word in words if word not in stop_words])
        paraphrased_sentences_no_stopwords.append(filtered_sentence)

    results = []
    for idx, sentence in enumerate(paraphrased_sentences_no_stopwords):
        common_words = set(original_sentence_lower.split()) & set(sentence.split())
        common_substrings = ', '.join(sorted(common_words))

        words_to_replace = [word for word in sentence.split() if word not in common_words]
        if words_to_replace:
            # context string is built here but is not yet used when choosing the word
            context = " ".join([word for word in sentence.split() if word not in common_words])
            chosen_word = random.choice(words_to_replace)
            sentence = sentence.replace(chosen_word, colored(chosen_word, 'red'))

        for word in common_words:
            sentence = sentence.replace(word, colored(word, 'green'))

        results.append({
            f"Paraphrased Sentence {idx+1}": sentence,
            "Common Substrings": common_substrings
        })
    return results

# Function for Exponential Minimum Sampling
def exponential_minimum_sampling(original_sentence, paraphrased_sentences):
    stop_words = set(stopwords.words('english'))
    original_sentence_lower = original_sentence.lower()
    paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
    paraphrased_sentences_no_stopwords = []

    for sentence in paraphrased_sentences_lower:
        words = re.findall(r'\b\w+\b', sentence)
        filtered_sentence = ' '.join([word for word in words if word not in stop_words])
        paraphrased_sentences_no_stopwords.append(filtered_sentence)

    results = []
    for idx, sentence in enumerate(paraphrased_sentences_no_stopwords):
        common_words = set(original_sentence_lower.split()) & set(sentence.split())
        common_substrings = ', '.join(sorted(common_words))

        words_to_replace = [word for word in sentence.split() if word not in common_words]
        if words_to_replace:
            num_words = len(words_to_replace)
            probabilities = [2 ** (-i) for i in range(num_words)]
            chosen_word = random.choices(words_to_replace, weights=probabilities)[0]
            sentence = sentence.replace(chosen_word, colored(chosen_word, 'red'))

        for word in common_words:
            sentence = sentence.replace(word, colored(word, 'green'))

        results.append({
            f"Paraphrased Sentence {idx+1}": sentence,
            "Common Substrings": common_substrings
        })
    return results

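# Note on the sampling scheme above (illustrative, not from the original notebook):
# the weights 2**(-i) form a geometric distribution over candidate positions, so
# earlier words in words_to_replace are exponentially more likely to be marked.
# For four candidates the relative weights would be:
#     [2 ** (-i) for i in range(4)]  # -> [1, 0.5, 0.25, 0.125]
# random.choices treats these as relative weights, so they need not sum to 1.
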
# Function to calculate the BLEU score
# (sentence_bleu expects token lists; passing raw strings would score them character by character)
def calculate_bleu(reference, candidate):
    return sentence_bleu([reference.split()], candidate.split())

# Function to calculate BERT score
def calculate_bert(reference, candidate):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')

    reference_tokens = tokenizer.tokenize(reference)
    candidate_tokens = tokenizer.tokenize(candidate)

    reference_ids = tokenizer.encode(reference, add_special_tokens=True, max_length=512, truncation=True, return_tensors="pt")
    candidate_ids = tokenizer.encode(candidate, add_special_tokens=True, max_length=512, truncation=True, return_tensors="pt")

    with torch.no_grad():
        reference_outputs = model(reference_ids)
        candidate_outputs = model(candidate_ids)

    reference_embeddings = reference_outputs[0][:, 0, :].numpy()
    candidate_embeddings = candidate_outputs[0][:, 0, :].numpy()

    cosine_similarity = np.dot(reference_embeddings, candidate_embeddings.T) / (np.linalg.norm(reference_embeddings) * np.linalg.norm(candidate_embeddings))
    return np.mean(cosine_similarity)

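# Illustrative note (a sketch, not from the original notebook): calculate_bert
# compares the two sentences via the cosine similarity of their [CLS]-token
# embeddings from bert-base-uncased, so it returns a similarity in [-1, 1] rather
# than the BERTScore metric from the bert-score package.
#
# calculate_bert("The cat sat on the mat.", "A cat was sitting on the mat.")
# # -> typically a value close to 1.0 for closely related sentences
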
# Function to calculate minimum edit distance
def min_edit_distance(reference, candidate):
    m = len(reference)
    n = len(candidate)

    dp = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0:
                dp[i][j] = j
            elif j == 0:
                dp[i][j] = i
            elif reference[i - 1] == candidate[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = 1 + min(dp[i][j - 1],      # Insert
                                   dp[i - 1][j],      # Remove
                                   dp[i - 1][j - 1])  # Replace

    return dp[m][n]

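# Worked example for the Levenshtein DP above (illustrative only):
# min_edit_distance("kitten", "sitting") == 3
# (substitute k -> s, substitute e -> i, insert g). When strings are passed the
# distance is character-level; pass token lists instead for a word-level distance.
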
def generate_paraphrase(question):
    para_tokenizer, para_model = init_model()
    res = paraphrase(question, para_tokenizer, para_model)
    return res

# question = "The official position of the United States on the Russia-Ukraine war has been consistent in supporting Ukraine's sovereignty, territorial integrity, and the peaceful resolution of the conflict."

question = "Following the declaration of the State of Israel in 1948, neighboring Arab states invaded. The war ended with Israel controlling a significant portion of the territory. Many Palestinians became refugees."

res = generate_paraphrase(question)

res

longest_common_subss(question, res)

nltk.download('punkt')

from nltk.tokenize import word_tokenize

def non_melting_points(original_sentence, paraphrased_sentences):
    stop_words = set(stopwords.words('english'))

    def tokenize_and_filter(sentence):
        words = word_tokenize(sentence.lower())
        filtered_words = {word for word in words if word.isalpha() and word not in stop_words}
        return filtered_words

    original_words = tokenize_and_filter(original_sentence)
    paraphrased_words_list = [tokenize_and_filter(sentence) for sentence in paraphrased_sentences]
    common_words = original_words
    for words in paraphrased_words_list:
        common_words &= words
    return common_words

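# Illustrative example of the intersection computed above (not from the original notebook):
# non_melting_points("The cat sat on the mat",
#                    ["A cat sat on a mat", "The cat rested on the mat"])
# # -> {'cat', 'mat'}  ('sat' drops out because the second paraphrase lacks it;
# #    stopwords such as 'the' and 'on' are filtered before intersecting)
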
# Function to get the first sentence from a paragraph

def get_first_sentence(paragraph):
    match = re.search(r'([^.]*\.[\s]*[A-Z])', paragraph)
    if match:
        first_sentence = match.group(0)
        first_sentence = first_sentence.strip()
        if len(first_sentence.split('.')) > 1:
            return first_sentence.split('.')[0] + '.'
        return first_sentence
    else:
        return paragraph

# Initializing llama3

# import json
# import torch
# from transformers import (AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline)

# config_data = json.load(open("config.json"))
# HF_TOKEN = config_data["HF_TOKEN"]

# model_name = "meta-llama/Meta-Llama-3-8B"

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16
# )

# tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
# tokenizer.pad_token = tokenizer.eos_token

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     quantization_config=bnb_config,
#     token=HF_TOKEN
# )

# text_generator = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     max_new_tokens=512,
# )

# # llm_result = text_generator("write about nazism")

# llm_result

# llm_result[0]["generated_text"].split('.')

# Finds LCS

def find_common_subsequences(sentence, str_list):
    stop_words = set(stopwords.words('english'))
    sentence = sentence.lower()

    str_list = [s.lower() for s in str_list]

    def is_present(lcs, str_list):
        for string in str_list:
            if lcs not in string:
                return False
        return True

    def remove_stop_words_and_special_chars(sentence):
        sentence = re.sub(r'[^\w\s]', '', sentence)
        words = sentence.split()
        filtered_words = [word for word in words if word.lower() not in stop_words]
        return " ".join(filtered_words)

    sentence = remove_stop_words_and_special_chars(sentence)
    str_list = [remove_stop_words_and_special_chars(s) for s in str_list]

    words = sentence.split(" ")
    common_grams = []
    added_phrases = set()

    def is_covered(subseq, added_phrases):
        for phrase in added_phrases:
            if subseq in phrase:
                return True
        return False

    for i in range(len(words) - 4):
        penta = " ".join(words[i:i+5])
        if is_present(penta, str_list):
            common_grams.append(penta)
            added_phrases.add(penta)

    for i in range(len(words) - 3):
        quad = " ".join(words[i:i+4])
        if is_present(quad, str_list) and not is_covered(quad, added_phrases):
            common_grams.append(quad)
            added_phrases.add(quad)

    for i in range(len(words) - 2):
        tri = " ".join(words[i:i+3])
        if is_present(tri, str_list) and not is_covered(tri, added_phrases):
            common_grams.append(tri)
            added_phrases.add(tri)

    for i in range(len(words) - 1):
        bi = " ".join(words[i:i+2])
        if is_present(bi, str_list) and not is_covered(bi, added_phrases):
            common_grams.append(bi)
            added_phrases.add(bi)

    for i in range(len(words)):
        uni = words[i]
        if is_present(uni, str_list) and not is_covered(uni, added_phrases):
            common_grams.append(uni)
            added_phrases.add(uni)

    return common_grams

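# Illustrative toy example for the n-gram search above (assumed inputs, not from
# the original notebook): longer common n-grams are collected first, and shorter
# ones are skipped when already covered by a longer phrase.
#
# find_common_subsequences("quick brown fox jumps high",
#                          ["quick brown fox leaps high"])
# # -> ['quick brown fox', 'high']
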
question = '''the colorado republican party sent a mass email last week with the subject line "god hates pride"'''
res = generate_paraphrase(question)

res

common_grams = find_common_subsequences(question, res[0:3])
common_grams

common_gram_words = [word for gram in common_grams for word in gram.split()]
common_gram_words

def llm_output(prompt):
    # Llama 3 generation is commented out above, so this stub currently returns the prompt unchanged.
    # sequences = text_generator(prompt)
    # gen_text = sequences[0]["generated_text"]
    # sentences = gen_text.split('.')
    # # first_sentence = get_first_sentence(gen_text[len(prompt):])
    # return gen_text, sentences[-3]
    return prompt, prompt

def generate_html_output(results, common_grams, common_gram_words):
    html_output = "<table border='1'>"
    html_output += "<tr><th>Original Sentence</th><th>Paraphrased Sentence</th><th>Common Substrings</th><th>Non Melting Points</th></tr>"

    for result in results:
        original_sentence = result["Original Sentence"]
        paraphrased_sentence = result["Paraphrased Sentence"]
        common_substrings = result["Substrings Word Pair"]
        # Highlight common substrings in the paraphrased sentence
        for word in common_gram_words:
            paraphrased_sentence = re.sub(r'\b' + re.escape(word) + r'\b', f'<span style="color:green">{word}</span>', paraphrased_sentence, flags=re.IGNORECASE)
        html_output += f"<tr><td>{original_sentence}</td><td>{paraphrased_sentence}</td><td>{common_substrings}</td><td>{common_grams}</td></tr>"
    html_output += "</table>"
    return html_output


def model(prompt):
    generated, sentence = llm_output(prompt)
    res = generate_paraphrase(sentence)
    common_subs = longest_common_subss(sentence, res)
    non_melting = non_melting_points(sentence, res)  # computed but not shown in the HTML output
    common_grams = find_common_subsequences(sentence, res)
    common_gram_words = [word for gram in common_grams for word in gram.split()]
    for i in range(len(common_subs)):
        common_subs[i]["Paraphrased Sentence"] = res[i]
    result = generate_html_output(common_subs, common_grams, common_gram_words)
    return generated, result

# final = model(question)

demo = gr.Interface(
    fn=model,
    inputs=gr.Textbox(label="User Prompt"),
    outputs=[gr.Textbox(label="AI-generated Text (Llama3)"), gr.HTML()],
    title="Paraphrase the Text and Highlight the Non-melting Points",
    theme=gr.themes.Soft()
)

demo.launch(share=True)