Towhidul committed on
Commit
f7ce7f1
1 Parent(s): 41d8e3a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import plotly.graph_objects as go
3
+ from transformers import pipeline
4
+ import re
5
+ import time
6
+ import requests
7
+ from PIL import Image
8
+ import itertools
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ from matplotlib.colors import rgb2hex
12
+ import matplotlib
13
+ from matplotlib.colors import ListedColormap, rgb2hex
14
+ import ipywidgets as widgets
15
+ from IPython.display import display, HTML
16
+ import re
17
+ import pandas as pd
18
+ from pprint import pprint
19
+ from tenacity import retry
20
+ from tqdm import tqdm
21
+ # import tiktoken
22
+ import scipy.stats
23
+ import torch
24
+ from transformers import GPT2LMHeadModel
25
+ # import tiktoken
26
+ import seaborn as sns
27
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
28
+ from colorama import Fore, Style
29
+ # import openai
30
+
31
+
32
# Hugging Face checkpoint shared by the tokenizer and the seq2seq model below.
_PARAPHRASER_CHECKPOINT = "humarin/chatgpt_paraphraser_on_T5_base"

# Both objects are loaded at import time and used by `paraphrase`.
# NOTE(review): Streamlit re-executes the script on each interaction, so these
# loads presumably repeat on every rerun -- consider caching them.
para_tokenizer = AutoTokenizer.from_pretrained(_PARAPHRASER_CHECKPOINT)
para_model = AutoModelForSeq2SeqLM.from_pretrained(_PARAPHRASER_CHECKPOINT)
34
+
35
def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=64 #128
):
    """Generate paraphrases of *question* with the module-level paraphraser.

    The text is prefixed with ``'paraphrase: '``, tokenized with
    ``para_tokenizer`` (truncated to ``max_length``) and passed to
    ``para_model.generate`` together with the decoding options below.

    Args:
        question: Text to paraphrase.
        num_beams: Forwarded to ``generate`` as ``num_beams``.
        num_beam_groups: Forwarded to ``generate`` as ``num_beam_groups``.
        num_return_sequences: Forwarded to ``generate``.
        repetition_penalty: Forwarded to ``generate``.
        diversity_penalty: Forwarded to ``generate``.
        no_repeat_ngram_size: Forwarded to ``generate``.
        temperature: Forwarded to ``generate``.
        max_length: Used both as the tokenizer truncation length and as the
            ``max_length`` passed to ``generate``.

    Returns:
        The decoded outputs from ``para_tokenizer.batch_decode`` with special
        tokens stripped.
    """
    encoded_prompt = para_tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    )

    # Collect the decoding options in one place before calling the model.
    generation_options = dict(
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams,
        num_beam_groups=num_beam_groups,
        max_length=max_length,
        diversity_penalty=diversity_penalty,
    )
    generated_ids = para_model.generate(
        encoded_prompt.input_ids, **generation_options
    )

    return para_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
63
+
64
+
65
def find_longest_common_sequences(main_sentence, paraphrases):
    """Return the maximal word runs that *main_sentence* shares with paraphrases.

    Sentences are split on whitespace. A run is a contiguous sequence of at
    least two identical tokens appearing both in ``main_sentence`` and in one
    of ``paraphrases``. Only maximal runs are kept: a run that occurs, on token
    boundaries, inside another reported run is dropped, regardless of the
    order in which the paraphrases are supplied.

    Args:
        main_sentence: The reference sentence.
        paraphrases: Iterable of sentences to compare against the reference.

    Returns:
        A list of space-joined runs, longest first (by character length);
        ties are broken alphabetically so the result is deterministic.
    """
    main_tokens = main_sentence.split()
    candidates = set()

    # `candidate_text` rather than `paraphrase`, to avoid shadowing the
    # module-level `paraphrase` function.
    for candidate_text in paraphrases:
        other_tokens = candidate_text.split()
        for i in range(len(main_tokens)):
            for j in range(len(other_tokens)):
                # A start whose preceding tokens also match is just the tail
                # of a longer run that was already examined from (i-1, j-1).
                if i and j and main_tokens[i - 1] == other_tokens[j - 1]:
                    continue
                m, n = i, j
                while (
                    m < len(main_tokens)
                    and n < len(other_tokens)
                    and main_tokens[m] == other_tokens[n]
                ):
                    m += 1
                    n += 1
                # Single-word overlaps are not reported.
                if m - i > 1:
                    candidates.add(' '.join(main_tokens[i:m]))

    # Padding with spaces turns the substring test into a token-boundary test,
    # so "a b" is not considered part of "xa bx".
    padded = {seq: f" {seq} " for seq in candidates}
    maximal = [
        seq
        for seq in candidates
        if not any(
            other != seq and padded[seq] in padded[other]
            for other in candidates
        )
    ]

    return sorted(maximal, key=lambda seq: (-len(seq), seq))
87
+
88
# --- Streamlit page flow ----------------------------------------------------
# Order matters: the prompt is chosen first, paraphrases are generated from it,
# and only then can the common sequences be extracted and highlighted.

prompt_list = [
    "The official position of the United States on the Russia-Ukraine war has been consistent in supporting Ukraine's sovereignty, territorial integrity, and the peaceful resolution of the conflict.",
]

# Build one dropdown entry per available prompt (however many there are),
# followed by the entry that lets the user type a custom prompt.
options = [
    f"Prompt #{number}: {prompt}"
    for number, prompt in enumerate(prompt_list, 1)
] + ["Another Prompt..."]
selection = st.selectbox("Choose a prompt from the dropdown below . Click on :blue['Another Prompt...'] , if you want to enter your own custom prompt.", options=options)

if selection == "Another Prompt...":
    custom_prompt = st.text_input("Enter your custom prompt...")
    # Keep the leading space used by the predefined prompts, but leave `check`
    # empty while nothing has been typed so that no generation is triggered.
    check = " " + custom_prompt if custom_prompt.strip() else ""
    if check:
        st.caption(f""":white_check_mark: Your input prompt is : {check}""")
        st.caption(':green[Kindly hold on for a few minutes while the AI text is being generated]')
else:
    # Strip the "Prompt #N:" label; maxsplit is passed by keyword because the
    # positional form is deprecated in recent Python versions.
    check = re.split(r'#\d+:', selection, maxsplit=1)[1]
    if check:
        st.caption(f""":white_check_mark: Your input prompt is : {check}""")
        st.caption(':green[Kindly hold on for a few minutes while the Paraphrase texts are being generated]')

main_sentence = check

if main_sentence:
    st.write("Main Sentence:")
    st.write(main_sentence)

    # Generate paraphrases
    paraphrases = paraphrase(main_sentence)

    # Display paraphrases with numbers
    st.write("Paraphrases:")
    for i, para in enumerate(paraphrases, 1):
        st.write(f"Paraphrase {i}:")
        st.write(para)

    # Extracting longest common sequences
    longest_common_sequences = find_longest_common_sequences(main_sentence, paraphrases)

    # Assigning colors to different sequences
    # NOTE(review): colorama produces ANSI terminal escape codes; confirm they
    # render as intended inside `st.text` in the browser.
    color_palette = [Fore.RED, Fore.GREEN, Fore.BLUE, Fore.MAGENTA, Fore.CYAN]
    highlighted_sentence = main_sentence
    for i, sequence in enumerate(longest_common_sequences):
        color = color_palette[i % len(color_palette)]
        highlighted_sentence = highlighted_sentence.replace(sequence, f"{color}{sequence}{Style.RESET_ALL}")

    # Displaying the main sentence with highlighted longest common sequences
    st.text("Main sentence with highlighted longest common sequences:")
    st.text(highlighted_sentence)