awacke1 committed on
Commit
c057548
1 Parent(s): 7108100

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +190 -39
app.py CHANGED
@@ -9,57 +9,211 @@ import pandas as pd
9
  import faiss
10
  import datetime
11
  import time
12
- import random # Import random library
13
- from streamlit.components.v1 import html as html_component # Import html component
14
-
15
- # Add a list of random subjects
16
- random_subjects = [
17
- "Computer Science",
18
- "Physics",
19
- "Chemistry",
20
- "Biology",
21
- "History",
22
- "Mathematics",
23
- "Geography",
24
- "Art",
25
- "Music",
26
- "Literature",
27
- ]
28
-
29
- # ... (rest of the code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def main():
32
  st.title("Streamlit Chat")
33
 
34
  name = st.text_input("Enter your name")
35
  message = st.text_input("Enter a topic to share from Wikipedia")
36
-
37
- # If no input is provided, select a random subject
38
- if not message:
39
- message = random.choice(random_subjects)
40
-
41
  if st.button("Submit"):
42
 
43
  # wiki
44
  df = get_wiki_summaryDF(message)
45
 
46
  save_message(name, message)
47
- save_message(name, df.to_string()) # Convert DataFrame to string before saving
48
 
49
  st.text("Message sent!")
50
 
 
51
  st.text("Chat history:")
52
-
53
- # Display chat history in a data grid
54
  with open("chat.txt", "a+") as f:
55
  f.seek(0)
56
  chat_history = f.read()
57
-
58
- # Wrap text in the chat history
59
- wrapped_chat_history = "<br>".join(chat_history.split("\n"))
60
-
61
- # Render chat history using the html component
62
- html_component(f"<pre style='white-space: pre-wrap;'>{wrapped_chat_history}</pre>")
63
 
64
  countdown = st.empty()
65
  t = 60
@@ -73,16 +227,13 @@ def main():
73
  with open("chat.txt", "a+") as f:
74
  f.seek(0)
75
  chat_history = f.read()
76
-
77
- # Wrap text in the chat history
78
- wrapped_chat_history = "<br>".join(chat_history.split("\n"))
79
-
80
- # Render chat history using the html component
81
- html_component(f"<pre style='white-space: pre-wrap;'>{wrapped_chat_history}</pre>")
82
 
83
  press_release()
84
 
85
- t = 60
86
 
87
  if __name__ == "__main__":
88
  main()
 
 
9
  import faiss
10
  import datetime
11
  import time
12
+
13
+
14
# Load the small English spaCy pipeline, downloading it on first use.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    # Catching only that avoids masking unrelated failures (or Ctrl-C)
    # behind a surprise download.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Noun chunks equal to one of these question words are skipped by get_concepts.
wh_words = ['what', 'who', 'how', 'when', 'which']
21
+
22
def get_concepts(text):
    """Return the noun chunks found in the lower-cased `text`.

    Chunks whose text is exactly one of `wh_words` are left out. The result
    is a list of strings in document order.
    """
    parsed = nlp(text.lower())
    return [
        phrase.text
        for phrase in parsed.noun_chunks
        if phrase.text not in wh_words
    ]
30
+
31
def get_passages(text, k=100):
    """Split `text` into passages made of whole sentences.

    Sentences are accumulated until adding the next one would bring the
    running length to `k` or more; the accumulated passage is then emitted
    and the next sentence starts a new one. Length is measured with
    ``len(sentence)`` on the spaCy sentence spans (their token count).

    Args:
        text: Raw text to segment.
        k: Length threshold at which a passage is closed.

    Returns:
        A list of passage strings, each the sentences of that passage joined
        by single spaces. Every sentence of `text` appears in exactly one
        passage, and no empty passages are produced.
    """
    doc = nlp(text)
    passages = []
    current = []       # sentence texts of the passage being built
    current_len = 0    # summed len() of the sentences in `current`
    for sent in doc.sents:
        sent_len = len(sent)
        # Close the running passage before it would reach k. The `current`
        # guard stops a single over-long first sentence from emitting an
        # empty passage.
        if current and current_len + sent_len >= k:
            passages.append(" ".join(current))
            current = []
            current_len = 0
        current.append(sent.text)
        current_len += sent_len
    # Flush the tail unconditionally so the last sentence is never lost,
    # even when it was the one that triggered the previous flush.
    if current:
        passages.append(" ".join(current))
    return passages
53
+
54
def get_dicts_for_dpr(concepts, n_results=20, k=100):
    """Collect Wikipedia passages for every concept.

    For each concept, Wikipedia is searched for up to `n_results` titles.
    Every resulting page is fetched and its content split into passages with
    ``get_passages(..., k=k)``. The number of search hits per concept is
    written to the Streamlit page.

    Args:
        concepts: Iterable of search strings.
        n_results: Maximum number of search results requested per concept.
        k: Passage length threshold forwarded to `get_passages`.

    Returns:
        A list of ``{'text': passage, 'title': wiki_title}`` dicts.
    """
    dicts = []
    for concept in concepts:
        wikis = wikipedia.search(concept, results=n_results)
        st.write(f"{concept} No of Wikis: {len(wikis)}")
        for wiki in wikis:
            try:
                html_page = wikipedia.page(title=wiki, auto_suggest=False)
            except (DisambiguationError, wikipedia.exceptions.PageError):
                # Skip ambiguous or unresolvable titles so one bad search
                # hit does not abort the whole collection.
                continue
            dicts.extend(
                {'text': passage, 'title': wiki}
                for passage in get_passages(html_page.content, k=k)
            )
    return dicts
72
+
73
# Checkpoint names for the context (passage) and question (query) sides;
# each side's model and tokenizer are loaded from the same checkpoint.
_CTX_CHECKPOINT = "nlpconnect/dpr-ctx_encoder_bert_uncased_L-2_H-128_A-2"
_QUESTION_CHECKPOINT = "nlpconnect/dpr-question_encoder_bert_uncased_L-2_H-128_A-2"

passage_encoder = TFAutoModel.from_pretrained(_CTX_CHECKPOINT)
query_encoder = TFAutoModel.from_pretrained(_QUESTION_CHECKPOINT)
p_tokenizer = AutoTokenizer.from_pretrained(_CTX_CHECKPOINT)
q_tokenizer = AutoTokenizer.from_pretrained(_QUESTION_CHECKPOINT)
77
+
78
def get_title_text_combined(passage_dicts):
    """Return one ``(title, text)`` tuple per passage dict, in input order.

    Args:
        passage_dicts: Iterable of mappings with 'title' and 'text' keys.

    Returns:
        A list of 2-tuples; empty when `passage_dicts` is empty.
    """
    return [(p['title'], p['text']) for p in passage_dicts]
83
+
84
def extracted_passage_embeddings(processed_passages, max_length=156):
    """Run the passage encoder over `processed_passages`.

    The inputs are tokenized with `p_tokenizer`, truncated and padded to
    `max_length`, and passed to ``passage_encoder.predict`` in batches of 64.
    The value returned by ``predict`` is handed back unchanged.
    """
    encoded = p_tokenizer.batch_encode_plus(
        processed_passages,
        add_special_tokens=True,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_token_type_ids=True,
    )
    # The encoder receives the three arrays in this fixed order.
    model_inputs = [
        np.array(encoded['input_ids']),
        np.array(encoded['attention_mask']),
        np.array(encoded['token_type_ids']),
    ]
    return passage_encoder.predict(model_inputs, batch_size=64, verbose=1)
98
+
99
def extracted_query_embeddings(queries, max_length=64):
    """Run the query encoder over `queries`.

    The inputs are tokenized with `q_tokenizer`, truncated and padded to
    `max_length`, and passed to ``query_encoder.predict`` one at a time
    (``batch_size=1``). The value returned by ``predict`` is handed back
    unchanged.
    """
    encoded = q_tokenizer.batch_encode_plus(
        queries,
        add_special_tokens=True,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_token_type_ids=True,
    )
    # The encoder receives the three arrays in this fixed order.
    model_inputs = [
        np.array(encoded['input_ids']),
        np.array(encoded['attention_mask']),
        np.array(encoded['token_type_ids']),
    ]
    return query_encoder.predict(model_inputs, batch_size=1, verbose=1)
115
+
116
def get_pagetext(page):
    """Return ``str(page)`` with tab characters removed.

    The previous pattern was the two-character literal "/t", which left tabs
    in place and instead deleted "/t" from ordinary text (for example
    "his/their" became "hisheir").
    """
    return str(page).replace("\t", "")
119
+
120
def get_wiki_summary(search):
    """Return the summary of the English Wikipedia page titled `search`.

    Previously the page object was created and then discarded, so the
    function always returned None.

    NOTE(review): the value returned for a title that does not exist is
    whatever wikipedia-api's ``summary`` property yields for a missing
    page; confirm it is acceptable to callers.
    """
    wiki_wiki = wikipediaapi.Wikipedia('en')
    page = wiki_wiki.page(search)
    return page.summary
123
+
124
+
125
def get_wiki_summaryDF(search):
    """Build an Entity/Value table describing the Wikipedia page `search`.

    Returns:
        A DataFrame with columns ``Entity`` and ``Value``. The rows hold the
        page URL, title, the first 60 characters of the summary, the page
        text (passed through `get_pagetext`), backlinks, links and
        categories. Each of the last three is flattened to one string in
        which every name is followed by " , ".

        NOTE(review): when the page does not exist, a 5-tuple
        ``(False, "Not found", "Not found", "Not found", "Not found")`` is
        returned instead of a DataFrame. This is kept for backward
        compatibility; callers that expect a DataFrame must check for it.
    """
    wiki_wiki = wikipediaapi.Wikipedia('en')
    page = wiki_wiki.page(search)

    isExist = page.exists()
    if not isExist:
        return isExist, "Not found", "Not found", "Not found", "Not found"

    def _flatten(names):
        # Keep the established format: every name followed by " , ",
        # including the last one.
        return "".join(name + " , " for name in names)

    pageurl = page.fullurl
    pagetitle = page.title
    pagesummary = page.summary[0:60]
    pagetext = get_pagetext(page.text)

    # Iterating these mappings yields their keys (the page/category names).
    linklist = _flatten(page.backlinks)
    categorylist = _flatten(page.categories)
    linklist2 = _flatten(page.links)

    ex_dic = {
        'Entity': ["URL", "Title", "Summary", "Text", "Backlinks", "Links", "Categories"],
        'Value': [pageurl, pagetitle, pagesummary, pagetext, linklist, linklist2, categorylist],
    }

    return pd.DataFrame(ex_dic)
169
+
170
+
171
def save_message(name, message):
    """Append one ``<timestamp> - <name>: <message>`` line to chat.txt.

    The timestamp is the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.
    """
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open("chat.txt", "a") as chat_log:
        entry = f"{stamp} - {name}: {message}\n"
        chat_log.write(entry)
176
+
177
def press_release():
    """Render the StreamlitWikipediaChat announcement as Markdown on the page."""
    announcement = """🎉🎊 Breaking News! 📢📣

Introducing StreamlitWikipediaChat - the ultimate way to chat with Wikipedia and the whole world at the same time! 🌎📚👋

Are you tired of reading boring articles on Wikipedia? Do you want to have some fun while learning new things? Then StreamlitWikipediaChat is just the thing for you! 😃💻

With StreamlitWikipediaChat, you can ask Wikipedia anything you want and get instant responses! Whether you want to know the capital of Madagascar or how to make a delicious chocolate cake, Wikipedia has got you covered. 🍰🌍

But that's not all! You can also chat with other people from around the world who are using StreamlitWikipediaChat at the same time. It's like a virtual classroom where you can learn from and teach others. 🌐👨‍🏫👩‍🏫

And the best part? StreamlitWikipediaChat is super easy to use! All you have to do is type in your question and hit send. That's it! 🤯🙌

So, what are you waiting for? Join the fun and start chatting with Wikipedia and the world today! 😎🎉

StreamlitWikipediaChat - where learning meets fun! 🤓🎈"""
    st.markdown(announcement)
193
+
194
 
195
  def main():
196
  st.title("Streamlit Chat")
197
 
198
  name = st.text_input("Enter your name")
199
  message = st.text_input("Enter a topic to share from Wikipedia")
 
 
 
 
 
200
  if st.button("Submit"):
201
 
202
  # wiki
203
  df = get_wiki_summaryDF(message)
204
 
205
  save_message(name, message)
206
+ save_message(name, df)
207
 
208
  st.text("Message sent!")
209
 
210
+
211
  st.text("Chat history:")
 
 
212
  with open("chat.txt", "a+") as f:
213
  f.seek(0)
214
  chat_history = f.read()
215
+ #st.text(chat_history)
216
+ st.markdown(chat_history)
 
 
 
 
217
 
218
  countdown = st.empty()
219
  t = 60
 
227
  with open("chat.txt", "a+") as f:
228
  f.seek(0)
229
  chat_history = f.read()
230
+ #st.text(chat_history)
231
+ st.markdown(chat_history)
 
 
 
 
232
 
233
  press_release()
234
 
235
+ t = 15
236
 
237
  if __name__ == "__main__":
238
  main()
239
+