awacke1 committed on
Commit
a731957
•
1 Parent(s): 829271f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -19
app.py CHANGED
@@ -90,9 +90,146 @@ def SpeechSynthesis(result):
90
  def parse_to_markdown(text):
91
  return text
92
 
93
- # Show ArXiv Scholarly Articles! ----------------*************----▢️ Semantic and Episodic Memory System
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def search_arxiv(query):
95
  start_time = time.strftime("%Y-%m-%d %H:%M:%S")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
 
98
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
@@ -107,19 +244,13 @@ def search_arxiv(query):
107
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
108
  response1 = client.predict(
109
  query,
110
- 10,
111
  "Semantic Search - up to 10 Mar 2024", # Literal['Semantic Search - up to 10 Mar 2024', 'Arxiv Search - Latest - (EXPERIMENTAL)'] in 'Search Source' Dropdown component
112
  "mistralai/Mixtral-8x7B-Instruct-v0.1", # Literal['mistralai/Mixtral-8x7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None'] in 'LLM Model' Dropdown component
113
  api_name="/update_with_rag_md"
114
  )
115
- #st.markdown(response1)
116
  lastpart=''
117
  totalparts=''
118
- #for parts in response1:
119
- # st.markdown(parts) # expect 2
120
- # lastpart=parts
121
- # totalparts=totalparts+parts
122
-
123
  results = response1[0] # Format for markdown display with links
124
  results2 = response1[1] # format for subquery without links
125
  st.markdown(results)
@@ -127,13 +258,11 @@ def search_arxiv(query):
127
  RunSecondQuery = False
128
  if RunSecondQuery:
129
  # Search 2 - Retrieve the Summary with Papers Context and Original Query
130
- #client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
131
- #newquery='Create a summary as markdown outline with emojis for query: ' + query + ' ' + totalparts
132
  response2 = client.predict(
133
  query, # str in 'parameter_13' Textbox component
134
- #"mistralai/Mixtral-8x7B-Instruct-v0.1",
135
  #"mistralai/Mistral-7B-Instruct-v0.2",
136
- "google/gemma-7b-it",
137
  True, # bool in 'Stream output' Checkbox component
138
  api_name="/ask_llm"
139
  )
@@ -148,7 +277,9 @@ def search_arxiv(query):
148
  st.write(f"Start time: {start_time}")
149
  st.write(f"Finish time: {end_time}")
150
  st.write(f"Elapsed time: {elapsed_seconds:.2f} seconds")
 
151
  SpeechSynthesis(results) # Search History Reader / Writer IO Memory - Audio at Same time as Reading.
 
152
  filename=generate_filename(query, "md")
153
  create_file(filename, query, results, should_save)
154
  return results
@@ -421,7 +552,7 @@ def compare_and_delete_files(files):
421
  if file != latest_file:
422
  os.remove(file)
423
  st.success(f"Deleted {file} as a duplicate.")
424
- st.experimental_rerun()
425
 
426
  # Function to get file size
427
  def get_file_size(file_path):
@@ -456,7 +587,7 @@ def FileSidebar():
456
 
457
  if st.button("Delete", key=f"delete3_{file}"):
458
  os.remove(file)
459
- st.experimental_rerun()
460
  previous_size = size # Update previous size for the next iteration
461
 
462
 
@@ -472,7 +603,7 @@ def FileSidebar():
472
  if st.button("πŸ—‘ Delete All"):
473
  for file in all_files:
474
  os.remove(file)
475
- st.experimental_rerun()
476
  with Files2:
477
  if st.button("⬇️ Download"):
478
  zip_file = create_zip_of_files(all_files)
@@ -511,7 +642,7 @@ def FileSidebar():
511
  if st.button("πŸ—‘", key="delete_"+file):
512
  os.remove(file)
513
  file_name=file
514
- st.experimental_rerun()
515
  next_action='delete'
516
  st.session_state['next_action'] = next_action
517
 
@@ -1271,7 +1402,7 @@ filekey = 'delall'
1271
  if st.sidebar.button("πŸ—‘ Delete All Audio", key=filekey):
1272
  for file in all_files:
1273
  os.remove(file)
1274
- st.experimental_rerun()
1275
 
1276
  for file in all_files:
1277
  col1, col2 = st.sidebar.columns([6, 1]) # adjust the ratio as needed
@@ -1286,7 +1417,7 @@ for file in all_files:
1286
  with col2:
1287
  if st.button("πŸ—‘", key="delete_" + file):
1288
  os.remove(file)
1289
- st.experimental_rerun()
1290
 
1291
 
1292
 
@@ -1333,7 +1464,7 @@ if 'action' in st.query_params:
1333
  st.success("Showing a message because 'action=show_message' was found in the URL.")
1334
  elif action == 'clear':
1335
  clear_query_params()
1336
- st.experimental_rerun()
1337
 
1338
  if 'query' in st.query_params:
1339
  query = st.query_params['query'][0] # Get the query parameter
 
90
  def parse_to_markdown(text):
91
  return text
92
 
93
+
94
+
95
+
96
+ import re
97
+
98
# Patterns for one search-result entry. Each entry is expected to start with a
# heading of the form "### DD Mon YYYY | [Title](abs-url) | [arrow](pdf-url)".
_ARXIV_DATE_RE = re.compile(r'### (\d{2} \w{3} \d{4})')
_ARXIV_TITLE_RE = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')
_ARXIV_ABS_LINK_RE = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
# The down-arrow emoji may or may not carry its U+FE0F variation selector.
_ARXIV_PDF_LINK_RE = re.compile(r'\[\u2b07\ufe0f?\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')


def extract_urls(text):
    """Summarise the dated arXiv entries found in ``text`` as markdown.

    The text is split into sections, one per ``### DD Mon YYYY`` heading, and
    the title, abstract link and PDF link are looked up *inside* each section.
    Looking them up per section (instead of indexing four independent
    ``findall`` lists in lockstep) keeps the fields of one paper together and
    avoids an ``IndexError`` when an entry lacks a title or a link.

    Args:
        text: Markdown containing zero or more dated entries. A falsy value
            (``None`` or ``""``) yields an empty result.

    Returns:
        A markdown string with one ``Date`` / ``Title`` / ``Abstract Link`` /
        ``PDF Link`` group per entry, each group terminated by ``---``.
        Fields that are missing from an entry are left out of its group.
    """
    if not text:
        return ""

    headings = list(_ARXIV_DATE_RE.finditer(text))
    pieces = []
    for position, heading in enumerate(headings):
        # A section runs from this heading up to the next one (or end of text).
        if position + 1 < len(headings):
            section_end = headings[position + 1].start()
        else:
            section_end = len(text)
        section = text[heading.start():section_end]

        pieces.append(f"**Date:** {heading.group(1)}\n\n")

        title_match = _ARXIV_TITLE_RE.search(section)
        if title_match:
            pieces.append(f"**Title:** {title_match.group(1)}\n\n")

        abs_match = _ARXIV_ABS_LINK_RE.search(section)
        if abs_match:
            abs_link = abs_match.group(2)
            pieces.append(f"**Abstract Link:** [{abs_link}]({abs_link})\n\n")

        pdf_match = _ARXIV_PDF_LINK_RE.search(section)
        if pdf_match:
            pdf_link = pdf_match.group(1)
            pieces.append(f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n")

        pieces.append("---\n\n")

    return "".join(pieces)
126
+
127
+
128
+
129
def download_pdfs(urls, timeout=30):
    """Download every URL ending in ``.pdf`` into the current directory.

    Args:
        urls: Iterable of URL strings. Entries that do not end in ``.pdf``
            are ignored.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        List of local file names written, in input order. Each name is the
        last path component of its URL.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status (so an error page is never saved as a PDF).
    """
    local_files = []
    for url in urls:
        if not url.endswith('.pdf'):
            continue
        local_filename = url.split('/')[-1]
        response = requests.get(url, timeout=timeout)
        # Fail loudly instead of writing an HTML error body to a .pdf file.
        response.raise_for_status()
        with open(local_filename, 'wb') as f:
            f.write(response.content)
        local_files.append(local_filename)
    return local_files
139
+
140
def generate_html(local_files):
    """Render ``local_files`` as an HTML unordered list of links.

    File names are HTML-escaped before interpolation so that a name
    containing ``<``, ``&`` or ``"`` cannot break out of the attribute or
    inject markup when the result is rendered with ``unsafe_allow_html``.

    Args:
        local_files: Iterable of file-name strings.

    Returns:
        ``"<ul>...</ul>"`` with one ``<li><a href=...>`` element per file, or
        ``"<ul></ul>"`` when the iterable is empty.
    """
    # Imported locally: the original body used ``html`` as a variable name,
    # so a module-level ``import html`` cannot be assumed.
    from html import escape

    items = "".join(
        f'<li><a href="{escape(name, quote=True)}">{escape(name)}</a></li>'
        for name in local_files
    )
    return f"<ul>{items}</ul>"
147
+
148
+
149
def search_arxiv(query):
    """Search arXiv through the hosted Gradio Space and render the result.

    Sends ``query`` to the ``/update_with_rag_md`` endpoint of the
    ``awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern`` Space, shows the
    returned markdown and a condensed link list in Streamlit, reports timing,
    passes the text to ``SpeechSynthesis`` and stores it via ``create_file``.

    Args:
        query: Search text, forwarded unchanged to the Space.

    Returns:
        The markdown taken from the first element of the Space response, with
        the ``/ask_llm`` answer appended when the second query is enabled.
    """
    started_at = time.time()
    start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(started_at))
    st.markdown('### 🔎 ' + query)

    # Search 1 - Retrieve the Papers
    # A single client serves both requests; it was previously built twice.
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response1 = client.predict(
        query,
        20,  # presumably the number of papers requested - confirm against the Space API
        "Semantic Search - up to 10 Mar 2024",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md",
    )
    # Element 0 is the markdown with links; element 1 (a link-free variant
    # meant for sub-queries) is currently unused.
    results = response1[0]
    st.markdown(results)

    # Condensed date/title/link summary of the same results.
    links_markdown = extract_urls(results)
    st.markdown(links_markdown)

    # Local PDF mirroring is switched off. Keep the value defined so the
    # display step below cannot raise NameError, and fill it in via
    # download_pdfs_and_generate_html(...) when the feature is re-enabled.
    local_links_html = ''

    RunSecondQuery = False
    if RunSecondQuery:
        # Search 2 - Retrieve the Summary with Papers Context and Original Query
        response2 = client.predict(
            query,
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            True,
            api_name="/ask_llm",
        )
        st.markdown(response2)
        results = results + response2

    st.write('🔍Run of Multi-Agent System Paper Summary Spec is Complete')
    finished_at = time.time()
    end_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(finished_at))
    # Measure from raw timestamps: parsing the formatted strings back loses
    # sub-second precision and can be off across a DST change.
    elapsed_seconds = finished_at - started_at
    st.write(f"Start time: {start_time}")
    st.write(f"Finish time: {end_time}")
    st.write(f"Elapsed time: {elapsed_seconds:.2f} seconds")

    SpeechSynthesis(results)
    filename = generate_filename(query, "md")
    create_file(filename, query, results, should_save)

    # Display local links HTML (only when there is something to show).
    if local_links_html:
        st.markdown(local_links_html, unsafe_allow_html=True)

    return results
208
+
209
def download_pdfs_and_generate_html(urls):
    """Download the ``.pdf`` URLs in ``urls`` and return an HTML link list.

    Each URL ending in ``.pdf`` is fetched with ``download_pdf`` into a file
    named after the URL's base name; other entries are ignored.

    Args:
        urls: Iterable of URL strings.

    Returns:
        ``"<ul>...</ul>"`` with one link per downloaded file, or
        ``"<ul></ul>"`` when nothing was downloaded. File names are
        HTML-escaped because they originate from remote URLs and the result
        is meant to be rendered as raw HTML.
    """
    from html import escape

    pdf_links = []
    for url in urls:
        if url.endswith('.pdf'):
            pdf_filename = os.path.basename(url)
            download_pdf(url, pdf_filename)
            pdf_links.append(pdf_filename)

    items = ''.join(
        f'<li><a href="{escape(link, quote=True)}">{escape(link)}</a></li>'
        for link in pdf_links
    )
    return f'<ul>{items}</ul>'
222
+
223
def download_pdf(url, filename, timeout=30):
    """Fetch ``url`` and write the response body to ``filename``.

    Args:
        url: Address of the file to download.
        filename: Local path to write; an existing file is overwritten.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the app.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status. Nothing is written in that case.
    """
    response = requests.get(url, timeout=timeout)
    # Check the status first so an error page is never stored as a PDF.
    response.raise_for_status()
    with open(filename, 'wb') as file:
        file.write(response.content)
227
+
228
+
229
+
230
+ # Show ArXiv Scholarly Articles! ----------------*************----▢️ Semantic and Episodic Memory System
231
+ def search_arxiv_old(query):
232
+ start_time = time.strftime("%Y-%m-%d %H:%M:%S")
233
 
234
 
235
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
 
244
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
245
  response1 = client.predict(
246
  query,
247
+ 20,
248
  "Semantic Search - up to 10 Mar 2024", # Literal['Semantic Search - up to 10 Mar 2024', 'Arxiv Search - Latest - (EXPERIMENTAL)'] in 'Search Source' Dropdown component
249
  "mistralai/Mixtral-8x7B-Instruct-v0.1", # Literal['mistralai/Mixtral-8x7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None'] in 'LLM Model' Dropdown component
250
  api_name="/update_with_rag_md"
251
  )
 
252
  lastpart=''
253
  totalparts=''
 
 
 
 
 
254
  results = response1[0] # Format for markdown display with links
255
  results2 = response1[1] # format for subquery without links
256
  st.markdown(results)
 
258
  RunSecondQuery = False
259
  if RunSecondQuery:
260
  # Search 2 - Retrieve the Summary with Papers Context and Original Query
 
 
261
  response2 = client.predict(
262
  query, # str in 'parameter_13' Textbox component
263
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
264
  #"mistralai/Mistral-7B-Instruct-v0.2",
265
+ #"google/gemma-7b-it",
266
  True, # bool in 'Stream output' Checkbox component
267
  api_name="/ask_llm"
268
  )
 
277
  st.write(f"Start time: {start_time}")
278
  st.write(f"Finish time: {end_time}")
279
  st.write(f"Elapsed time: {elapsed_seconds:.2f} seconds")
280
+
281
  SpeechSynthesis(results) # Search History Reader / Writer IO Memory - Audio at Same time as Reading.
282
+
283
  filename=generate_filename(query, "md")
284
  create_file(filename, query, results, should_save)
285
  return results
 
552
  if file != latest_file:
553
  os.remove(file)
554
  st.success(f"Deleted {file} as a duplicate.")
555
+ st.rerun()
556
 
557
  # Function to get file size
558
  def get_file_size(file_path):
 
587
 
588
  if st.button("Delete", key=f"delete3_{file}"):
589
  os.remove(file)
590
+ st.rerun()
591
  previous_size = size # Update previous size for the next iteration
592
 
593
 
 
603
  if st.button("πŸ—‘ Delete All"):
604
  for file in all_files:
605
  os.remove(file)
606
+ st.rerun()
607
  with Files2:
608
  if st.button("⬇️ Download"):
609
  zip_file = create_zip_of_files(all_files)
 
642
  if st.button("πŸ—‘", key="delete_"+file):
643
  os.remove(file)
644
  file_name=file
645
+ st.rerun()
646
  next_action='delete'
647
  st.session_state['next_action'] = next_action
648
 
 
1402
  if st.sidebar.button("πŸ—‘ Delete All Audio", key=filekey):
1403
  for file in all_files:
1404
  os.remove(file)
1405
+ st.rerun()
1406
 
1407
  for file in all_files:
1408
  col1, col2 = st.sidebar.columns([6, 1]) # adjust the ratio as needed
 
1417
  with col2:
1418
  if st.button("πŸ—‘", key="delete_" + file):
1419
  os.remove(file)
1420
+ st.rerun()
1421
 
1422
 
1423
 
 
1464
  st.success("Showing a message because 'action=show_message' was found in the URL.")
1465
  elif action == 'clear':
1466
  clear_query_params()
1467
+ st.rerun()
1468
 
1469
  if 'query' in st.query_params:
1470
  query = st.query_params['query'][0] # Get the query parameter