awacke1 committed on
Commit
399cdfe
•
1 Parent(s): ac36720

Create backup6.ArXivPerfected.app.py

Browse files
Files changed (1) hide show
  1. backup6.ArXivPerfected.app.py +702 -0
backup6.ArXivPerfected.app.py ADDED
@@ -0,0 +1,702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from azure.cosmos import CosmosClient, exceptions
3
+ import os
4
+ import pandas as pd
5
+ import traceback
6
+ import shutil
7
+ from github import Github
8
+ from git import Repo
9
+ from datetime import datetime
10
+ import base64
11
+ import json
12
+ import uuid # 🎲 For generating unique IDs
13
+ from urllib.parse import quote # πŸ”— For encoding URLs
14
+ from gradio_client import Client # 🌐 For connecting to Gradio apps
15
+
16
+ # πŸŽ‰ Welcome to our fun-filled Cosmos DB and GitHub Integration app!
17
# Use the wide page layout for the whole app.
st.set_page_config(layout="wide")

# Cosmos DB connection settings. The endpoint is fixed; the database name,
# container name and account key are read from environment variables and are
# None when the variable is not set.
ENDPOINT = "https://acae-afd.documents.azure.com:443/"
DATABASE_NAME = os.getenv("COSMOS_DATABASE_NAME")
CONTAINER_NAME = os.getenv("COSMOS_CONTAINER_NAME")
Key = os.getenv("Key")

# Base URL used when building "?q=" search links back into this app.
LOCAL_APP_URL = "https://huggingface.co/spaces/awacke1/AzureCosmosDBUI"
27
+
28
+ # πŸ€– OpenAI configuration
29
+ #openai.api_key = os.environ.get("OPENAI_API_KEY")
30
+ #MODEL = "gpt-3.5-turbo" # Replace with your desired model
31
+
32
+ # πŸ™ GitHub configuration
33
def download_github_repo(url, local_path):
    """Clone the repository at ``url`` into ``local_path``.

    Anything already present at ``local_path`` is removed first so the clone
    always starts from an empty location.
    """
    already_present = os.path.exists(local_path)
    if already_present:
        shutil.rmtree(local_path)
    Repo.clone_from(url, local_path)
38
+
39
def create_zip_file(source_dir, output_filename):
    """Zip the contents of ``source_dir``.

    ``output_filename`` is the archive path *without* the ``.zip`` suffix;
    ``shutil.make_archive`` appends the extension itself.
    """
    shutil.make_archive(base_name=output_filename, format='zip', root_dir=source_dir)
42
+
43
def create_repo(g, repo_name):
    """Create a repository named ``repo_name`` for the user behind ``g``.

    ``g`` is the GitHub client object; the new repository object returned by
    ``create_repo`` on its user is passed straight back to the caller.
    """
    return g.get_user().create_repo(repo_name)
47
+
48
def push_to_github(local_path, repo, github_token):
    """Push the git working tree at ``local_path`` to the GitHub repo ``repo``.

    The token is embedded in the HTTPS remote URL, ``origin`` is created or
    re-pointed at that URL, all changes are staged and committed when the tree
    is dirty, and the current branch is pushed under the same name.
    """
    local_repo = Repo(local_path)
    authenticated_url = f"https://{github_token}@github.com/{repo.full_name}.git"

    # Reuse an existing 'origin' remote when there is one, otherwise add it.
    has_origin = any(remote.name == 'origin' for remote in local_repo.remotes)
    if has_origin:
        origin = local_repo.remote('origin')
        origin.set_url(authenticated_url)
    else:
        origin = local_repo.create_remote('origin', authenticated_url)

    # A repository without any branch heads gets a fresh 'main' branch.
    if local_repo.heads:
        current_branch = local_repo.active_branch.name
    else:
        local_repo.git.checkout('-b', 'main')
        current_branch = 'main'

    # Stage everything, then commit only if something actually changed.
    local_repo.git.add(A=True)
    if local_repo.is_dirty():
        local_repo.git.commit('-m', 'Initial commit')

    origin.push(refspec=f'{current_branch}:{current_branch}')
71
+
72
def get_base64_download_link(file_path, file_name):
    """Return an HTML anchor that downloads ``file_path`` as ``file_name``.

    The file is read in binary mode and inlined into the link as a base64
    ``data:application/zip`` URI.
    """
    with open(file_path, "rb") as handle:
        payload = base64.b64encode(handle.read()).decode()
    href = f"data:application/zip;base64,{payload}"
    return f'<a href="{href}" download="{file_name}">⬇️ Download {file_name}</a>'
78
+
79
+
80
+ # 🧭 New functions for dynamic sidebar navigation
81
def get_databases(client):
    """Return the ``id`` of every database listed by ``client``."""
    database_ids = []
    for database in client.list_databases():
        database_ids.append(database['id'])
    return database_ids
84
+
85
def get_containers(database):
    """Return the ``id`` of every container listed by ``database``."""
    container_ids = []
    for entry in database.list_containers():
        container_ids.append(entry['id'])
    return container_ids
88
+
89
def get_documents(container, limit=None):
    """Return documents from ``container``, newest first.

    Args:
        container: Cosmos container client exposing ``query_items``.
        limit: Maximum number of documents to return. ``None`` (the default)
            returns every document; negative values are treated as 0.

    Returns:
        A list of the items yielded by the query, ordered by ``_ts``
        descending.
    """
    from itertools import islice

    query = "SELECT * FROM c ORDER BY c._ts DESC"
    results = container.query_items(
        query=query,
        enable_cross_partition_query=True,
        max_item_count=limit,
    )
    if limit is None:
        return list(results)
    # max_item_count alone does not cap the total: draining the iterator with
    # list() would still fetch everything, so stop consuming after `limit`.
    return list(islice(results, max(limit, 0)))
94
+
95
+
96
+ # 🌟 Cosmos DB functions
97
def insert_record(container, record):
    """Create ``record`` as a new item in ``container``.

    Returns:
        ``(True, message)`` when the item was created, otherwise
        ``(False, message)`` describing the Cosmos HTTP error or any other
        exception that was raised.
    """
    try:
        container.create_item(body=record)
    except exceptions.CosmosHttpResponseError as http_error:
        return False, f"HTTP error occurred: {str(http_error)} 🚨"
    except Exception as error:
        return False, f"An unexpected error occurred: {str(error)} 😱"
    else:
        return True, "Record inserted successfully! πŸŽ‰"
105
+
106
def update_record(container, updated_record):
    """Upsert ``updated_record`` into ``container``.

    Returns:
        ``(True, message)`` on success. On failure ``(False, message)`` where
        the message carries the Cosmos HTTP error text, or the full traceback
        for any other exception (including a record without an ``id`` key).
    """
    try:
        container.upsert_item(body=updated_record)
        # Reading the id stays inside the try so a missing key is reported
        # through the generic handler rather than raised to the caller.
        record_id = updated_record['id']
        return True, f"Record with id {record_id} successfully updated. πŸ› οΈ"
    except exceptions.CosmosHttpResponseError as http_error:
        return False, f"HTTP error occurred: {str(http_error)} 🚨"
    except Exception:
        return False, f"An unexpected error occurred: {traceback.format_exc()} 😱"
114
+
115
def delete_record(container, name, id):
    """Delete the item ``id`` from ``container``.

    The item id is also passed as the partition key value.
    NOTE(review): that only matches containers partitioned on the id -
    confirm against the container definition.

    Args:
        container: Cosmos container client exposing ``delete_item``.
        name: Display name, used only in the success message.
        id: Identifier of the item to delete.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` when the item is
        missing, a Cosmos HTTP error occurs, or anything else fails.
    """
    try:
        container.delete_item(item=id, partition_key=id)
    except exceptions.CosmosResourceNotFoundError:
        return False, f"Record with id {id} not found. It may have been already deleted. πŸ•΅οΈβ€β™‚οΈ"
    except exceptions.CosmosHttpResponseError as http_error:
        return False, f"HTTP error occurred: {str(http_error)} 🚨"
    except Exception:
        return False, f"An unexpected error occurred: {traceback.format_exc()} 😱"
    else:
        return True, f"Successfully deleted record with name: {name} and id: {id} πŸ—‘οΈ"
125
+
126
+ # 🎲 Function to generate a unique UUID
127
def generate_unique_id():
    """Return a freshly generated random UUID (version 4) as a string."""
    new_uuid = uuid.uuid4()
    return f"{new_uuid}"
130
+
131
+ # πŸ“¦ Function to archive current container
132
def archive_current_container(database_name, container_name, client):
    """Export every item of a container as JSON files inside a zip download link.

    Each item is written to ``<id>.json`` under a staging directory, the
    directory is zipped, and the zip is inlined into an HTML download link.
    The staging directory and the zip file are removed before returning, the
    same way the GitHub clone path cleans up its temporary files.

    Args:
        database_name: Name of the Cosmos database.
        container_name: Name of the container to export.
        client: Cosmos client used to reach the database.

    Returns:
        The HTML ``<a ...>`` download link on success, or a plain error
        message string when anything fails (callers distinguish the two by
        checking for the ``<a`` prefix).
    """
    base_dir = "./cosmos_archive_current_container"
    zip_path = None
    try:
        # Start from an empty staging directory.
        if os.path.exists(base_dir):
            shutil.rmtree(base_dir)
        os.makedirs(base_dir)

        db_client = client.get_database_client(database_name)
        container_client = db_client.get_container_client(container_name)
        items = list(container_client.read_all_items())

        container_dir = os.path.join(base_dir, container_name)
        os.makedirs(container_dir)

        for index, item in enumerate(items):
            # The index keeps id-less items from overwriting each other when
            # several are written within the same second.
            fallback_id = f"unknown_{datetime.now().strftime('%Y%m%d%H%M%S')}_{index}"
            item_id = item.get('id', fallback_id)
            with open(os.path.join(container_dir, f"{item_id}.json"), 'w') as f:
                json.dump(item, f, indent=2)

        archive_name = f"{container_name}_archive_{datetime.now().strftime('%Y%m%d%H%M%S')}"
        shutil.make_archive(archive_name, 'zip', base_dir)
        zip_path = f"{archive_name}.zip"

        return get_base64_download_link(zip_path, zip_path)
    except Exception as e:
        return f"An error occurred while archiving data: {str(e)} 😒"
    finally:
        # The link already embeds the archive bytes, so nothing on disk is
        # needed after this point.
        shutil.rmtree(base_dir, ignore_errors=True)
        if zip_path and os.path.exists(zip_path):
            os.remove(zip_path)
157
+
158
+
159
+ # Helper to extract hyperlinks
160
def extract_hyperlinks(responses):
    """Collect top-level string values that start with ``http``.

    Args:
        responses: Iterable of responses. Each entry may be a JSON document
            as text, or an already-parsed dict. Entries that are not valid
            JSON, or whose JSON is not an object, are skipped instead of
            raising.

    Returns:
        A list of the matching values, in the order encountered.
    """
    hyperlinks = []
    for response in responses:
        if isinstance(response, (str, bytes, bytearray)):
            try:
                parsed_response = json.loads(response)
            except ValueError:
                # Covers json.JSONDecodeError and undecodable bytes.
                continue
        else:
            parsed_response = response
        if not isinstance(parsed_response, dict):
            continue
        hyperlinks.extend(
            value
            for value in parsed_response.values()
            if isinstance(value, str) and value.startswith("http")
        )
    return hyperlinks
167
+
168
+ # Helper to format text with line numbers
169
def format_with_line_numbers(text):
    """Prefix each line of ``text`` with its 1-based number and ``": "``."""
    numbered_lines = []
    for number, line in enumerate(text.splitlines(), start=1):
        numbered_lines.append(f"{number}: {line}")
    return '\n'.join(numbered_lines)
173
+
174
+ # Save responses to Cosmos DB
175
def save_to_cosmos_db(query, response1, response2):
    """Store a query and its two responses as one Cosmos DB item.

    The container is read from ``st.session_state["cosmos_container"]``.
    NOTE(review): nothing in the visible part of this file sets that key -
    confirm where it is populated.

    Args:
        query: The search text.
        response1: First response. JSON text is stored parsed; any other
            value is stored as given.
        response2: Second response, handled the same way.

    The outcome is reported through ``st.success`` / ``st.error``; nothing is
    returned.
    """
    cosmos_container = st.session_state.get("cosmos_container")
    if not cosmos_container:
        st.error("Cosmos DB is not initialized.")
        return

    def _parse_if_json(value):
        # Responses may be plain Markdown rather than JSON; keep them raw
        # instead of failing the whole save.
        if isinstance(value, str):
            try:
                return json.loads(value)
            except ValueError:
                return value
        return value

    record = {
        # The success message below reads record['id'], so the id has to be
        # assigned here rather than left to the service.
        "id": str(uuid.uuid4()),
        "query": query,
        "response1": _parse_if_json(response1),
        "response2": _parse_if_json(response2),
    }
    try:
        cosmos_container.create_item(body=record)
        st.success(f"Record saved successfully with ID: {record['id']}")
    except exceptions.CosmosHttpResponseError as e:
        st.error(f"Error saving record to Cosmos DB: {e}")
190
+
191
+
192
+ # Add dropdowns for model and database choices
193
def search_glossary(query):
    """Run ``query`` against the ArXiv RAG Gradio app and render the answers.

    Renders two selectboxes (LLM model, database) and a Run button. When the
    button is pressed it:

    1. asks every listed model (except the 'None' entry) via ``/ask_llm`` and
       shows each answer as Markdown and as a numbered code block;
    2. calls ``/update_with_rag_md`` with the selected model and database and
       shows both returned strings;
    3. lists the hyperlinks found in the answers;
    4. shows the first LLM answer and the RAG output with line numbers;
    5. hands the RAG output and the first LLM answer to ``save_to_cosmos_db``.

    Args:
        query: The search text.
    """
    import re  # only needed here, for pulling URLs out of Markdown answers

    st.markdown(f"### πŸ” Search Glossary for: `{query}`")

    # Dropdown for model selection
    model_options = ['mistralai/Mixtral-8x7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None']
    model_choice = st.selectbox('🧠 Select LLM Model', options=model_options, index=1)

    # Dropdown for database selection
    database_options = ['Semantic Search', 'Arxiv Search - Latest - (EXPERIMENTAL)']
    database_choice = st.selectbox('πŸ“š Select Database', options=database_options, index=0)

    if not st.button("πŸš€ Run"):
        return

    st.markdown(f"- {query}")

    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    # Ask each real model in turn; 'None' is only meaningful for the RAG call.
    llm_answers = []
    for model_name in (name for name in model_options if name != 'None'):
        answer = client.predict(
            prompt=query,
            llm_model_picked=model_name,
            stream_outputs=True,
            api_name="/ask_llm"
        )
        st.markdown(answer)
        st.code(answer, language="python", line_numbers=True)
        llm_answers.append(answer)

    # The RAG endpoint returns two strings. The defaults of the two dropdowns
    # are the values this call previously had hard-coded.
    rag_result = client.predict(
        message=query,
        llm_results_use=10,
        database_choice=database_choice,
        llm_model_picked=model_choice,
        api_name="/update_with_rag_md"
    )
    rag_parts = [rag_result[0], rag_result[1]]
    for part in rag_parts:
        st.markdown(part)
        st.code(part, language="python", line_numbers=True, wrap_lines=True)

    first_answer = str(llm_answers[0])
    rag_text = "\n\n".join(str(part) for part in rag_parts)

    # The answers are Markdown, not JSON, so links are found by pattern.
    # dict.fromkeys de-duplicates while keeping first-seen order.
    url_pattern = re.compile(r"https?://[^\s<>()\[\]\"']+")
    found_urls = (
        url.rstrip(".,;:")
        for url in url_pattern.findall(first_answer + "\n" + rag_text)
    )
    hyperlinks = list(dict.fromkeys(found_urls))
    st.markdown("### πŸ”— Aggregated Hyperlinks")
    for link in hyperlinks:
        st.markdown(f"πŸ”— [{link}]({link})")

    # Show responses in a code format with line numbers
    st.markdown("### πŸ“œ Response Outputs with Line Numbers")
    st.code(f"Response 1: \n{format_with_line_numbers(first_answer)}\n\nResponse 2: \n{format_with_line_numbers(rag_text)}", language="json")

    # save_to_cosmos_db parses its arguments as JSON, so serialise them first.
    save_to_cosmos_db(query, json.dumps(rag_parts), json.dumps(first_answer))
279
+
280
+
281
+
282
+ # πŸ“ Function to process text input
283
def process_text(text_input):
    """Record ``text_input`` as a chat message and run the glossary search.

    The user message is appended to ``st.session_state.messages`` and echoed
    in a chat bubble, then ``search_glossary`` renders inside the assistant
    bubble. Empty input does nothing.

    The OpenAI completion path is switched off by a local flag. Saving the
    answer to a file and appending the assistant message both need the
    completion text, so they run only on that path.
    """
    if not text_input:
        return

    if 'messages' not in st.session_state:
        st.session_state.messages = []

    st.session_state.messages.append({"role": "user", "content": text_input})

    with st.chat_message("user"):
        st.markdown(text_input)

    with st.chat_message("assistant"):
        search_glossary(text_input)

    use_openai = False
    if not use_openai:
        return

    # NOTE(review): `openai` and `MODEL` are only present as commented-out
    # configuration in this file; restore them before enabling this path.
    completion = openai.ChatCompletion.create(
        model=MODEL,
        messages=[
            {"role": m["role"], "content": m["content"]}
            for m in st.session_state.messages
        ],
        stream=False
    )
    return_text = completion.choices[0].message.content
    st.write("Assistant: " + return_text)

    create_and_save_file(return_text, file_type="md", prompt=text_input, is_image=False, should_save=True)
    st.session_state.messages.append({"role": "assistant", "content": return_text})
315
+
316
+ # πŸ“„ Function to generate a filename
317
def generate_filename(text, file_type, max_length=200):
    """Build a filesystem-friendly filename from free text.

    Characters other than alphanumerics, spaces, ``.`` and ``_`` become
    ``_``; runs of whitespace collapse to a single ``_``.

    Args:
        text: Source text, e.g. a prompt or a Markdown title.
        file_type: Extension without the leading dot.
        max_length: Maximum size of the name stem in UTF-8 bytes, so long
            prompts cannot exceed common filesystem name limits.

    Returns:
        ``"<stem>.<file_type>"``; the stem is ``"untitled"`` when nothing
        usable remains.
    """
    safe_text = "".join(c if c.isalnum() or c in (' ', '.', '_') else '_' for c in text)
    safe_text = "_".join(safe_text.strip().split())
    # Truncate on bytes, then drop any character cut in half at the boundary.
    safe_text = safe_text.encode("utf-8")[:max_length].decode("utf-8", errors="ignore")
    if not safe_text:
        safe_text = "untitled"
    return f"{safe_text}.{file_type}"
323
+
324
+ # 🏷️ Function to extract markdown title
325
def extract_markdown_title(content):
    """Return the text of the first line starting with ``#``, or ``None``.

    Leading ``#`` characters and surrounding whitespace are stripped from the
    returned heading text.
    """
    heading_texts = (
        line.lstrip('#').strip()
        for line in content.splitlines()
        if line.startswith('#')
    )
    return next(heading_texts, None)
332
+
333
+ # πŸ’Ύ Function to create and save a file
334
def create_and_save_file(content, file_type="md", prompt=None, is_image=False, should_save=True):
    """Write ``content`` to a file whose name is derived from it.

    The filename comes from ``prompt`` when given, otherwise from
    ``content``. For Markdown files the first ``#`` heading in the content,
    if any, takes precedence.

    Args:
        content: Text to write.
        file_type: File extension without the dot.
        prompt: Optional prompt; when present (and ``is_image`` is false) it
            is written before the content, separated by a blank line.
        is_image: When true only ``content`` is written. The file is still
            opened in text mode.
        should_save: When false nothing is written.

    Returns:
        The filename written, or ``None`` when ``should_save`` is false.
    """
    if not should_save:
        return None

    # Step 1: Generate filename based on the prompt or content
    filename = generate_filename(prompt if prompt else content, file_type)

    # Step 2: A Markdown heading in the content overrides that name
    if file_type == "md":
        title_from_content = extract_markdown_title(content)
        if title_from_content:
            filename = generate_filename(title_from_content, file_type)

    # Step 3: Save the file
    with open(filename, "w", encoding="utf-8") as f:
        if is_image or not prompt:
            # prompt defaults to None; concatenating it would raise TypeError.
            f.write(content)
        else:
            f.write(prompt + "\n\n" + content)

    return filename
359
+
360
+ # 🎈 Let's modify the main app to be more fun!
361
def _init_session_state():
    """Give every session-state key the app reads its initial value."""
    defaults = {
        'logged_in': False,
        'selected_records': [],
        'client': None,
        'selected_database': None,
        'selected_container': None,
        'selected_document_id': None,
        'current_index': 0,
        'cloned_doc': None,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _collect_strings_with_spaces(obj):
    """Return every string nested in ``obj`` (dicts/lists) that contains a space."""
    found = []
    if isinstance(obj, dict):
        for value in obj.values():
            found.extend(_collect_strings_with_spaces(value))
    elif isinstance(obj, list):
        for item in obj:
            found.extend(_collect_strings_with_spaces(item))
    elif isinstance(obj, str) and ' ' in obj:
        found.append(obj)
    return found


def _current_document(documents):
    """Return the document at ``current_index``, clamping the index into range."""
    last_index = len(documents) - 1
    st.session_state.current_index = max(0, min(st.session_state.current_index, last_index))
    return documents[st.session_state.current_index]


def _render_prev_next(total_docs, prev_key, next_key):
    """Render Previous/Next buttons that move ``current_index`` within bounds."""
    col_prev, col_next = st.columns([1, 1])
    with col_prev:
        if st.button("⬅️ Previous", key=prev_key):
            if st.session_state.current_index > 0:
                st.session_state.current_index -= 1
                st.rerun()
    with col_next:
        if st.button("➑️ Next", key=next_key):
            if st.session_state.current_index < total_docs - 1:
                st.session_state.current_index += 1
                st.rerun()


def _save_edited_document(container, doc_str, forced_id=None):
    """Parse ``doc_str`` as JSON, upsert it and report the outcome in the UI."""
    try:
        updated_doc = json.loads(doc_str)
    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON: {str(e)} 🚫")
        return
    if forced_id is not None:
        updated_doc['id'] = forced_id  # Include the possibly edited ID
    success, message = update_record(container, updated_doc)
    if success:
        st.success(f"Document {updated_doc['id']} saved successfully.")
        st.session_state.selected_document_id = updated_doc['id']
        st.rerun()
    else:
        st.error(message)


def _render_markdown_view(documents):
    """Show the current document as JSON plus search links for its text values."""
    total_docs = len(documents)
    doc = _current_document(documents)
    st.markdown(f"#### Document ID: {doc.get('id', '')}")

    # One link builder per target site; each takes the extracted text.
    search_urls = {
        "πŸš€πŸŒŒArXiv": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}",
        "πŸƒAnalyst": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix')}",
        "πŸ“šPyCoder": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix2')}",
        "πŸ”¬JSCoder": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix3')}",
        "🏠": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}",
        "πŸ“–": lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}",
        "πŸ”": lambda k: f"https://www.google.com/search?q={quote(k)}",
        "▢️": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
        "πŸ”Ž": lambda k: f"https://www.bing.com/search?q={quote(k)}",
        "πŸŽ₯": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
        "🐦": lambda k: f"https://twitter.com/search?q={quote(k)}",
    }

    st.markdown("#### πŸ”— Links for Extracted Texts")
    for term in _collect_strings_with_spaces(doc):
        links_md = ' '.join(f"[{emoji}]({url(term)})" for emoji, url in search_urls.items())
        st.markdown(f"**{term}** <small>{links_md}</small>", unsafe_allow_html=True)

    # Show the document content as markdown
    content = json.dumps(doc, indent=2)
    st.markdown(f"```json\n{content}\n```")

    _render_prev_next(total_docs, 'prev_markdown', 'next_markdown')


def _render_code_editor_view(container, documents):
    """Show the current document in an editable text area with a save button."""
    total_docs = len(documents)
    doc = _current_document(documents)
    st.markdown(f"#### Document ID: {doc.get('id', '')}")
    doc_str = st.text_area("Edit Document", value=json.dumps(doc, indent=2), height=300, key=f'code_editor_{st.session_state.current_index}')
    _render_prev_next(total_docs, 'prev_code', 'next_code')
    if st.button("πŸ’Ύ Save Changes", key=f'save_button_{st.session_state.current_index}'):
        _save_edited_document(container, doc_str)


def _render_edit_and_save_view(container, documents_to_display):
    """Show each displayed document in its own column with an editable id and body."""
    st.markdown("#### Edit the document fields below:")
    cols = st.columns(len(documents_to_display))
    for idx, (col, doc) in enumerate(zip(cols, documents_to_display)):
        with col:
            st.markdown(f"##### Document ID: {doc.get('id', '')}")
            editable_id = st.text_input("ID", value=doc.get('id', ''), key=f'edit_id_{idx}')
            # The id is edited separately, so it is left out of the JSON body.
            editable_doc = doc.copy()
            editable_doc.pop('id', None)
            doc_str = st.text_area("Document Content (in JSON format)", value=json.dumps(editable_doc, indent=2), height=300, key=f'doc_str_{idx}')
            if st.button("πŸ’Ύ Save Changes", key=f'save_button_{idx}'):
                _save_edited_document(container, doc_str, forced_id=editable_id)


def _render_clone_view(container, documents_to_display):
    """Offer a clone button per document and an editor for the pending clone."""
    st.markdown("#### Clone a document:")
    for idx, doc in enumerate(documents_to_display):
        st.markdown(f"##### Document ID: {doc.get('id', '')}")
        if st.button("πŸ“„ Clone Document", key=f'clone_button_{idx}'):
            cloned_doc = doc.copy()
            cloned_doc['id'] = generate_unique_id()
            st.session_state.cloned_doc = cloned_doc
            st.session_state.cloned_doc_str = json.dumps(cloned_doc, indent=2)
            st.session_state.clone_mode = True
            st.rerun()

    if not st.session_state.get('clone_mode', False):
        return

    st.markdown("#### Edit Cloned Document:")
    cloned_doc_str = st.text_area("Cloned Document Content (in JSON format)", value=st.session_state.cloned_doc_str, height=300)
    if st.button("πŸ’Ύ Save Cloned Document"):
        try:
            new_doc = json.loads(cloned_doc_str)
        except json.JSONDecodeError as e:
            st.error(f"Invalid JSON: {str(e)} 🚫")
            return
        success, message = insert_record(container, new_doc)
        if success:
            st.success(f"Cloned document saved with id: {new_doc['id']} πŸŽ‰")
            st.session_state.selected_document_id = new_doc['id']
            st.session_state.clone_mode = False
            st.session_state.cloned_doc = None
            st.session_state.cloned_doc_str = ''
            st.rerun()
        else:
            st.error(message)


def _render_new_record_view(container):
    """Render the form that creates a brand-new document."""
    st.markdown("#### Create a new document:")
    new_id = st.text_input("ID", value=generate_unique_id(), key='new_id')
    new_doc_str = st.text_area("Document Content (in JSON format)", value='{}', height=300)
    if st.button("βž• Create New Document"):
        try:
            new_doc = json.loads(new_doc_str)
        except json.JSONDecodeError as e:
            st.error(f"Invalid JSON: {str(e)} 🚫")
            return
        new_doc['id'] = new_id  # Use the provided ID
        success, message = insert_record(container, new_doc)
        if success:
            st.success(f"New document created with id: {new_doc['id']} πŸŽ‰")
            st.session_state.selected_document_id = new_doc['id']
            st.rerun()
        else:
            st.error(message)


def _render_database_section():
    """Render container selection, export, and the document viewer/editor."""
    database = st.session_state.client.get_database_client(st.session_state.selected_database)
    containers = get_containers(database)
    selected_container = st.sidebar.selectbox("πŸ“ Select Container", containers)

    if selected_container != st.session_state.selected_container:
        st.session_state.selected_container = selected_container
        st.session_state.selected_document_id = None
        st.session_state.current_index = 0
        st.rerun()

    # Bound up front so the summary table below never sees an undefined name.
    documents_to_display = []

    if st.session_state.selected_container:
        container = database.get_container_client(st.session_state.selected_container)

        if st.button("πŸ“¦ Export Container Data"):
            download_link = archive_current_container(st.session_state.selected_database, st.session_state.selected_container, st.session_state.client)
            # The helper returns either an <a> link or an error message.
            if download_link.startswith('<a'):
                st.markdown(download_link, unsafe_allow_html=True)
            else:
                st.error(download_link)

        documents = get_documents(container)
        total_docs = len(documents)

        if total_docs > 5:
            documents_to_display = documents[:5]
            st.info("Showing top 5 most recent documents.")
        else:
            documents_to_display = documents
            st.info(f"Showing all {len(documents_to_display)} documents.")

        if documents_to_display:
            view_options = ['Show as Markdown', 'Show as Code Editor', 'Show as Edit and Save', 'Clone Document', 'New Record']
            selected_view = st.selectbox("Select Viewer/Editor", view_options, index=2)

            if selected_view == 'Show as Markdown':
                _render_markdown_view(documents)
            elif selected_view == 'Show as Code Editor':
                _render_code_editor_view(container, documents)
            elif selected_view == 'Show as Edit and Save':
                _render_edit_and_save_view(container, documents_to_display)
            elif selected_view == 'Clone Document':
                _render_clone_view(container, documents_to_display)
            elif selected_view == 'New Record':
                _render_new_record_view(container)
        else:
            st.sidebar.info("No documents found in this container. πŸ“­")

    # Main content area: tabular summary of the displayed documents.
    st.subheader(f"πŸ“Š Container: {st.session_state.selected_container}")
    if st.session_state.selected_container:
        if documents_to_display:
            df = pd.DataFrame(documents_to_display)
            st.dataframe(df)
        else:
            st.info("No documents to display. 🧐")


def _render_github_section():
    """Render the GitHub clone-to-zip and push-to-new-repository controls."""
    st.subheader("πŸ™ GitHub Operations")
    github_token = os.environ.get("GITHUB")  # Read GitHub token from environment variable
    source_repo = st.text_input("Source GitHub Repository URL", value="https://github.com/AaronCWacker/AIExamples-8-24-Streamlit")
    new_repo_name = st.text_input("New Repository Name (for cloning)", value=f"AIExample-Clone-{datetime.now().strftime('%Y%m%d_%H%M%S')}")

    col1, col2 = st.columns(2)
    with col1:
        if st.button("πŸ“₯ Clone Repository"):
            if github_token and source_repo:
                # Both paths are bound before the try so the cleanup in
                # `finally` can never hit an unbound name.
                local_path = f"./temp_repo_{datetime.now().strftime('%Y%m%d%H%M%S')}"
                zip_filename = f"{new_repo_name}.zip"
                try:
                    download_github_repo(source_repo, local_path)
                    create_zip_file(local_path, zip_filename[:-4])
                    st.markdown(get_base64_download_link(zip_filename, zip_filename), unsafe_allow_html=True)
                    st.success("Repository cloned successfully! πŸŽ‰")
                except Exception as e:
                    st.error(f"An error occurred: {str(e)} 😒")
                finally:
                    if os.path.exists(local_path):
                        shutil.rmtree(local_path)
                    if os.path.exists(zip_filename):
                        os.remove(zip_filename)
            else:
                st.error("Please ensure GitHub token is set in environment variables and source repository URL is provided. πŸ”‘β“")

    with col2:
        if st.button("πŸ“€ Push to New Repository"):
            if github_token and source_repo:
                # Bound before the try: creating the repo may fail before any
                # clone exists, and `finally` still needs the path.
                local_path = f"./temp_repo_{datetime.now().strftime('%Y%m%d%H%M%S')}"
                try:
                    g = Github(github_token)
                    new_repo = create_repo(g, new_repo_name)
                    download_github_repo(source_repo, local_path)
                    push_to_github(local_path, new_repo, github_token)
                    st.success(f"Repository pushed successfully to {new_repo.html_url} πŸš€")
                except Exception as e:
                    st.error(f"An error occurred: {str(e)} 😒")
                finally:
                    if os.path.exists(local_path):
                        shutil.rmtree(local_path)
            else:
                st.error("Please ensure GitHub token is set in environment variables and source repository URL is provided. πŸ”‘β“")


def main():
    """Streamlit entry point: ArXiv query shortcut, Cosmos DB browser, GitHub tools.

    Flow:
      1. Seed session state.
      2. If the URL carries ``q`` or ``query``, run the search and stop.
      3. Require the Cosmos key from the environment, otherwise show an
         error and return.
      4. Render the database/container navigator, the document views and the
         GitHub section; Cosmos and unexpected errors are shown in the page.
      5. Render the logout button, which resets the selection state.
    """
    st.title("πŸ™Git🌌CosmosπŸ’« - Azure Cosmos DB and Github Agent")

    _init_session_state()

    # Run an ArXiv search directly when the URL carries a query parameter.
    try:
        query_params = st.query_params
        query = query_params.get('q') or query_params.get('query') or ''
        if query:
            process_text(query)
            st.stop()  # Stop further execution
    except Exception as e:
        # Report the failure instead of hiding it behind a blank line, then
        # fall through to the regular app as before.
        st.error(f"Could not process the search query: {e}")

    # Automatic login from the environment-provided key.
    if Key:
        st.session_state.primary_key = Key
        st.session_state.logged_in = True
    else:
        st.error("Cosmos DB Key is not set in environment variables. πŸ”‘βŒ")
        return  # Can't proceed without a key

    if st.session_state.logged_in:
        try:
            if st.session_state.client is None:
                st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)

            st.sidebar.title("πŸ™Git🌌CosmosπŸ’«πŸ—„οΈNavigator")

            databases = get_databases(st.session_state.client)
            selected_db = st.sidebar.selectbox("πŸ—ƒοΈ Select Database", databases)

            # A different database invalidates every dependent selection.
            if selected_db != st.session_state.selected_database:
                st.session_state.selected_database = selected_db
                st.session_state.selected_container = None
                st.session_state.selected_document_id = None
                st.session_state.current_index = 0
                st.rerun()

            if st.session_state.selected_database:
                _render_database_section()

            _render_github_section()

        except exceptions.CosmosHttpResponseError as e:
            st.error(f"Failed to connect to Cosmos DB. HTTP error: {str(e)} 🚨")
        except Exception as e:
            st.error(f"An unexpected error occurred: {str(e)} 😱")

    # Logout clears the client and selections. Because login is automatic
    # whenever Key is set, the next run logs straight back in.
    if st.session_state.logged_in and st.sidebar.button("πŸšͺ Logout"):
        st.session_state.logged_in = False
        st.session_state.selected_records.clear()
        st.session_state.client = None
        st.session_state.selected_database = None
        st.session_state.selected_container = None
        st.session_state.selected_document_id = None
        st.session_state.current_index = 0
        st.rerun()
700
+
701
+ if __name__ == "__main__":
702
+ main()