awacke1 committed on
Commit
2bc7c37
·
verified ·
1 Parent(s): 286ed5f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +708 -0
app.py ADDED
@@ -0,0 +1,708 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from azure.cosmos import CosmosClient, exceptions
3
+ import os
4
+ import pandas as pd
5
+ import traceback
6
+ import shutil
7
+ from github import Github
8
+ from git import Repo
9
+ from datetime import datetime
10
+ import base64
11
+ import json
12
+ import uuid # 🎲 For generating unique IDs
13
+ from urllib.parse import quote # πŸ”— For encoding URLs
14
+ from gradio_client import Client # 🌐 For connecting to Gradio apps
15
+ import openai # πŸ€– For OpenAI API interactions
16
+ import markdown # πŸ“„ For parsing markdown content
17
+ from bs4 import BeautifulSoup # 🍲 For parsing HTML content
18
+
19
+ # πŸŽ‰ Welcome to our fun-filled Cosmos DB and GitHub Integration app!
20
# Page setup must be the first Streamlit call in the script.
st.set_page_config(layout="wide")

# Cosmos DB configuration.
# The endpoint can be overridden with COSMOS_ENDPOINT; the default is the
# account this app was originally written against.
ENDPOINT = os.environ.get("COSMOS_ENDPOINT", "https://acae-afd.documents.azure.com:443/")
DATABASE_NAME = os.environ.get("COSMOS_DATABASE_NAME")
CONTAINER_NAME = os.environ.get("COSMOS_CONTAINER_NAME")
Key = os.environ.get("Key")  # Cosmos DB primary key; the app refuses to start without it.

# Base URL used to build "search this term" links back into this app.
# Override with LOCAL_APP_URL when the app is not served from localhost.
LOCAL_APP_URL = os.environ.get("LOCAL_APP_URL", "http://localhost:8501")

# OpenAI configuration. OPENAI_MODEL selects the chat model.
openai.api_key = os.environ.get("OPENAI_API_KEY")
MODEL = os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo")
34
+
35
+ # πŸ™ GitHub configuration
36
def download_github_repo(url, local_path):
    """Clone the repository at ``url`` into ``local_path``.

    Any existing directory at ``local_path`` is removed first so the clone
    always starts from an empty target.
    """
    target_already_present = os.path.exists(local_path)
    if target_already_present:
        shutil.rmtree(local_path)
    Repo.clone_from(url, local_path)
41
+
42
def create_zip_file(source_dir, output_filename):
    """Zip the contents of ``source_dir`` into ``<output_filename>.zip``.

    ``output_filename`` is the archive base name, without the ``.zip``
    suffix; ``shutil.make_archive`` appends it.
    """
    shutil.make_archive(base_name=output_filename, format='zip', root_dir=source_dir)
45
+
46
def create_repo(g, repo_name):
    """Create a repository named ``repo_name`` for the user behind ``g``.

    ``g`` is the GitHub client object; the value returned by its user's
    ``create_repo`` call is passed straight back to the caller.
    """
    return g.get_user().create_repo(repo_name)
50
+
51
def push_to_github(local_path, repo, github_token):
    """Commit everything in ``local_path`` and push it to ``repo``.

    The 'origin' remote is pointed at ``repo`` using a token-authenticated
    HTTPS URL (created if missing, re-targeted otherwise). The branch pushed
    is the active branch, or a new 'main' branch when the local repository
    has no heads yet.
    """
    authenticated_url = f"https://{github_token}@github.com/{repo.full_name}.git"
    working_repo = Repo(local_path)

    existing_remote_names = [remote.name for remote in working_repo.remotes]
    if 'origin' not in existing_remote_names:
        origin = working_repo.create_remote('origin', authenticated_url)
    else:
        origin = working_repo.remote('origin')
        origin.set_url(authenticated_url)

    if working_repo.heads:
        current_branch = working_repo.active_branch.name
    else:
        # No branch exists yet: start one so there is something to push.
        working_repo.git.checkout('-b', 'main')
        current_branch = 'main'

    # Stage every change, then commit only if staging produced a difference.
    working_repo.git.add(A=True)
    if working_repo.is_dirty():
        working_repo.git.commit('-m', 'Initial commit')

    origin.push(refspec=f'{current_branch}:{current_branch}')
74
+
75
def get_base64_download_link(file_path, file_name):
    """Return an HTML ``<a>`` tag that downloads ``file_path`` as ``file_name``.

    The file is read in full and embedded as a base64 ``data:`` URI with the
    ``application/zip`` MIME type, so the file may be deleted once this
    function returns.

    ``file_name`` is HTML-escaped because the link is rendered with
    ``unsafe_allow_html=True`` and the name can come from user input.
    """
    import html  # local import: keeps this fix independent of the file's import block

    with open(file_path, "rb") as file:
        contents = file.read()
    base64_encoded = base64.b64encode(contents).decode()
    safe_name = html.escape(file_name, quote=True)
    return f'<a href="data:application/zip;base64,{base64_encoded}" download="{safe_name}">⬇️ Download {safe_name}</a>'
81
+
82
+
83
+ # 🧭 New functions for dynamic sidebar navigation
84
def get_databases(client):
    """Return the ``id`` of every database that ``client`` lists."""
    database_ids = []
    for database_properties in client.list_databases():
        database_ids.append(database_properties['id'])
    return database_ids
87
+
88
def get_containers(database):
    """Return the ``id`` of every container that ``database`` lists."""
    container_ids = []
    for container_properties in database.list_containers():
        container_ids.append(container_properties['id'])
    return container_ids
91
+
92
def get_documents(container, limit=None):
    """Return documents from ``container``, newest first (by ``_ts``).

    Args:
        container: Cosmos container client to query.
        limit: Maximum number of documents to return; ``None`` returns all.

    ``limit`` is still forwarded as ``max_item_count``, but that argument is
    only a page-size hint: iterating the result walks every page. The cap is
    therefore enforced here with ``islice`` so no more than ``limit`` items
    are consumed.
    """
    from itertools import islice  # local import: keeps this fix self-contained

    if limit is not None and limit <= 0:
        return []

    query = "SELECT * FROM c ORDER BY c._ts DESC"
    results = container.query_items(
        query=query,
        enable_cross_partition_query=True,
        max_item_count=limit,
    )
    if limit is None:
        return list(results)
    return list(islice(results, limit))
97
+
98
+
99
+ # 🌟 Cosmos DB functions
100
def insert_record(container, record):
    """Create ``record`` in ``container``.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` when the create
        call raises; exceptions are reported in the message, never re-raised.
    """
    try:
        container.create_item(body=record)
    except exceptions.CosmosHttpResponseError as http_error:
        return False, f"HTTP error occurred: {str(http_error)} 🚨"
    except Exception as unexpected_error:
        return False, f"An unexpected error occurred: {str(unexpected_error)} 😱"
    else:
        return True, "Record inserted successfully! πŸŽ‰"
108
+
109
def update_record(container, updated_record):
    """Upsert ``updated_record`` into ``container``.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` on failure. For
        non-HTTP errors the message carries the full traceback text.
    """
    try:
        container.upsert_item(body=updated_record)
        # Built inside the try so a record without 'id' is reported as a
        # failure tuple instead of raising to the caller.
        outcome = (True, f"Record with id {updated_record['id']} successfully updated. πŸ› οΈ")
    except exceptions.CosmosHttpResponseError as http_error:
        outcome = (False, f"HTTP error occurred: {str(http_error)} 🚨")
    except Exception:
        outcome = (False, f"An unexpected error occurred: {traceback.format_exc()} 😱")
    return outcome
117
+
118
def delete_record(container, name, id):
    """Delete the item ``id`` from ``container``.

    ``id`` is passed both as the item id and as the partition key value.
    ``name`` is used only in the success message.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` when the item is
        missing or the delete call raises.
    """
    try:
        container.delete_item(item=id, partition_key=id)
        outcome = (True, f"Successfully deleted record with name: {name} and id: {id} πŸ—‘οΈ")
    except exceptions.CosmosResourceNotFoundError:
        outcome = (False, f"Record with id {id} not found. It may have been already deleted. πŸ•΅οΈβ€β™‚οΈ")
    except exceptions.CosmosHttpResponseError as http_error:
        outcome = (False, f"HTTP error occurred: {str(http_error)} 🚨")
    except Exception:
        outcome = (False, f"An unexpected error occurred: {traceback.format_exc()} 😱")
    return outcome
128
+
129
+ # 🎲 Function to generate a unique UUID
130
def generate_unique_id():
    """Return a freshly generated random (version 4) UUID as a string."""
    new_uuid = uuid.uuid4()
    return str(new_uuid)
133
+
134
+ # πŸ“¦ Function to archive current container
135
def archive_current_container(database_name, container_name, client):
    """Export every item of a container as a zip and return a download link.

    Each item is written as ``<container_name>/<item id>.json`` inside the
    archive. All staging files and the zip itself live in a temporary
    directory that is removed before returning, so repeated exports do not
    accumulate files in the working directory.

    Returns:
        An HTML ``<a>`` download link on success, or a plain error message
        string on failure (callers tell them apart by the ``<a`` prefix).
    """
    import tempfile  # local import: keeps this fix self-contained

    try:
        db_client = client.get_database_client(database_name)
        container_client = db_client.get_container_client(container_name)

        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        archive_name = f"{container_name}_archive_{stamp}"

        with tempfile.TemporaryDirectory() as work_dir:
            staging_dir = os.path.join(work_dir, "staging")
            container_dir = os.path.join(staging_dir, container_name)
            os.makedirs(container_dir)

            for index, item in enumerate(container_client.read_all_items()):
                # The index keeps fallback names unique when several items
                # lack an id within the same second.
                item_id = item.get('id', f"unknown_{stamp}_{index}")
                item_path = os.path.join(container_dir, f"{item_id}.json")
                with open(item_path, 'w', encoding='utf-8') as f:
                    json.dump(item, f, indent=2)

            zip_path = shutil.make_archive(os.path.join(work_dir, archive_name), 'zip', staging_dir)
            # The link embeds the zip bytes, so it must be built before the
            # temporary directory is cleaned up.
            return get_base64_download_link(zip_path, f"{archive_name}.zip")
    except Exception as e:
        return f"An error occurred while archiving data: {str(e)} 😒"
160
+
161
+ # πŸ” Search Glossary function
162
def search_glossary(query, container):
    """Run the ArXiv RAG lookups for ``query``, display and persist the result.

    Two endpoints of the same Gradio app are called: ``/ask_llm`` for a
    summary and ``/update_with_rag_md`` for paper references. The combined
    markdown is rendered, saved to a file, converted to a JSON structure and
    inserted into ``container`` as a new record.

    Returns:
        The combined markdown text.
    """
    st.markdown(f"- {query}")

    # Both calls target the same Gradio app, so one client is enough.
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    # Run 1 - paper summary via the LLM endpoint.
    response2 = client.predict(
        query,                 # str in 'parameter_13' Textbox component
        "google/gemma-7b-it",  # LLM Model Dropdown component
        True,                  # Stream output Checkbox component
        api_name="/ask_llm"
    )
    st.write('πŸ” Run of Multi-Agent System Paper Summary Spec is Complete')
    st.markdown(response2)

    # Run 2 - paper references via the RAG endpoint.
    response1 = client.predict(
        query,
        10,
        "Semantic Search - up to 10 Mar 2024",   # Search Source Dropdown component
        "mistralai/Mixtral-8x7B-Instruct-v0.1",  # LLM Model Dropdown component
        api_name="/update_with_rag_md"
    )
    st.write('πŸ” Run of Multi-Agent System Paper References is Complete')
    responseall = response2 + response1[0] + response1[1]
    st.markdown(responseall)

    # Persist the response to disk and mirror it into Cosmos DB.
    filename = create_and_save_file(responseall, file_type="md", prompt=query, is_image=False, should_save=True)
    json_content = markdown_to_json(responseall)
    record = {
        'id': generate_unique_id(),
        'query': query,
        'content': json_content,
        'filename': filename,
        'timestamp': datetime.now().isoformat()
    }

    success, message = insert_record(container, record)
    if success:
        st.success("Record inserted into Cosmos DB successfully!")
    else:
        st.error(f"Failed to insert record into Cosmos DB: {message}")

    return responseall
214
+
215
+ # πŸ“ Function to process text input
216
def process_text(text_input, container):
    """Send ``text_input`` to the chat model, show the reply and persist it.

    Does nothing when ``text_input`` is empty. Otherwise the running chat
    history in ``st.session_state.messages`` is extended with the user turn
    and the assistant reply, the reply is saved to a markdown file, and a
    record describing the exchange is inserted into ``container``.
    """
    if not text_input:
        return

    if 'messages' not in st.session_state:
        st.session_state.messages = []
    st.session_state.messages.append({"role": "user", "content": text_input})

    with st.chat_message("user"):
        st.markdown(text_input)

    with st.chat_message("assistant"):
        # Forward only role/content of every stored turn to the model.
        history = [
            {"role": turn["role"], "content": turn["content"]}
            for turn in st.session_state.messages
        ]
        completion = openai.ChatCompletion.create(model=MODEL, messages=history, stream=False)
        return_text = completion.choices[0].message.content
        st.write("Assistant: " + return_text)
        filename = create_and_save_file(return_text, file_type="md", prompt=text_input, is_image=False, should_save=True)
        st.session_state.messages.append({"role": "assistant", "content": return_text})

    # Mirror the exchange into Cosmos DB as a structured record.
    record = {
        'id': generate_unique_id(),
        'query': text_input,
        'content': markdown_to_json(return_text),
        'filename': filename,
        'timestamp': datetime.now().isoformat()
    }
    success, message = insert_record(container, record)
    if not success:
        st.error(f"Failed to insert record into Cosmos DB: {message}")
    else:
        st.success("Record inserted into Cosmos DB successfully!")
258
+
259
+ # πŸ“„ Function to generate a filename
260
def generate_filename(text, file_type, max_length=200):
    """Derive a filesystem-friendly base name from ``text``.

    Every character that is not alphanumeric, a space, ``.`` or ``_`` becomes
    ``_``; runs of whitespace then collapse to a single ``_``.

    Args:
        text: Source text (a prompt or a title).
        file_type: Unused here; the caller appends the extension. Kept for
            interface compatibility.
        max_length: Upper bound, in UTF-8 bytes, for the returned name. Long
            prompts would otherwise produce names that ``open()`` rejects;
            the default leaves room for the caller's timestamp and extension.

    Returns:
        The sanitized name, possibly empty when ``text`` is empty.
    """
    sanitized = "".join(ch if ch.isalnum() or ch in (' ', '.', '_') else '_' for ch in text)
    collapsed = "_".join(sanitized.split())

    encoded = collapsed.encode("utf-8")
    if len(encoded) > max_length:
        # Cut on bytes, then drop any multi-byte character split by the cut.
        collapsed = encoded[:max_length].decode("utf-8", errors="ignore")
    return collapsed
266
+
267
+ # 🏷️ Function to extract markdown title
268
def extract_markdown_title(content):
    """Return the text of the first line starting with ``#``, or ``None``.

    Leading ``#`` characters and surrounding whitespace are stripped from
    the matching line.
    """
    heading_texts = (
        line.lstrip('#').strip()
        for line in content.splitlines()
        if line.startswith('#')
    )
    return next(heading_texts, None)
275
+
276
+ # πŸ“„ Function to convert markdown to JSON structure
277
def markdown_to_json(md_content):
    """Convert markdown text into a flat list of element dictionaries.

    The markdown is rendered to HTML and every descendant node is visited in
    document order. ``h1``-``h3`` become ``heading`` entries, ``p`` becomes
    ``paragraph``, ``a`` becomes ``link`` and ``img`` becomes ``image``;
    all other nodes are ignored.
    """
    heading_levels = {'h1': 1, 'h2': 2, 'h3': 3}
    rendered = BeautifulSoup(markdown.markdown(md_content), 'html.parser')

    entries = []
    for node in rendered.descendants:
        tag = node.name
        if tag in heading_levels:
            entries.append({'type': 'heading', 'level': heading_levels[tag], 'text': node.get_text()})
        elif tag == 'p':
            entries.append({'type': 'paragraph', 'text': node.get_text()})
        elif tag == 'a':
            entries.append({'type': 'link', 'href': node.get('href'), 'text': node.get_text()})
        elif tag == 'img':
            entries.append({'type': 'image', 'src': node.get('src'), 'alt': node.get('alt')})
    return entries
295
+
296
+ # πŸ’Ύ Function to create and save a file
297
def create_and_save_file(content, file_type="md", prompt=None, is_image=False, should_save=True):
    """Save ``content`` under ``saved_files/`` and return the file name.

    The name is derived from ``prompt`` (or from ``content`` when no prompt
    is given). For markdown text, a title found in the content takes
    precedence. A timestamp and the ``file_type`` extension are appended.

    Args:
        content: Text to save, or raw bytes (for example image data).
        file_type: Extension of the saved file, without the dot.
        prompt: Optional text used to name the file.
        is_image: Kept for interface compatibility. The write mode is chosen
            from the type of ``content``, so bytes are always written in
            binary mode.
        should_save: When false, nothing is written and ``None`` is returned.

    Returns:
        The file name (not the full path), or ``None`` if saving is skipped.
    """
    if not should_save:
        return None

    is_binary = isinstance(content, (bytes, bytearray))

    # Bytes cannot be turned into a name, so fall back to an empty stem.
    name_source = prompt if prompt else ("" if is_binary else content)
    filename = generate_filename(name_source, file_type)

    if file_type == "md" and not is_binary:
        title_from_content = extract_markdown_title(content)
        if title_from_content:
            filename = generate_filename(title_from_content, file_type)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{filename}_{timestamp}.{file_type}"

    save_dir = "saved_files"
    os.makedirs(save_dir, exist_ok=True)
    filepath = os.path.join(save_dir, filename)

    if is_binary:
        # Text mode would reject bytes, so binary payloads get "wb".
        with open(filepath, "wb") as f:
            f.write(content)
    else:
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)

    return filename
333
+
334
+ # 🎈 Let's modify the main app to be more fun!
335
def _init_session_state():
    """Populate ``st.session_state`` with the keys the app reads, if missing."""
    defaults = {
        'logged_in': False,
        'selected_records': [],
        'client': None,
        'selected_database': None,
        'selected_container': None,
        'selected_document_id': None,
        'current_index': 0,
        'cloned_doc': None,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _handle_url_query(client):
    """Answer a ``?q=`` / ``?query=`` URL parameter, then stop the script run.

    Must be called after the Cosmos client exists. Stopping after the answer
    keeps widget-triggered reruns from repeating the model call and the
    Cosmos insert for the same URL.
    """
    query_params = st.experimental_get_query_params()
    query = (query_params.get('q') or query_params.get('query') or [''])[0]
    if not query:
        return
    try:
        database = client.get_database_client(DATABASE_NAME)
        container = database.get_container_client(CONTAINER_NAME)
        process_text(query, container)
    except Exception as e:
        # Report instead of hiding the failure; the page stops either way.
        st.error(f"Could not process query parameter: {str(e)}")
    st.stop()  # Stop further execution


def _current_document(documents):
    """Return ``(doc, total)`` for the current index, clamped into range."""
    total_docs = len(documents)
    index = min(max(st.session_state.current_index, 0), total_docs - 1)
    st.session_state.current_index = index
    return documents[index], total_docs


def _render_prev_next(key_suffix, total_docs):
    """Render Previous/Next buttons that move ``current_index``."""
    col_prev, col_next = st.columns([1, 1])
    with col_prev:
        if st.button("⬅️ Previous", key=f'prev_{key_suffix}'):
            if st.session_state.current_index > 0:
                st.session_state.current_index -= 1
                st.rerun()
    with col_next:
        if st.button("➑️ Next", key=f'next_{key_suffix}'):
            if st.session_state.current_index < total_docs - 1:
                st.session_state.current_index += 1
                st.rerun()


def _save_edited_document(container, doc_str, doc_id=None):
    """Parse ``doc_str`` as JSON, upsert it, and report the outcome."""
    try:
        updated_doc = json.loads(doc_str)
    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON: {str(e)} 🚫")
        return
    if doc_id is not None:
        updated_doc['id'] = doc_id  # Include the possibly edited ID
    success, message = update_record(container, updated_doc)
    if success:
        st.success(f"Document {updated_doc['id']} saved successfully.")
        st.session_state.selected_document_id = updated_doc['id']
        st.rerun()
    else:
        st.error(message)


def _render_markdown_view(documents):
    """Show the current document as JSON with search links for its texts."""
    doc, total_docs = _current_document(documents)
    st.markdown(f"#### Document ID: {doc.get('id', '')}")

    # Collect every string value that contains at least one space.
    values_with_space = []

    def extract_values(obj):
        if isinstance(obj, dict):
            for value in obj.values():
                extract_values(value)
        elif isinstance(obj, list):
            for item in obj:
                extract_values(item)
        elif isinstance(obj, str) and ' ' in obj:
            values_with_space.append(obj)

    extract_values(doc)

    search_urls = {
        "πŸš€πŸŒŒArXiv": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}",
        "πŸƒAnalyst": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix')}",
        "πŸ“šPyCoder": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix2')}",
        "πŸ”¬JSCoder": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix3')}",
        "🏠": lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}",
        "πŸ“–": lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}",
        "πŸ”": lambda k: f"https://www.google.com/search?q={quote(k)}",
        "▢️": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
        "πŸ”Ž": lambda k: f"https://www.bing.com/search?q={quote(k)}",
        "πŸŽ₯": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
        "🐦": lambda k: f"https://twitter.com/search?q={quote(k)}",
    }

    st.markdown("#### πŸ”— Links for Extracted Texts")
    for term in values_with_space:
        links_md = ' '.join([f"[{emoji}]({url(term)})" for emoji, url in search_urls.items()])
        st.markdown(f"**{term}** <small>{links_md}</small>", unsafe_allow_html=True)

    content = json.dumps(doc, indent=2)
    st.markdown(f"```json\n{content}\n```")

    _render_prev_next('markdown', total_docs)


def _render_code_editor_view(documents, container):
    """Show the current document in an editable text area with save."""
    doc, total_docs = _current_document(documents)
    index = st.session_state.current_index
    st.markdown(f"#### Document ID: {doc.get('id', '')}")
    doc_str = st.text_area("Edit Document", value=json.dumps(doc, indent=2), height=300, key=f'code_editor_{index}')
    _render_prev_next('code', total_docs)
    if st.button("πŸ’Ύ Save Changes", key=f'save_button_{index}'):
        _save_edited_document(container, doc_str)


def _render_edit_and_save_view(documents_to_display, container):
    """Show each displayed document in its own column with id/body editors."""
    st.markdown("#### Edit the document fields below:")
    cols = st.columns(len(documents_to_display))
    for idx, (col, doc) in enumerate(zip(cols, documents_to_display)):
        with col:
            st.markdown(f"##### Document ID: {doc.get('id', '')}")
            editable_id = st.text_input("ID", value=doc.get('id', ''), key=f'edit_id_{idx}')
            # The id is edited separately, so strip it from the JSON body.
            editable_doc = doc.copy()
            editable_doc.pop('id', None)
            doc_str = st.text_area("Document Content (in JSON format)", value=json.dumps(editable_doc, indent=2), height=300, key=f'doc_str_{idx}')
            if st.button("πŸ’Ύ Save Changes", key=f'save_button_{idx}'):
                _save_edited_document(container, doc_str, doc_id=editable_id)


def _render_clone_view(documents_to_display, container):
    """Offer a clone button per document and an editor for the pending clone."""
    st.markdown("#### Clone a document:")
    for idx, doc in enumerate(documents_to_display):
        st.markdown(f"##### Document ID: {doc.get('id', '')}")
        if st.button("πŸ“„ Clone Document", key=f'clone_button_{idx}'):
            cloned_doc = doc.copy()
            cloned_doc['id'] = generate_unique_id()
            st.session_state.cloned_doc = cloned_doc
            st.session_state.cloned_doc_str = json.dumps(cloned_doc, indent=2)
            st.session_state.clone_mode = True
            st.rerun()

    # Rendered once, outside the loop: the editor widgets carry no per-row key.
    if st.session_state.get('clone_mode', False):
        st.markdown("#### Edit Cloned Document:")
        cloned_doc_str = st.text_area("Cloned Document Content (in JSON format)", value=st.session_state.cloned_doc_str, height=300)
        if st.button("πŸ’Ύ Save Cloned Document"):
            try:
                new_doc = json.loads(cloned_doc_str)
                success, message = insert_record(container, new_doc)
                if success:
                    st.success(f"Cloned document saved with id: {new_doc['id']} πŸŽ‰")
                    st.session_state.selected_document_id = new_doc['id']
                    st.session_state.clone_mode = False
                    st.session_state.cloned_doc = None
                    st.session_state.cloned_doc_str = ''
                    st.rerun()
                else:
                    st.error(message)
            except json.JSONDecodeError as e:
                st.error(f"Invalid JSON: {str(e)} 🚫")


def _render_new_record_view(container):
    """Show a form that inserts a brand-new document."""
    st.markdown("#### Create a new document:")
    new_id = st.text_input("ID", value=generate_unique_id(), key='new_id')
    new_doc_str = st.text_area("Document Content (in JSON format)", value='{}', height=300)
    if st.button("βž• Create New Document"):
        try:
            new_doc = json.loads(new_doc_str)
            new_doc['id'] = new_id  # Use the provided ID
            success, message = insert_record(container, new_doc)
            if success:
                st.success(f"New document created with id: {new_doc['id']} πŸŽ‰")
                st.session_state.selected_document_id = new_doc['id']
                st.rerun()
            else:
                st.error(message)
        except json.JSONDecodeError as e:
            st.error(f"Invalid JSON: {str(e)} 🚫")


def _render_saved_files():
    """List files under ``saved_files/`` in the sidebar and show the chosen one."""
    st.sidebar.title("πŸ—‚οΈ Saved Files")

    save_dir = "saved_files"
    saved_files = []
    if os.path.exists(save_dir):
        saved_files = [f for f in os.listdir(save_dir) if os.path.isfile(os.path.join(save_dir, f))]
        # Reverse name order; approximates newest-first only for equal stems.
        saved_files.sort(reverse=True)

    if saved_files:
        st.session_state.selected_file = st.sidebar.radio("Select a saved file", saved_files)
    else:
        st.sidebar.info("No saved files available.")

    if 'selected_file' in st.session_state:
        file_path = os.path.join(save_dir, st.session_state.selected_file)
        if os.path.exists(file_path):
            with open(file_path, "r", encoding="utf-8") as f:
                file_content = f.read()
            st.markdown(file_content)
        else:
            st.error("File not found.")


def _render_github_section():
    """Render the clone-to-zip and push-to-new-repository controls."""
    st.subheader("πŸ™ GitHub Operations")
    github_token = os.environ.get("GITHUB")  # Read GitHub token from environment variable
    source_repo = st.text_input("Source GitHub Repository URL", value="https://github.com/AaronCWacker/AIExamples-8-24-Streamlit")
    new_repo_name = st.text_input("New Repository Name (for cloning)", value=f"AIExample-Clone-{datetime.now().strftime('%Y%m%d_%H%M%S')}")

    missing_input_message = "Please ensure GitHub token is set in environment variables and source repository URL is provided. πŸ”‘❓"

    col1, col2 = st.columns(2)
    with col1:
        if st.button("πŸ“₯ Clone Repository"):
            if github_token and source_repo:
                # Assigned before the try so the cleanup can always see them.
                local_path = f"./temp_repo_{datetime.now().strftime('%Y%m%d%H%M%S')}"
                zip_filename = f"{new_repo_name}.zip"
                try:
                    download_github_repo(source_repo, local_path)
                    create_zip_file(local_path, zip_filename[:-4])
                    st.markdown(get_base64_download_link(zip_filename, zip_filename), unsafe_allow_html=True)
                    st.success("Repository cloned successfully! πŸŽ‰")
                except Exception as e:
                    st.error(f"An error occurred: {str(e)} 😒")
                finally:
                    if os.path.exists(local_path):
                        shutil.rmtree(local_path)
                    if os.path.exists(zip_filename):
                        os.remove(zip_filename)
            else:
                st.error(missing_input_message)

    with col2:
        if st.button("πŸ“€ Push to New Repository"):
            if github_token and source_repo:
                # Assigned before the try: repo creation may fail first, and
                # the cleanup must not hit an unbound name.
                local_path = f"./temp_repo_{datetime.now().strftime('%Y%m%d%H%M%S')}"
                try:
                    g = Github(github_token)
                    new_repo = create_repo(g, new_repo_name)
                    download_github_repo(source_repo, local_path)
                    push_to_github(local_path, new_repo, github_token)
                    st.success(f"Repository pushed successfully to {new_repo.html_url} πŸš€")
                except Exception as e:
                    st.error(f"An error occurred: {str(e)} 😒")
                finally:
                    if os.path.exists(local_path):
                        shutil.rmtree(local_path)
            else:
                st.error(missing_input_message)


def main():
    """Run the Streamlit app: Cosmos DB browser/editor plus GitHub tools.

    Flow: initialize session state, require the Cosmos key, create the
    client, answer an optional ``?q=`` URL query, then render the sidebar
    navigator, the selected document view, saved files and the GitHub
    section. The URL query is handled after the client is created; a fresh
    session has no client, so handling it earlier could never succeed.
    """
    st.title("πŸ™Git🌌CosmosπŸ’« - Azure Cosmos DB and Github Agent")

    _init_session_state()

    # Automatic login: the key comes from the environment.
    if not Key:
        st.error("Cosmos DB Key is not set in environment variables. πŸ”‘❌")
        return  # Can't proceed without a key
    st.session_state.primary_key = Key
    st.session_state.logged_in = True

    try:
        if st.session_state.client is None:
            st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)

        _handle_url_query(st.session_state.client)

        # Sidebar for database and container selection.
        st.sidebar.title("πŸ™Git🌌CosmosπŸ’«πŸ—„οΈNavigator")

        databases = get_databases(st.session_state.client)
        selected_db = st.sidebar.selectbox("πŸ—ƒοΈ Select Database", databases)

        if selected_db != st.session_state.selected_database:
            st.session_state.selected_database = selected_db
            st.session_state.selected_container = None
            st.session_state.selected_document_id = None
            st.session_state.current_index = 0
            st.rerun()

        documents_to_display = []

        if st.session_state.selected_database:
            database = st.session_state.client.get_database_client(st.session_state.selected_database)
            containers = get_containers(database)
            selected_container = st.sidebar.selectbox("πŸ“ Select Container", containers)

            if selected_container != st.session_state.selected_container:
                st.session_state.selected_container = selected_container
                st.session_state.selected_document_id = None
                st.session_state.current_index = 0
                st.rerun()

            if st.session_state.selected_container:
                container = database.get_container_client(st.session_state.selected_container)

                if st.button("πŸ“¦ Export Container Data"):
                    download_link = archive_current_container(st.session_state.selected_database, st.session_state.selected_container, st.session_state.client)
                    # The helper returns either an <a> link or an error text.
                    if download_link.startswith('<a'):
                        st.markdown(download_link, unsafe_allow_html=True)
                    else:
                        st.error(download_link)

                documents = get_documents(container)
                total_docs = len(documents)

                if total_docs > 5:
                    documents_to_display = documents[:5]
                    st.info("Showing top 5 most recent documents.")
                else:
                    documents_to_display = documents
                    st.info(f"Showing all {len(documents_to_display)} documents.")

                if documents_to_display:
                    view_options = ['Show as Markdown', 'Show as Code Editor', 'Show as Edit and Save', 'Clone Document', 'New Record']
                    selected_view = st.selectbox("Select Viewer/Editor", view_options, index=2)

                    if selected_view == 'Show as Markdown':
                        _render_markdown_view(documents)
                    elif selected_view == 'Show as Code Editor':
                        _render_code_editor_view(documents, container)
                    elif selected_view == 'Show as Edit and Save':
                        _render_edit_and_save_view(documents_to_display, container)
                    elif selected_view == 'Clone Document':
                        _render_clone_view(documents_to_display, container)
                    elif selected_view == 'New Record':
                        _render_new_record_view(container)
                else:
                    st.sidebar.info("No documents found in this container. πŸ“­")

        # Main content area: tabular overview of the displayed documents.
        st.subheader(f"πŸ“Š Container: {st.session_state.selected_container}")
        if st.session_state.selected_container:
            if documents_to_display:
                df = pd.DataFrame(documents_to_display)
                st.dataframe(df)
            else:
                st.info("No documents to display. 🧐")

        _render_saved_files()
        _render_github_section()

    except exceptions.CosmosHttpResponseError as e:
        st.error(f"Failed to connect to Cosmos DB. HTTP error: {str(e)} 🚨")
    except Exception as e:
        st.error(f"An unexpected error occurred: {str(e)} 😱")

    # Logout clears the session; with the key in the environment, the next
    # run logs in again automatically.
    if st.session_state.logged_in and st.sidebar.button("πŸšͺ Logout"):
        st.session_state.logged_in = False
        st.session_state.selected_records.clear()
        st.session_state.client = None
        st.session_state.selected_database = None
        st.session_state.selected_container = None
        st.session_state.selected_document_id = None
        st.session_state.current_index = 0
        st.rerun()
706
+
707
# Script entry point: run the app only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()