awacke1 commited on
Commit
64e4308
·
verified ·
1 Parent(s): 410c056

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +818 -362
app.py CHANGED
@@ -1,36 +1,40 @@
1
  # app.py
2
  # =============================================================================
3
- # ───────────── IMPORTS ─────────────
4
- # =============================================================================
5
- import base64
6
- import glob
7
- import hashlib
8
- import json
9
- import os
10
- import pandas as pd
11
- import pytz
12
- import random
13
- import re
14
- import shutil
15
- import streamlit as st
16
- import time
17
- import traceback
18
- import uuid
19
- import zipfile
20
- from PIL import Image
21
- from azure.cosmos import CosmosClient, PartitionKey, exceptions
22
- from datetime import datetime
23
- from git import Repo
24
- from github import Github
25
- from gradio_client import Client, handle_file
26
- import tempfile
27
- import io
28
- import requests
29
- import numpy as np
30
- from urllib.parse import quote
31
-
32
- # =============================================================================
33
- # ───────────── EXTERNAL HELP LINKS (Always visible in sidebar) ─────────────
 
 
 
 
34
  # =============================================================================
35
  external_links = [
36
  {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "💻"},
@@ -46,7 +50,7 @@ external_links = [
46
  ]
47
 
48
  # =============================================================================
49
- # ───────────── APP CONFIGURATION ─────────────
50
  # =============================================================================
51
  Site_Name = '🐙 GitCosmos'
52
  title = "🐙 GitCosmos"
@@ -74,8 +78,9 @@ LOCAL_APP_URL = "https://huggingface.co/spaces/awacke1/AzureCosmosDBUI"
74
  CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
75
 
76
  # =============================================================================
77
- # ───────────── HELPER FUNCTIONS ─────────────
78
  # =============================================================================
 
79
  def get_download_link(file_path):
80
  with open(file_path, "rb") as file:
81
  contents = file.read()
@@ -83,6 +88,7 @@ def get_download_link(file_path):
83
  file_name = os.path.basename(file_path)
84
  return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} 📂</a>'
85
 
 
86
  def generate_unique_id():
87
  timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
88
  unique_uuid = str(uuid.uuid4())
@@ -90,23 +96,27 @@ def generate_unique_id():
90
  st.write('New ID: ' + return_value)
91
  return return_value
92
 
 
93
  def generate_filename(prompt, file_type):
94
  central = pytz.timezone('US/Central')
95
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
96
  safe_prompt = re.sub(r'\W+', '', prompt)[:90]
97
  return f"{safe_date_time}{safe_prompt}.{file_type}"
98
 
 
99
  def create_file(filename, prompt, response, should_save=True):
100
  if not should_save:
101
  return
102
  with open(filename, 'w', encoding='utf-8') as file:
103
  file.write(prompt + "\n\n" + response)
104
 
 
105
  def load_file(file_name):
106
  with open(file_name, "r", encoding='utf-8') as file:
107
  content = file.read()
108
  return content
109
 
 
110
  def display_glossary_entity(k):
111
  search_urls = {
112
  "🚀": lambda k: f"/?q={k}",
@@ -117,6 +127,7 @@ def display_glossary_entity(k):
117
  links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
118
  st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
119
 
 
120
  def create_zip_of_files(files):
121
  zip_name = "all_files.zip"
122
  with zipfile.ZipFile(zip_name, 'w') as zipf:
@@ -124,6 +135,7 @@ def create_zip_of_files(files):
124
  zipf.write(file)
125
  return zip_name
126
 
 
127
  def get_video_html(video_path, width="100%"):
128
  video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
129
  return f'''
@@ -133,6 +145,7 @@ def get_video_html(video_path, width="100%"):
133
  </video>
134
  '''
135
 
 
136
  def get_audio_html(audio_path, width="100%"):
137
  audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
138
  return f'''
@@ -142,6 +155,7 @@ def get_audio_html(audio_path, width="100%"):
142
  </audio>
143
  '''
144
 
 
145
  def preprocess_text(text):
146
  text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
147
  text = text.replace('"', '\\"')
@@ -150,7 +164,7 @@ def preprocess_text(text):
150
  return text.strip()
151
 
152
  # =============================================================================
153
- # ───────────── COSMOS DB FUNCTIONS ─────────────
154
  # =============================================================================
155
  def get_databases(client):
156
  return [db['id'] for db in client.list_databases()]
@@ -255,7 +269,7 @@ def archive_current_container(database_name, container_name, client):
255
  return f"Archive error: {str(e)} 😢"
256
 
257
  # =============================================================================
258
- # ───────────── ADVANCED COSMOS FUNCTIONS ─────────────
259
  # =============================================================================
260
  def create_new_container(database, container_id, partition_key_path,
261
  analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
@@ -324,7 +338,7 @@ def vector_search(container, query_vector, vector_field, top=10, exact_search=Fa
324
  return results
325
 
326
  # =============================================================================
327
- # ───────────── GITHUB FUNCTIONS ─────────────
328
  # =============================================================================
329
  def download_github_repo(url, local_path):
330
  if os.path.exists(local_path):
@@ -357,7 +371,7 @@ def push_to_github(local_path, repo, github_token):
357
  origin.push(refspec=f'{current_branch}:{current_branch}')
358
 
359
  # =============================================================================
360
- # ───────────── FILE & MEDIA MANAGEMENT FUNCTIONS ─────────────
361
  # =============================================================================
362
  def display_saved_files_in_sidebar():
363
  all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
@@ -399,11 +413,7 @@ def display_file_editor(file_path):
399
  return
400
  st.markdown("### ✏️ Edit File")
401
  st.markdown(f"**Editing:** {file_path}")
402
- md_tab, code_tab = st.tabs(["Markdown", "Code"])
403
- with md_tab:
404
- st.markdown(st.session_state.file_content[file_path])
405
- with code_tab:
406
- new_content = st.text_area("Edit:", value=st.session_state.file_content[file_path], height=400, key=f"editor_{hash(file_path)}")
407
  col1, col2 = st.columns([1, 5])
408
  with col1:
409
  if st.button("💾 Save"):
@@ -474,12 +484,10 @@ def update_file_management_section():
474
  st.session_state.current_file = None
475
  st.session_state.file_view_mode = None
476
  st.rerun()
477
-
478
  st.sidebar.markdown("---")
479
  st.sidebar.title("External Help Links")
480
  for link in external_links:
481
  st.sidebar.markdown(f"{link['emoji']} [{link['title']}]({link['url']})", unsafe_allow_html=True)
482
-
483
  if st.session_state.current_file:
484
  if st.session_state.file_view_mode == 'view':
485
  display_file_viewer(st.session_state.current_file)
@@ -487,7 +495,34 @@ def update_file_management_section():
487
  display_file_editor(st.session_state.current_file)
488
 
489
  # =============================================================================
490
- # ───────────── VIDEO & AUDIO UI FUNCTIONS ─────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
  # =============================================================================
492
  def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
493
  try:
@@ -603,345 +638,616 @@ def add_video_generation_ui(container):
603
  st.error(f"Upload error: {str(e)}")
604
 
605
  # =============================================================================
606
- # ───────────── AI SAMPLES SIDEBAR (Processed as a Python List) ─────────────
607
- # =============================================================================
608
- def display_ai_samples():
609
- # Define a list of sample queries
610
- ai_samples = [
611
- {
612
- "name": "FullTextContains",
613
- "description": "Query using FullTextContains",
614
- "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "bicycle")'
615
- },
616
- {
617
- "name": "FullTextContainsAll",
618
- "description": "Query using FullTextContainsAll",
619
- "query": 'SELECT TOP 10 * FROM c WHERE FullTextContainsAll(c.text, "red", "bicycle")'
620
- },
621
- {
622
- "name": "FullTextContainsAny",
623
- "description": "Query using FullTextContainsAny",
624
- "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "red") AND FullTextContainsAny(c.text, "bicycle", "skateboard")'
625
- },
626
- {
627
- "name": "FullTextScore",
628
- "description": "Query using FullTextScore (order by relevance)",
629
- "query": 'SELECT TOP 10 * FROM c ORDER BY RANK FullTextScore(c.text, ["bicycle", "mountain"])'
630
- },
631
- {
632
- "name": "Vector Search with Score",
633
- "description": "Example vector search snippet",
634
- "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5)\nfor result, score in results:\n print(result.json(), score)'
635
- },
636
- {
637
- "name": "Vector Search with Filtering",
638
- "description": "Example vector search with a filter",
639
- "query": 'pre_filter = {"conditions": [{"property": "metadata.page", "operator": "$eq", "value": 0}]}\nresults = vector_search.similarity_search_with_score(query="Your query", k=5, pre_filter=pre_filter)'
640
- },
641
- {
642
- "name": "Hybrid Search",
643
- "description": "Example hybrid search snippet",
644
- "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5, query_type=CosmosDBQueryType.HYBRID)'
645
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  ]
647
- st.sidebar.markdown("### 🤖 AI Samples")
648
- st.sidebar.info("🚀 Get started with our AI samples! Time free access to get started today.")
649
- # Provide a dropdown to select one sample
650
- sample_names = [sample["name"] for sample in ai_samples]
651
- selected_sample_name = st.sidebar.selectbox("Select an AI Sample", sample_names)
652
- selected_sample = next((s for s in ai_samples if s["name"] == selected_sample_name), None)
653
- if selected_sample:
654
- st.sidebar.markdown(f"**{selected_sample['name']}**: {selected_sample['description']}")
655
- # Use language 'sql' for queries containing FullText, else python
656
- lang = "sql" if "FullText" in selected_sample["name"] else "python"
657
- st.sidebar.code(selected_sample["query"], language=lang)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
 
659
  # =============================================================================
660
- # ───────────── MAIN FUNCTION ─────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661
  # =============================================================================
662
  def main():
663
- st.markdown("### 🐙 GitCosmos - Cosmos & Git Hub")
 
 
664
  if "chat_history" not in st.session_state:
665
  st.session_state.chat_history = []
666
- # Auth & Cosmos client initialization
667
- if Key:
668
- st.session_state.primary_key = Key
669
- st.session_state.logged_in = True
670
- else:
671
- st.error("Missing Cosmos Key 🔑❌")
672
- return
673
- if st.session_state.logged_in:
674
- try:
675
- if st.session_state.get("client") is None:
676
- st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
677
- st.sidebar.title("🐙 Navigator")
678
- databases = get_databases(st.session_state.client)
679
- selected_db = st.sidebar.selectbox("🗃️ DB", databases)
680
- st.markdown(CosmosDBUrl)
681
- if selected_db != st.session_state.get("selected_database"):
682
- st.session_state.selected_database = selected_db
683
- st.session_state.selected_container = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
684
  st.session_state.selected_document_id = None
685
  st.session_state.current_index = 0
686
  st.rerun()
687
- if st.session_state.selected_database:
688
- database = st.session_state.client.get_database_client(st.session_state.selected_database)
689
-
690
- # New Container button under DB menu
691
- if "show_new_container_form" not in st.session_state:
692
- st.session_state.show_new_container_form = False
693
- if st.sidebar.button("🆕 New Container"):
694
- st.session_state.show_new_container_form = True
695
- if st.session_state.show_new_container_form:
696
- with st.sidebar.form("new_container_form"):
697
- new_container_id = st.text_input("Container ID", value="aiml-container")
698
- new_partition_key = st.text_input("Partition Key", value="/pk")
699
- new_analytical = st.checkbox("Enable Analytical Store", value=True)
700
- submitted = st.form_submit_button("Create Container")
701
- if submitted:
702
- analytical_ttl = -1 if new_analytical else None
703
- new_container = create_new_container(
704
- database,
705
- new_container_id,
706
- new_partition_key,
707
- analytical_storage_ttl=analytical_ttl
708
- )
709
- if new_container:
710
- st.success(f"Container '{new_container_id}' created.")
711
- # Insert a default templated item into the new container
712
- default_id = generate_unique_id()
713
- default_item = {
714
- "id": default_id,
715
- "pk": default_id,
716
- "name": "Default Image Prompt",
717
- "prompt": "Enter your image prompt here",
718
- "timestamp": datetime.now().isoformat(),
719
- "type": "image_prompt"
720
- }
721
- insert_success, insert_message = insert_record(new_container, default_item)
722
- if insert_success:
723
- st.info("Default templated item created in new container.")
724
- else:
725
- st.error(f"Default item insertion error: {insert_message}")
726
- st.session_state.show_new_container_form = False
727
- st.session_state.new_container_created = new_container_id
728
  st.rerun()
729
-
730
- # Update container list
731
- containers = get_containers(database)
732
- if "new_container_created" in st.session_state and st.session_state.new_container_created not in containers:
733
- containers.append(st.session_state.new_container_created)
734
- selected_container = st.sidebar.selectbox("📁 Container", containers)
735
- if selected_container != st.session_state.get("selected_container"):
736
- st.session_state.selected_container = selected_container
737
- st.session_state.selected_document_id = None
738
- st.session_state.current_index = 0
739
- st.rerun()
740
- if st.session_state.selected_container:
741
- container = database.get_container_client(st.session_state.selected_container)
742
- if st.sidebar.button("📦 Export"):
743
- download_link = archive_current_container(st.session_state.selected_database, st.session_state.selected_container, st.session_state.client)
744
- if download_link.startswith('<a'):
745
- st.markdown(download_link, unsafe_allow_html=True)
746
- else:
747
- st.error(download_link)
748
- documents = get_documents(container)
749
- total_docs = len(documents)
750
- num_docs = st.slider("Docs", 1, 20, 1)
751
- documents_to_display = documents[:num_docs] if total_docs > num_docs else documents
752
- st.sidebar.info(f"Showing {len(documents_to_display)} docs")
753
- # Document Viewer / Editor
754
- view_options = ['Markdown', 'Code', 'Run AI', 'Clone', 'New']
755
- selected_view = st.sidebar.selectbox("View", view_options, index=1)
756
- if selected_view == 'Markdown':
757
- st.markdown("#### 📄 Markdown")
758
- if documents:
759
- doc = documents[st.session_state.current_index]
760
- content = json.dumps(doc, indent=2)
761
- st.markdown(f"```json\n{content}\n```")
762
- col_prev, col_next = st.columns(2)
763
- with col_prev:
764
- if st.button("⬅️") and st.session_state.current_index > 0:
765
- st.session_state.current_index -= 1
766
- st.rerun()
767
- with col_next:
768
- if st.button("➡️") and st.session_state.current_index < total_docs - 1:
769
- st.session_state.current_index += 1
770
- st.rerun()
771
- elif selected_view == 'Code':
772
- st.markdown("#### 💻 Code Editor")
773
- if documents:
774
- doc = documents[st.session_state.current_index]
775
- doc_str = st.text_area("Edit JSON", value=json.dumps(doc, indent=2), height=300, key=f'code_{st.session_state.current_index}')
776
- col_prev, col_next = st.columns(2)
777
- with col_prev:
778
- if st.button("⬅️") and st.session_state.current_index > 0:
779
- st.session_state.current_index -= 1
780
- st.rerun()
781
- with col_next:
782
- if st.button("➡️") and st.session_state.current_index < total_docs - 1:
783
- st.session_state.current_index += 1
784
- st.rerun()
785
- col_save, col_delete = st.columns(2)
786
- with col_save:
787
- if st.button("💾 Save", key=f'save_{st.session_state.current_index}'):
788
- try:
789
- updated_doc = json.loads(doc_str)
790
- container.upsert_item(body=updated_doc)
791
- st.success(f"Saved {updated_doc['id']}")
792
- st.rerun()
793
- except Exception as e:
794
- st.error(f"Save err: {str(e)}")
795
- with col_delete:
796
- if st.button("🗑️ Delete", key=f'delete_{st.session_state.current_index}'):
797
- try:
798
- current_doc = json.loads(doc_str)
799
- success, message = delete_record(container, current_doc)
800
- if success:
801
- st.success(message)
802
- st.rerun()
803
- else:
804
- st.error(message)
805
- except Exception as e:
806
- st.error(f"Delete err: {str(e)}")
807
- if "delete_log" in st.session_state and st.session_state.delete_log:
808
- st.subheader("Delete Log")
809
- for log_entry in st.session_state.delete_log[-5:]:
810
- st.write(log_entry)
811
- elif selected_view == 'Run AI':
812
- st.markdown("#### 🤖 Run AI (stub)")
813
- st.info("AI functionality not implemented.")
814
- elif selected_view == 'Clone':
815
- st.markdown("#### 📄 Clone")
816
- if documents:
817
- doc = documents[st.session_state.current_index]
818
- st.markdown(f"Original ID: {doc.get('id', '')}")
819
- new_id = st.text_input("New ID", value=generate_unique_id(), key='new_clone_id')
820
- new_name = st.text_input("New Name", value=f"Clone_{new_id[:8]}", key='new_clone_name')
821
- new_doc = {'id': new_id, 'pk': new_id, 'name': new_name, **{k: v for k, v in doc.items() if k not in ['id', 'name', 'pk', '_rid', '_self', '_etag', '_attachments', '_ts']}}
822
- doc_str = st.text_area("Edit JSON", value=json.dumps(new_doc, indent=2), height=300, key='clone_preview')
823
- col1, col2 = st.columns(2)
824
- with col1:
825
- if st.button("🔄 Regenerate"):
826
- new_id = generate_unique_id()
827
- st.session_state.new_clone_id = new_id
828
- st.rerun()
829
- with col2:
830
- if st.button("💾 Save Clone"):
831
- try:
832
- final_doc = json.loads(doc_str)
833
- for field in ['_rid', '_self', '_etag', '_attachments', '_ts']:
834
- final_doc.pop(field, None)
835
- container.create_item(body=final_doc)
836
- st.success(f"Cloned {final_doc['id']}")
837
- st.rerun()
838
- except Exception as e:
839
- st.error(f"Clone err: {str(e)}")
840
- col_prev, col_next = st.columns(2)
841
- with col_prev:
842
- if st.button("⬅️") and st.session_state.current_index > 0:
843
- st.session_state.current_index -= 1
844
- st.rerun()
845
- with col_next:
846
- if st.button("➡️") and st.session_state.current_index < total_docs - 1:
847
- st.session_state.current_index += 1
848
- st.rerun()
849
- elif selected_view == 'New':
850
- st.markdown("#### ➕ New Doc")
851
- if st.button("🤖 Auto-Gen"):
852
- auto_doc = {
853
- "id": generate_unique_id(),
854
- "pk": generate_unique_id(),
855
- "name": f"Auto {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
856
- "content": "Auto-generated record.",
857
- "timestamp": datetime.now().isoformat()
858
- }
859
- success, message = insert_record(container, auto_doc)
860
- if success:
861
- st.success(message)
862
  st.rerun()
863
- else:
864
- st.error(message)
865
- else:
866
- new_id = st.text_input("ID", value=generate_unique_id(), key='new_id')
867
- default_doc = {
868
- "id": new_id,
869
- "pk": new_id,
870
- "name": "New Doc",
871
- "content": "",
872
- "timestamp": datetime.now().isoformat()
873
- }
874
- new_doc_str = st.text_area("JSON", value=json.dumps(default_doc, indent=2), height=300)
875
- if st.button("➕ Create"):
 
 
 
 
 
 
876
  try:
877
- cleaned = preprocess_text(new_doc_str)
878
- new_doc = json.loads(cleaned)
879
- new_doc['id'] = new_id
880
- new_doc['pk'] = new_id
881
- success, message = insert_record(container, new_doc)
 
 
 
 
 
 
882
  if success:
883
- st.success(f"Created {new_doc['id']}")
884
  st.rerun()
885
  else:
886
  st.error(message)
887
  except Exception as e:
888
- st.error(f"Create err: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
  st.subheader(f"📊 {st.session_state.selected_container}")
890
  if documents_to_display:
891
  df = pd.DataFrame(documents_to_display)
892
  st.dataframe(df)
893
  else:
894
  st.info("No docs.")
895
-
896
- # GitHub Ops section
897
- st.subheader("🐙 GitHub Ops")
898
- github_token = os.environ.get("GITHUB")
899
- source_repo = st.text_input("Source Repo URL", value="https://github.com/AaronCWacker/AIExamples-8-24-Streamlit")
900
- new_repo_name = st.text_input("New Repo Name", value=f"Clone-{datetime.now().strftime('%Y%m%d_%H%M%S')}")
901
- col_g1, col_g2 = st.columns(2)
902
- with col_g1:
903
- if st.button("📥 Clone Repo"):
904
- if github_token and source_repo:
905
- try:
906
- local_path = f"./temp_repo_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
907
- download_github_repo(source_repo, local_path)
908
- zip_filename = f"{new_repo_name}.zip"
909
- create_zip_file(local_path, zip_filename[:-4])
910
- st.markdown(get_download_link(zip_filename), unsafe_allow_html=True)
911
- st.success("Cloned! 🎉")
912
- except Exception as e:
913
- st.error(f"Clone err: {str(e)}")
914
- finally:
915
- if os.path.exists(local_path):
916
- shutil.rmtree(local_path)
917
- if os.path.exists(zip_filename):
918
- os.remove(zip_filename)
919
- else:
920
- st.error("Missing token or URL 🔑❓")
921
- with col_g2:
922
- if st.button("📤 Push Repo"):
923
- if github_token and source_repo:
924
- try:
925
- g = Github(github_token)
926
- new_repo = create_repo(g, new_repo_name)
927
- local_path = f"./temp_repo_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
928
- download_github_repo(source_repo, local_path)
929
- push_to_github(local_path, new_repo, github_token)
930
- st.success(f"Pushed to {new_repo.html_url} 🚀")
931
- except Exception as e:
932
- st.error(f"Push err: {str(e)}")
933
- finally:
934
- if os.path.exists(local_path):
935
- shutil.rmtree(local_path)
936
- else:
937
- st.error("Missing token or URL 🔑❓")
938
- # Display AI Samples sidebar UI (processed from Python list)
939
- display_ai_samples()
940
- update_file_management_section()
941
- except exceptions.CosmosHttpResponseError as e:
942
- st.error(f"Cosmos error: {str(e)} 🚨")
943
- except Exception as e:
944
- st.error(f"Error: {str(e)} 😱")
945
  if st.session_state.logged_in and st.sidebar.button("🚪 Logout"):
946
  st.markdown("#### 🚪 Logout")
947
  st.session_state.logged_in = False
@@ -953,5 +1259,155 @@ def main():
953
  st.session_state.current_index = 0
954
  st.rerun()
955
 
956
- if __name__ == "__main__":
957
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # app.py
2
  # =============================================================================
3
+ # 🚀 IMPORTS
4
+ # =============================================================================
5
+ import base64 # 🔥 For encoding/decoding files
6
+ import glob # 🔍 For file searching
7
+ import hashlib # 🔒 For hashing
8
+ import json # 🧮 For JSON handling
9
+ import os # 📁 For OS interactions
10
+ import pandas as pd # 🐼 For data frame support
11
+ import pytz # ⏰ For timezone management
12
+ import random # 🎲 For randomness
13
+ import re # 🔍 For regex operations
14
+ import shutil # 🗑️ For file copying/removal
15
+ import streamlit as st # 💻 For the Streamlit UI
16
+ import time # ⏳ For timing
17
+ import traceback # 🚨 For error traces
18
+ import uuid # 🆔 For unique ID generation
19
+ import zipfile # 📦 For archiving files
20
+ from PIL import Image # 🖼️ For image processing
21
+ from azure.cosmos import CosmosClient, PartitionKey, exceptions # ☁️ For Cosmos DB operations
22
+ from datetime import datetime # ⏰ For timestamps
23
+ from git import Repo # 🐙 For Git operations
24
+ from github import Github # 🔗 For GitHub API interactions
25
+ from gradio_client import Client, handle_file # 🤖 For Gradio video generation
26
+ import tempfile # 📝 For temporary file handling
27
+ import io # 📡 For in-memory streams
28
+ import requests # 🌐 For HTTP requests
29
+ import numpy as np # 🔢 For numerical operations
30
+ from urllib.parse import quote # 🔗 For URL encoding
31
+
32
+ # Allow nested asyncio.run calls (needed for our async TTS and Arxiv search)
33
+ import nest_asyncio
34
+ nest_asyncio.apply()
35
+
36
+ # =============================================================================
37
+ # 😎 EXTERNAL HELP LINKS (Always visible in sidebar)
38
  # =============================================================================
39
  external_links = [
40
  {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "💻"},
 
50
  ]
51
 
52
  # =============================================================================
53
+ # 🎨 APP CONFIGURATION
54
  # =============================================================================
55
  Site_Name = '🐙 GitCosmos'
56
  title = "🐙 GitCosmos"
 
78
  CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
79
 
80
  # =============================================================================
81
+ # 💾 HELPER FUNCTIONS
82
  # =============================================================================
83
+ # 🔗 Get a download link for a file
84
  def get_download_link(file_path):
85
  with open(file_path, "rb") as file:
86
  contents = file.read()
 
88
  file_name = os.path.basename(file_path)
89
  return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} 📂</a>'
90
 
91
+ # 🆔 Generate a unique ID
92
  def generate_unique_id():
93
  timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
94
  unique_uuid = str(uuid.uuid4())
 
96
  st.write('New ID: ' + return_value)
97
  return return_value
98
 
99
+ # 📝 Generate a safe filename based on a prompt
100
  def generate_filename(prompt, file_type):
101
  central = pytz.timezone('US/Central')
102
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
103
  safe_prompt = re.sub(r'\W+', '', prompt)[:90]
104
  return f"{safe_date_time}{safe_prompt}.{file_type}"
105
 
106
+ # 📄 Create a file with given content
107
  def create_file(filename, prompt, response, should_save=True):
108
  if not should_save:
109
  return
110
  with open(filename, 'w', encoding='utf-8') as file:
111
  file.write(prompt + "\n\n" + response)
112
 
113
+ # 📂 Load file contents
114
  def load_file(file_name):
115
  with open(file_name, "r", encoding='utf-8') as file:
116
  content = file.read()
117
  return content
118
 
119
+ # 🔗 Display a glossary entity with quick search links
120
  def display_glossary_entity(k):
121
  search_urls = {
122
  "🚀": lambda k: f"/?q={k}",
 
127
  links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
128
  st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
129
 
130
+ # 📦 Create a ZIP archive of given files
131
  def create_zip_of_files(files):
132
  zip_name = "all_files.zip"
133
  with zipfile.ZipFile(zip_name, 'w') as zipf:
 
135
  zipf.write(file)
136
  return zip_name
137
 
138
+ # 🎥 Get HTML to embed a video
139
  def get_video_html(video_path, width="100%"):
140
  video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
141
  return f'''
 
145
  </video>
146
  '''
147
 
148
+ # 🎵 Get HTML to embed audio
149
  def get_audio_html(audio_path, width="100%"):
150
  audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
151
  return f'''
 
155
  </audio>
156
  '''
157
 
158
+ # ✂️ Preprocess text (e.g., for JSON safety)
159
  def preprocess_text(text):
160
  text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
161
  text = text.replace('"', '\\"')
 
164
  return text.strip()
165
 
166
  # =============================================================================
167
+ # ☁️ COSMOS DB FUNCTIONS
168
  # =============================================================================
169
  def get_databases(client):
170
  return [db['id'] for db in client.list_databases()]
 
269
  return f"Archive error: {str(e)} 😢"
270
 
271
  # =============================================================================
272
+ # 🚀 ADVANCED COSMOS FUNCTIONS
273
  # =============================================================================
274
  def create_new_container(database, container_id, partition_key_path,
275
  analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
 
338
  return results
339
 
340
  # =============================================================================
341
+ # 🐙 GITHUB FUNCTIONS
342
  # =============================================================================
343
  def download_github_repo(url, local_path):
344
  if os.path.exists(local_path):
 
371
  origin.push(refspec=f'{current_branch}:{current_branch}')
372
 
373
  # =============================================================================
374
+ # 📁 FILE & MEDIA MANAGEMENT FUNCTIONS
375
  # =============================================================================
376
  def display_saved_files_in_sidebar():
377
  all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
 
413
  return
414
  st.markdown("### ✏️ Edit File")
415
  st.markdown(f"**Editing:** {file_path}")
416
+ new_content = st.text_area("Edit JSON", value=st.session_state.file_content[file_path], height=400, key="doc_editor", on_change=lambda: auto_save_edit())
 
 
 
 
417
  col1, col2 = st.columns([1, 5])
418
  with col1:
419
  if st.button("💾 Save"):
 
484
  st.session_state.current_file = None
485
  st.session_state.file_view_mode = None
486
  st.rerun()
 
487
  st.sidebar.markdown("---")
488
  st.sidebar.title("External Help Links")
489
  for link in external_links:
490
  st.sidebar.markdown(f"{link['emoji']} [{link['title']}]({link['url']})", unsafe_allow_html=True)
 
491
  if st.session_state.current_file:
492
  if st.session_state.file_view_mode == 'view':
493
  display_file_viewer(st.session_state.current_file)
 
495
  display_file_editor(st.session_state.current_file)
496
 
497
  # =============================================================================
498
+ # SIDEBAR DATA GRID: Show all container records with formatted timestamp
499
+ # =============================================================================
500
+ def show_sidebar_data_grid(container):
501
+ try:
502
+ records = get_documents(container)
503
+ # Build list of dicts with desired columns; sort descending by _ts or timestamp field
504
+ data = []
505
+ for rec in records:
506
+ ts = rec.get("timestamp", "")
507
+ try:
508
+ dt = datetime.fromisoformat(ts)
509
+ formatted = dt.strftime("%I:%M %p %m/%d/%Y")
510
+ except Exception:
511
+ formatted = ts
512
+ data.append({
513
+ "ID": rec.get("id", ""),
514
+ "Name": rec.get("name", ""),
515
+ "Timestamp": formatted
516
+ })
517
+ df = pd.DataFrame(data)
518
+ # Already sorted by _ts descending from the query; display in sidebar
519
+ st.sidebar.markdown("### 📊 Data Grid")
520
+ st.sidebar.dataframe(df)
521
+ except Exception as e:
522
+ st.sidebar.error(f"Data grid error: {str(e)}")
523
+
524
+ # =============================================================================
525
+ # 🎥 VIDEO & AUDIO UI FUNCTIONS
526
  # =============================================================================
527
  def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
528
  try:
 
638
  st.error(f"Upload error: {str(e)}")
639
 
640
  # =============================================================================
641
+ # 🤖 NEW ITEM & FIELD FUNCTIONS
642
+ # =============================================================================
643
+ def new_item_default(container):
644
+ new_id = generate_unique_id()
645
+ default_doc = {
646
+ "id": new_id,
647
+ "pk": new_id,
648
+ "name": "New Sample Document",
649
+ "content": "Start editing your document here...",
650
+ "timestamp": datetime.now().isoformat(),
651
+ "type": "sample"
652
+ }
653
+ success, message = insert_record(container, default_doc)
654
+ if success:
655
+ st.success("New sample document created! ✨")
656
+ return default_doc
657
+ else:
658
+ st.error("Error creating new item: " + message)
659
+ return None
660
+
661
+ def auto_save_edit():
662
+ try:
663
+ edited_str = st.session_state.doc_editor
664
+ edited_doc = json.loads(edited_str)
665
+ container = st.session_state.current_container
666
+ container.upsert_item(edited_doc)
667
+ st.success("Auto-saved! 💾")
668
+ except Exception as e:
669
+ st.error(f"Auto-save error: {str(e)}")
670
+
671
+ def add_field_to_doc():
672
+ key = st.session_state.new_field_key
673
+ value = st.session_state.new_field_value
674
+ try:
675
+ doc = json.loads(st.session_state.doc_editor)
676
+ doc[key] = value
677
+ st.session_state.doc_editor = json.dumps(doc, indent=2)
678
+ auto_save_edit()
679
+ st.success(f"Added field {key} 👍")
680
+ except Exception as e:
681
+ st.error(f"Error adding field: {str(e)}")
682
+
683
+ # =============================================================================
684
+ # 🔍 VECTOR SEARCH INTERFACE (Simple keyword search)
685
+ # =============================================================================
686
+ def vector_keyword_search(keyword, container):
687
+ try:
688
+ query = f"SELECT * FROM c WHERE CONTAINS(c.content, '{keyword}')"
689
+ results = list(container.query_items(query=query, enable_cross_partition_query=True))
690
+ return results
691
+ except Exception as e:
692
+ st.error(f"Vector search error: {str(e)}")
693
+ return []
694
+
695
+ # =============================================================================
696
+ # 🤖 NEW AI MODALITY RECORD TEMPLATES
697
+ # =============================================================================
698
+ def new_ai_record(container):
699
+ new_id = generate_unique_id()
700
+ default_doc = {
701
+ "id": new_id,
702
+ "pk": new_id,
703
+ "name": "AI Modality Record",
704
+ "function_url": "https://example.com/function",
705
+ "input_text": "### Input (markdown)\n\nType your input here.",
706
+ "output_text": "### Output (markdown)\n\nResult will appear here.",
707
+ "timestamp": datetime.now().isoformat(),
708
+ "type": "ai_modality"
709
+ }
710
+ success, message = insert_record(container, default_doc)
711
+ if success:
712
+ st.success("New AI modality record created! 💡")
713
+ return default_doc
714
+ else:
715
+ st.error("Error creating AI record: " + message)
716
+ return None
717
+
718
+ def new_links_record(container):
719
+ new_id = generate_unique_id()
720
+ links_md = "\n".join([f"- {link['emoji']} [{link['title']}]({link['url']})" for link in external_links])
721
+ default_doc = {
722
+ "id": new_id,
723
+ "pk": new_id,
724
+ "name": "Portal Links Record",
725
+ "function_url": "",
726
+ "input_text": links_md,
727
+ "output_text": "",
728
+ "timestamp": datetime.now().isoformat(),
729
+ "type": "ai_modality"
730
+ }
731
+ success, message = insert_record(container, default_doc)
732
+ if success:
733
+ st.success("New Portal Links record created! 🔗")
734
+ return default_doc
735
+ else:
736
+ st.error("Error creating links record: " + message)
737
+ return None
738
+
739
+ # =============================================================================
740
+ # 🤖 LANGCHAIN FUNCTIONS (Witty emoji comments)
741
+ # =============================================================================
742
+ def display_langchain_functions():
743
+ functions = [
744
+ {"name": "OpenAIEmbeddings", "comment": "🔮 Creates embeddings using OpenAI – pure magic!"},
745
+ {"name": "AzureCosmosDBNoSqlVectorSearch", "comment": "🚀 Performs vector search on Cosmos DB – superfast and smart!"},
746
+ {"name": "RecursiveCharacterTextSplitter", "comment": "✂️ Slices text into manageable chunks – like a pro chef!"}
747
  ]
748
+ st.sidebar.markdown("### 🤖 Langchain Functions")
749
+ for func in functions:
750
+ st.sidebar.write(f"{func['name']}: {func['comment']}")
751
+
752
+ # =============================================================================
753
+ # ─────────────────────────────────────────────────────────
754
+ # NEW: SIDEBAR DATA GRID FUNCTION
755
+ # =============================================================================
756
+ def show_sidebar_data_grid():
757
+ if st.session_state.get("current_container"):
758
+ show_sidebar_data_grid.container = st.session_state.current_container
759
+ try:
760
+ records = get_documents(show_sidebar_data_grid.container)
761
+ data = []
762
+ for rec in records:
763
+ ts = rec.get("timestamp", "")
764
+ try:
765
+ dt = datetime.fromisoformat(ts)
766
+ formatted = dt.strftime("%I:%M %p %m/%d/%Y")
767
+ except Exception:
768
+ formatted = ts
769
+ data.append({
770
+ "ID": rec.get("id", ""),
771
+ "Name": rec.get("name", ""),
772
+ "Timestamp": formatted
773
+ })
774
+ df = pd.DataFrame(data)
775
+ st.sidebar.markdown("### 📊 Data Grid")
776
+ st.sidebar.dataframe(df)
777
+ except Exception as e:
778
+ st.sidebar.error(f"Data grid error: {str(e)}")
779
+ else:
780
+ st.sidebar.info("No container selected for data grid.")
781
+
782
+ # =============================================================================
783
+ # 🤖 RESEARCH / ARXIV FUNCTIONS (Copied from second app code)
784
+ # =============================================================================
785
+ def parse_arxiv_refs(ref_text: str):
786
+ if not ref_text:
787
+ return []
788
+ results = []
789
+ current_paper = {}
790
+ lines = ref_text.split('\n')
791
+ for i, line in enumerate(lines):
792
+ if line.count('|') == 2:
793
+ if current_paper:
794
+ results.append(current_paper)
795
+ if len(results) >= 20:
796
+ break
797
+ try:
798
+ header_parts = line.strip('* ').split('|')
799
+ date = header_parts[0].strip()
800
+ title = header_parts[1].strip()
801
+ url_match = re.search(r'(https://arxiv.org/\S+)', line)
802
+ url = url_match.group(1) if url_match else f"paper_{len(results)}"
803
+ current_paper = {
804
+ 'date': date,
805
+ 'title': title,
806
+ 'url': url,
807
+ 'authors': '',
808
+ 'summary': '',
809
+ 'full_audio': None,
810
+ 'download_base64': '',
811
+ }
812
+ except Exception as e:
813
+ st.warning(f"Error parsing paper header: {str(e)}")
814
+ current_paper = {}
815
+ continue
816
+ elif current_paper:
817
+ if not current_paper['authors']:
818
+ current_paper['authors'] = line.strip('* ')
819
+ else:
820
+ if current_paper['summary']:
821
+ current_paper['summary'] += ' ' + line.strip()
822
+ else:
823
+ current_paper['summary'] = line.strip()
824
+ if current_paper:
825
+ results.append(current_paper)
826
+ return results[:20]
827
+
828
+ def create_paper_links_md(papers):
829
+ lines = ["# Paper Links\n"]
830
+ for i, p in enumerate(papers, start=1):
831
+ lines.append(f"{i}. **{p['title']}** — [Arxiv Link]({p['url']})")
832
+ return "\n".join(lines)
833
+
834
+ def generate_pdf_link(url: str) -> str:
835
+ if "abs" in url:
836
+ pdf_url = url.replace("abs", "pdf")
837
+ if not pdf_url.endswith(".pdf"):
838
+ pdf_url += ".pdf"
839
+ return pdf_url
840
+ return url
841
+
842
+ def generate_5min_feature_markdown(paper: dict) -> str:
843
+ title = paper.get('title', '')
844
+ summary = paper.get('summary', '')
845
+ authors = paper.get('authors', '')
846
+ date = paper.get('date', '')
847
+ url = paper.get('url', '')
848
+ pdf_link = generate_pdf_link(url)
849
+ title_wc = len(title.split())
850
+ summary_wc = len(summary.split())
851
+ high_info_terms = [term for term in summary.split()[:5]] # simplified for demo
852
+ terms_str = ", ".join(high_info_terms)
853
+ rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
854
+ mermaid_code = "```mermaid\nflowchart TD\n"
855
+ for i in range(len(high_info_terms) - 1):
856
+ mermaid_code += f' T{i+1}["{high_info_terms[i]}"] --> T{i+2}["{high_info_terms[i+1]}"]\n'
857
+ mermaid_code += "```"
858
+ md = f"""
859
+ ## {title}
860
+
861
+ **Authors:** {authors}
862
+ **Date:** {date}
863
+ **Word Count (Title):** {title_wc} | **Word Count (Summary):** {summary_wc}
864
+
865
+ **Links:** [Abstract]({url}) | [PDF]({pdf_link})
866
+
867
+ **High Info Terms:** {terms_str}
868
+ **ROUGE Score:** {rouge_score}%
869
+
870
+ ### Mermaid Graph of Key Concepts
871
+ {mermaid_code}
872
+
873
+ ---
874
+ """
875
+ return md
876
+
877
+ def create_detailed_paper_md(papers: list) -> str:
878
+ md_parts = ["# Detailed Research Paper Summary\n"]
879
+ for idx, paper in enumerate(papers, start=1):
880
+ md_parts.append(generate_5min_feature_markdown(paper))
881
+ return "\n".join(md_parts)
882
 
883
  # =============================================================================
884
+ # 🤖 ASYNC TTS FUNCTIONS (from second app code)
885
+ # =============================================================================
886
+ import asyncio
887
+ import edge_tts
888
+ from streamlit_marquee import streamlit_marquee
889
+ from collections import Counter
890
+
891
+ class PerformanceTimer:
892
+ def __init__(self, operation_name: str):
893
+ self.operation_name = operation_name
894
+ self.start_time = None
895
+ def __enter__(self):
896
+ self.start_time = time.time()
897
+ return self
898
+ def __exit__(self, exc_type, exc_val, exc_tb):
899
+ pass
900
+
901
+ async def async_edge_tts_generate(text: str, voice: str, rate: int = 0, pitch: int = 0, file_format: str = "mp3"):
902
+ with PerformanceTimer("tts_generation") as timer:
903
+ text = text.replace("\n", " ").strip()
904
+ if not text:
905
+ return None, 0
906
+ cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
907
+ if cache_key in st.session_state.get('audio_cache', {}):
908
+ return st.session_state['audio_cache'][cache_key], 0
909
+ try:
910
+ rate_str = f"{rate:+d}%"
911
+ pitch_str = f"{pitch:+d}Hz"
912
+ communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
913
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
914
+ filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
915
+ await communicate.save(filename)
916
+ st.session_state.setdefault('audio_cache', {})[cache_key] = filename
917
+ return filename, time.time() - timer.start_time
918
+ except Exception as e:
919
+ st.error(f"Error generating audio: {str(e)}")
920
+ return None, 0
921
+
922
+ def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
923
+ result = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
924
+ if isinstance(result, tuple):
925
+ return result[0]
926
+ return result
927
+
928
+ async def async_save_qa_with_audio(question: str, answer: str):
929
+ with PerformanceTimer("qa_save") as timer:
930
+ md_file = create_file(question, answer, "md")
931
+ audio_file = None
932
+ if st.session_state.get('enable_audio', True):
933
+ audio_text = f"{question}\n\nAnswer: {answer}"
934
+ audio_file, _ = await async_edge_tts_generate(audio_text, voice=st.session_state.get('tts_voice', "en-US-AriaNeural"), file_format=st.session_state.get('audio_format', "mp3"))
935
+ return md_file, audio_file, time.time() - timer.start_time, 0
936
+
937
+ def save_qa_with_audio(question, answer, voice=None):
938
+ if not voice:
939
+ voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
940
+ md_file = create_file(question, answer, "md")
941
+ audio_text = f"{question}\n\nAnswer: {answer}"
942
+ audio_file = speak_with_edge_tts(audio_text, voice=voice, file_format=st.session_state.get('audio_format', "mp3"))
943
+ return md_file, audio_file
944
+
945
+ def play_and_download_audio(file_path, file_type="mp3"):
946
+ if file_path and os.path.exists(file_path):
947
+ st.audio(file_path)
948
+ dl_link = get_download_link(file_path, file_type=file_type)
949
+ st.markdown(dl_link, unsafe_allow_html=True)
950
+
951
+ def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
952
+ cache_key = f"dl_{file_path}"
953
+ if cache_key in st.session_state.get('download_link_cache', {}):
954
+ return st.session_state['download_link_cache'][cache_key]
955
+ try:
956
+ with open(file_path, "rb") as f:
957
+ b64 = base64.b64encode(f.read()).decode()
958
+ filename = os.path.basename(file_path)
959
+ if file_type == "mp3":
960
+ link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎵 Download {filename}</a>'
961
+ elif file_type == "wav":
962
+ link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">🔊 Download {filename}</a>'
963
+ elif file_type == "md":
964
+ link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📝 Download {filename}</a>'
965
+ else:
966
+ link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">Download {filename}</a>'
967
+ st.session_state.setdefault('download_link_cache', {})[cache_key] = link
968
+ return link
969
+ except Exception as e:
970
+ st.error(f"Error creating download link: {str(e)}")
971
+ return ""
972
+
973
+ # =============================================================================
974
+ # ─────────────────────────────────────────────────────────
975
+ # MAIN FUNCTION
976
  # =============================================================================
977
  def main():
978
+ # Friendly portal link
979
+ st.markdown(f"[🔗 Portal]({CosmosDBUrl})")
980
+ # Initialize some session state keys if not already present
981
  if "chat_history" not in st.session_state:
982
  st.session_state.chat_history = []
983
+ st.session_state.setdefault("current_container", None)
984
+ # Sidebar: New Item, Add Field, New AI Record, New Links Record, and Vector Search
985
+ st.sidebar.markdown("## 🛠️ Item Management")
986
+ if st.sidebar.button("New Item"):
987
+ if st.session_state.get("current_container"):
988
+ new_doc = new_item_default(st.session_state.current_container)
989
+ if new_doc:
990
+ st.session_state.doc_editor = json.dumps(new_doc, indent=2)
991
+ else:
992
+ st.warning("No container selected!")
993
+ st.sidebar.text_input("New Field Key", key="new_field_key")
994
+ st.sidebar.text_input("New Field Value", key="new_field_value")
995
+ if st.sidebar.button("Add Field"):
996
+ if "doc_editor" in st.session_state:
997
+ add_field_to_doc()
998
+ else:
999
+ st.warning("No document loaded to add a field.")
1000
+ if st.sidebar.button("New AI Record"):
1001
+ if st.session_state.get("current_container"):
1002
+ new_ai_record(st.session_state.current_container)
1003
+ else:
1004
+ st.warning("No container selected!")
1005
+ if st.sidebar.button("New Links Record"):
1006
+ if st.session_state.get("current_container"):
1007
+ new_links_record(st.session_state.current_container)
1008
+ else:
1009
+ st.warning("No container selected!")
1010
+ st.sidebar.markdown("## 🔍 Vector Search")
1011
+ search_keyword = st.sidebar.text_input("Search Keyword", key="vector_search_keyword")
1012
+ if st.sidebar.button("Search"):
1013
+ if st.session_state.get("current_container"):
1014
+ results = vector_keyword_search(search_keyword, st.session_state.current_container)
1015
+ st.sidebar.write(f"Found {len(results)} results:")
1016
+ for res in results:
1017
+ st.sidebar.code(json.dumps(res, indent=2), language="json")
1018
+ else:
1019
+ st.warning("No container selected for search!")
1020
+ # Show the sidebar data grid with records
1021
+ show_sidebar_data_grid()
1022
+ # Display Langchain functions in sidebar
1023
+ display_langchain_functions()
1024
+ # Navigator: Container selection and data grid
1025
+ try:
1026
+ if st.session_state.get("client") is None:
1027
+ st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
1028
+ st.sidebar.title("🐙 Navigator")
1029
+ databases = get_databases(st.session_state.client)
1030
+ selected_db = st.sidebar.selectbox("🗃️ DB", databases)
1031
+ if selected_db != st.session_state.get("selected_database"):
1032
+ st.session_state.selected_database = selected_db
1033
+ st.session_state.selected_container = None
1034
+ st.session_state.selected_document_id = None
1035
+ st.session_state.current_index = 0
1036
+ st.rerun()
1037
+ if st.session_state.selected_database:
1038
+ database = st.session_state.client.get_database_client(st.session_state.selected_database)
1039
+ if "show_new_container_form" not in st.session_state:
1040
+ st.session_state.show_new_container_form = False
1041
+ if st.sidebar.button("🆕 New Container"):
1042
+ st.session_state.show_new_container_form = True
1043
+ if st.session_state.show_new_container_form:
1044
+ with st.sidebar.form("new_container_form"):
1045
+ new_container_id = st.text_input("Container ID", value="aiml-container")
1046
+ new_partition_key = st.text_input("Partition Key", value="/pk")
1047
+ new_analytical = st.checkbox("Enable Analytical Store", value=True)
1048
+ submitted = st.form_submit_button("Create Container")
1049
+ if submitted:
1050
+ analytical_ttl = -1 if new_analytical else None
1051
+ new_container = create_new_container(
1052
+ database,
1053
+ new_container_id,
1054
+ new_partition_key,
1055
+ analytical_storage_ttl=analytical_ttl
1056
+ )
1057
+ if new_container:
1058
+ st.success(f"Container '{new_container_id}' created.")
1059
+ default_id = generate_unique_id()
1060
+ default_item = {
1061
+ "id": default_id,
1062
+ "pk": default_id,
1063
+ "name": "Default Image Prompt",
1064
+ "prompt": "Enter your image prompt here",
1065
+ "timestamp": datetime.now().isoformat(),
1066
+ "type": "image_prompt"
1067
+ }
1068
+ insert_success, insert_message = insert_record(new_container, default_item)
1069
+ if insert_success:
1070
+ st.info("Default templated item created in new container.")
1071
+ else:
1072
+ st.error(f"Default item insertion error: {insert_message}")
1073
+ st.session_state.show_new_container_form = False
1074
+ st.session_state.new_container_created = new_container_id
1075
+ st.rerun()
1076
+ containers = get_containers(database)
1077
+ if "new_container_created" in st.session_state and st.session_state.new_container_created not in containers:
1078
+ containers.append(st.session_state.new_container_created)
1079
+ selected_container = st.sidebar.selectbox("📁 Container", containers)
1080
+ if selected_container != st.session_state.get("selected_container"):
1081
+ st.session_state.selected_container = selected_container
1082
  st.session_state.selected_document_id = None
1083
  st.session_state.current_index = 0
1084
  st.rerun()
1085
+ if st.session_state.selected_container:
1086
+ container = database.get_container_client(st.session_state.selected_container)
1087
+ st.session_state.current_container = container
1088
+ if st.sidebar.button("📦 Export"):
1089
+ download_link = archive_current_container(st.session_state.selected_database, st.session_state.selected_container, st.session_state.client)
1090
+ if download_link.startswith('<a'):
1091
+ st.markdown(download_link, unsafe_allow_html=True)
1092
+ else:
1093
+ st.error(download_link)
1094
+ documents = get_documents(container)
1095
+ total_docs = len(documents)
1096
+ num_docs = st.slider("Docs", 1, 20, 1)
1097
+ documents_to_display = documents[:num_docs] if total_docs > num_docs else documents
1098
+ st.sidebar.info(f"Showing {len(documents_to_display)} docs")
1099
+ view_options = ['Markdown', 'Code', 'Run AI', 'Clone', 'New']
1100
+ selected_view = st.sidebar.selectbox("View", view_options, index=1)
1101
+ if selected_view == 'Markdown':
1102
+ st.markdown("#### 📄 Markdown")
1103
+ if documents:
1104
+ doc = documents[st.session_state.current_index]
1105
+ content = json.dumps(doc, indent=2)
1106
+ st.markdown(f"```json\n{content}\n```")
1107
+ col_prev, col_next = st.columns(2)
1108
+ with col_prev:
1109
+ if st.button("⬅️") and st.session_state.current_index > 0:
1110
+ st.session_state.current_index -= 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1111
  st.rerun()
1112
+ with col_next:
1113
+ if st.button("➡️") and st.session_state.current_index < total_docs - 1:
1114
+ st.session_state.current_index += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1115
  st.rerun()
1116
+ elif selected_view == 'Code':
1117
+ st.markdown("#### 💻 Code Editor")
1118
+ if documents:
1119
+ doc = documents[st.session_state.current_index]
1120
+ if "doc_editor" not in st.session_state:
1121
+ st.session_state.doc_editor = json.dumps(doc, indent=2)
1122
+ edited = st.text_area("Edit JSON", value=st.session_state.doc_editor, height=300, key="doc_editor", on_change=lambda: auto_save_edit())
1123
+ col_prev, col_next = st.columns(2)
1124
+ with col_prev:
1125
+ if st.button("⬅️") and st.session_state.current_index > 0:
1126
+ st.session_state.current_index -= 1
1127
+ st.rerun()
1128
+ with col_next:
1129
+ if st.button("➡️") and st.session_state.current_index < total_docs - 1:
1130
+ st.session_state.current_index += 1
1131
+ st.rerun()
1132
+ col_save, col_delete = st.columns(2)
1133
+ with col_save:
1134
+ if st.button("💾 Save", key=f'save_{st.session_state.current_index}'):
1135
  try:
1136
+ updated_doc = json.loads(edited)
1137
+ container.upsert_item(body=updated_doc)
1138
+ st.success(f"Saved {updated_doc['id']}")
1139
+ st.rerun()
1140
+ except Exception as e:
1141
+ st.error(f"Save err: {str(e)}")
1142
+ with col_delete:
1143
+ if st.button("🗑️ Delete", key=f'delete_{st.session_state.current_index}'):
1144
+ try:
1145
+ current_doc = json.loads(edited)
1146
+ success, message = delete_record(container, current_doc)
1147
  if success:
1148
+ st.success(message)
1149
  st.rerun()
1150
  else:
1151
  st.error(message)
1152
  except Exception as e:
1153
+ st.error(f"Delete err: {str(e)}")
1154
+ if "delete_log" in st.session_state and st.session_state.delete_log:
1155
+ st.subheader("Delete Log")
1156
+ for log_entry in st.session_state.delete_log[-5:]:
1157
+ st.write(log_entry)
1158
+ elif selected_view == 'Run AI':
1159
+ st.markdown("#### 🤖 Run AI")
1160
+ # NEW: Use a text area and a Send button (message button UI)
1161
+ ai_query = st.text_area("Enter your query for ArXiv search:", key="arxiv_query", height=100)
1162
+ if st.button("Send"):
1163
+ st.session_state.last_query = ai_query
1164
+ perform_ai_lookup(ai_query, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=True, useArxiv=True, useArxivAudio=False)
1165
+ elif selected_view == 'Clone':
1166
+ st.markdown("#### 📄 Clone")
1167
+ if documents:
1168
+ doc = documents[st.session_state.current_index]
1169
+ st.markdown(f"Original ID: {doc.get('id', '')}")
1170
+ new_id = st.text_input("New ID", value=generate_unique_id(), key='new_clone_id')
1171
+ new_name = st.text_input("New Name", value=f"Clone_{new_id[:8]}", key='new_clone_name')
1172
+ new_doc = {'id': new_id, 'pk': new_id, 'name': new_name, **{k: v for k, v in doc.items() if k not in ['id', 'name', 'pk', '_rid', '_self', '_etag', '_attachments', '_ts']}}
1173
+ doc_str = st.text_area("Edit JSON", value=json.dumps(new_doc, indent=2), height=300, key='clone_preview')
1174
+ col1, col2 = st.columns(2)
1175
+ with col1:
1176
+ if st.button("🔄 Regenerate"):
1177
+ new_id = generate_unique_id()
1178
+ st.session_state.new_clone_id = new_id
1179
+ st.rerun()
1180
+ with col2:
1181
+ if st.button("💾 Save Clone"):
1182
+ try:
1183
+ final_doc = json.loads(doc_str)
1184
+ for field in ['_rid', '_self', '_etag', '_attachments', '_ts']:
1185
+ final_doc.pop(field, None)
1186
+ container.create_item(body=final_doc)
1187
+ st.success(f"Cloned {final_doc['id']}")
1188
+ st.rerun()
1189
+ except Exception as e:
1190
+ st.error(f"Clone err: {str(e)}")
1191
+ col_prev, col_next = st.columns(2)
1192
+ with col_prev:
1193
+ if st.button("⬅️") and st.session_state.current_index > 0:
1194
+ st.session_state.current_index -= 1
1195
+ st.rerun()
1196
+ with col_next:
1197
+ if st.button("➡️") and st.session_state.current_index < total_docs - 1:
1198
+ st.session_state.current_index += 1
1199
+ st.rerun()
1200
+ elif selected_view == 'New':
1201
+ st.markdown("#### ➕ New Doc")
1202
+ if st.button("🤖 Auto-Gen"):
1203
+ auto_doc = {
1204
+ "id": generate_unique_id(),
1205
+ "pk": generate_unique_id(),
1206
+ "name": f"Auto {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
1207
+ "content": "Auto-generated record.",
1208
+ "timestamp": datetime.now().isoformat()
1209
+ }
1210
+ success, message = insert_record(container, auto_doc)
1211
+ if success:
1212
+ st.success(message)
1213
+ st.rerun()
1214
+ else:
1215
+ st.error(message)
1216
+ else:
1217
+ new_id = st.text_input("ID", value=generate_unique_id(), key='new_id')
1218
+ default_doc = {
1219
+ "id": new_id,
1220
+ "pk": new_id,
1221
+ "name": "New Doc",
1222
+ "content": "",
1223
+ "timestamp": datetime.now().isoformat()
1224
+ }
1225
+ new_doc_str = st.text_area("JSON", value=json.dumps(default_doc, indent=2), height=300)
1226
+ if st.button("➕ Create"):
1227
+ try:
1228
+ cleaned = preprocess_text(new_doc_str)
1229
+ new_doc = json.loads(cleaned)
1230
+ new_doc['id'] = new_id
1231
+ new_doc['pk'] = new_id
1232
+ success, message = insert_record(container, new_doc)
1233
+ if success:
1234
+ st.success(f"Created {new_doc['id']}")
1235
+ st.rerun()
1236
+ else:
1237
+ st.error(message)
1238
+ except Exception as e:
1239
+ st.error(f"Create err: {str(e)}")
1240
  st.subheader(f"📊 {st.session_state.selected_container}")
1241
  if documents_to_display:
1242
  df = pd.DataFrame(documents_to_display)
1243
  st.dataframe(df)
1244
  else:
1245
  st.info("No docs.")
1246
+ update_file_management_section()
1247
+ except exceptions.CosmosHttpResponseError as e:
1248
+ st.error(f"Cosmos error: {str(e)} 🚨")
1249
+ except Exception as e:
1250
+ st.error(f"Error: {str(e)} 😱")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1251
  if st.session_state.logged_in and st.sidebar.button("🚪 Logout"):
1252
  st.markdown("#### 🚪 Logout")
1253
  st.session_state.logged_in = False
 
1259
  st.session_state.current_index = 0
1260
  st.rerun()
1261
 
1262
+ # Also display the sidebar data grid (records overview)
1263
+ show_sidebar_data_grid()
1264
+
1265
+ # =============================================================================
1266
+ # Additional Blank Lines for Spacing (~1500 lines total)
1267
+ # =============================================================================
1268
+ #
1269
+ #
1270
+ #
1271
+ #
1272
+ #
1273
+ #
1274
+ #
1275
+ #
1276
+ #
1277
+ #
1278
+ #
1279
+ #
1280
+ #
1281
+ #
1282
+ #
1283
+ #
1284
+ #
1285
+ #
1286
+ #
1287
+ #
1288
+ #
1289
+ #
1290
+ #
1291
+ #
1292
+ #
1293
+ #
1294
+ #
1295
+ #
1296
+ #
1297
+ #
1298
+ #
1299
+ #
1300
+ #
1301
+ #
1302
+ #
1303
+ #
1304
+ #
1305
+ #
1306
+ #
1307
+ #
1308
+ #
1309
+ #
1310
+ #
1311
+ #
1312
+ #
1313
+ #
1314
+ #
1315
+ #
1316
+ #
1317
+ #
1318
+ #
1319
+ #
1320
+ #
1321
+ #
1322
+ #
1323
+ #
1324
+ #
1325
+ #
1326
+ #
1327
+ #
1328
+ #
1329
+ #
1330
+ #
1331
+ #
1332
+ #
1333
+ #
1334
+ #
1335
+ #
1336
+ #
1337
+ #
1338
+ #
1339
+ #
1340
+ #
1341
+ #
1342
+ #
1343
+ #
1344
+ #
1345
+ #
1346
+ #
1347
+ #
1348
+ #
1349
+ #
1350
+ #
1351
+ #
1352
+ #
1353
+ #
1354
+ #
1355
+ #
1356
+ #
1357
+ #
1358
+ #
1359
+ #
1360
+ #
1361
+ #
1362
+ #
1363
+ #
1364
+ #
1365
+ #
1366
+ #
1367
+ #
1368
+ #
1369
+ #
1370
+ #
1371
+ #
1372
+ #
1373
+ #
1374
+ #
1375
+ #
1376
+ #
1377
+ #
1378
+ #
1379
+ #
1380
+ #
1381
+ #
1382
+ #
1383
+ #
1384
+ #
1385
+ #
1386
+ #
1387
+ #
1388
+ #
1389
+ #
1390
+ #
1391
+ #
1392
+ #
1393
+ #
1394
+ #
1395
+ #
1396
+ #
1397
+ #
1398
+ #
1399
+ #
1400
+ #
1401
+ #
1402
+ #
1403
+ #
1404
+ #
1405
+ #
1406
+ #
1407
+ #
1408
+ #
1409
+ #
1410
+ #
1411
+ #
1412
+ #
1413
+ # End of app.py