# app.py # ============================================================================= # ───────────── IMPORTS ───────────── # ============================================================================= import base64 import glob import hashlib import json import os import pandas as pd import pytz import random import re import shutil import streamlit as st import time import traceback import uuid import zipfile from PIL import Image from azure.cosmos import CosmosClient, PartitionKey, exceptions from datetime import datetime from git import Repo from github import Github from gradio_client import Client, handle_file import tempfile import io import requests import numpy as np from urllib.parse import quote # ============================================================================= # ───────────── EXTERNAL HELP LINKS (Always visible in sidebar) ───────────── # ============================================================================= external_links = [ {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "💻"}, {"title": "MergeKit arXiv Paper", "url": "https://arxiv.org/abs/xxxx.xxxxx", "emoji": "📘"}, {"title": "MergeKit Tutorial", "url": "https://huggingface.co/blog/mergekit-tutorial", "emoji": "✍️"}, {"title": "MergeKit Sample Usage", "url": "https://github.com/arcee-ai/MergeKit#examples", "emoji": "📚"}, {"title": "DistillKit Official GitHub", "url": "https://github.com/arcee-ai/DistillKit", "emoji": "💻"}, {"title": "DistillKit Announcing Blog Post", "url": "https://arcee.ai/blog/distillkit-announcement", "emoji": "✍️"}, {"title": "DistillKit Sample Usage", "url": "https://github.com/arcee-ai/DistillKit#usage", "emoji": "📚"}, {"title": "Spectrum Hugging Face Blog Post", "url": "https://huggingface.co/blog/spectrum", "emoji": "✍️"}, {"title": "Hugging Face Model Merging Docs", "url": "https://huggingface.co/docs/peft/model_merging", "emoji": "📚"}, {"title": "arcee.ai Official Website", "url": "https://arcee.ai", "emoji": "🌐"}, ] # ============================================================================= # ───────────── APP CONFIGURATION ───────────── # ============================================================================= Site_Name = '🐙 GitCosmos' title = "🐙 GitCosmos" helpURL = 'https://huggingface.co/awacke1' bugURL = 'https://huggingface.co/spaces/awacke1/AzureCosmosDBUI/' icons = '🐙🌌💫' st.set_page_config( page_title=title, page_icon=icons, layout="wide", initial_sidebar_state="auto", menu_items={ 'Get Help': helpURL, 'Report a bug': bugURL, 'About': title } ) # Cosmos DB & App URLs ENDPOINT = "https://acae-afd.documents.azure.com:443/" DATABASE_NAME = os.environ.get("COSMOS_DATABASE_NAME") CONTAINER_NAME = os.environ.get("COSMOS_CONTAINER_NAME") Key = os.environ.get("Key") LOCAL_APP_URL = "https://huggingface.co/spaces/awacke1/AzureCosmosDBUI" CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer' # ============================================================================= # ───────────── HELPER FUNCTIONS ───────────── # ============================================================================= def get_download_link(file_path): with open(file_path, "rb") as file: contents = file.read() b64 = base64.b64encode(contents).decode() file_name = os.path.basename(file_path) return f'Download {file_name} 📂' def generate_unique_id(): timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f') unique_uuid = str(uuid.uuid4()) return_value = f"{timestamp}-{unique_uuid}" st.write('New ID: ' + return_value) return return_value def generate_filename(prompt, file_type): central = pytz.timezone('US/Central') safe_date_time = datetime.now(central).strftime("%m%d_%H%M") safe_prompt = re.sub(r'\W+', '', prompt)[:90] return f"{safe_date_time}{safe_prompt}.{file_type}" def create_file(filename, prompt, response, should_save=True): if not should_save: return with open(filename, 'w', encoding='utf-8') as file: file.write(prompt + "\n\n" + response) def load_file(file_name): with open(file_name, "r", encoding='utf-8') as file: content = file.read() return content def display_glossary_entity(k): search_urls = { "🚀": lambda k: f"/?q={k}", "📖": lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}", "🔍": lambda k: f"https://www.google.com/search?q={quote(k)}", "🎥": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}", } links_md = ' '.join([f"{emoji}" for emoji, url in search_urls.items()]) st.markdown(f"{k} {links_md}", unsafe_allow_html=True) def create_zip_of_files(files): zip_name = "all_files.zip" with zipfile.ZipFile(zip_name, 'w') as zipf: for file in files: zipf.write(file) return zip_name def get_video_html(video_path, width="100%"): video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}" return f''' ''' def get_audio_html(audio_path, width="100%"): audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}" return f''' ''' def preprocess_text(text): text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n') text = text.replace('"', '\\"') text = re.sub(r'[\t]', ' ', text) text = re.sub(r'[^\x00-\x7F]+', '', text) return text.strip() # ============================================================================= # ───────────── COSMOS DB FUNCTIONS ───────────── # ============================================================================= def get_databases(client): return [db['id'] for db in client.list_databases()] def get_containers(database): return [container['id'] for container in database.list_containers()] def get_documents(container, limit=None): query = "SELECT * FROM c ORDER BY c._ts DESC" items = list(container.query_items(query=query, enable_cross_partition_query=True, max_item_count=limit)) return items def insert_record(container, record): try: container.create_item(body=record) return True, "Inserted! 🎉" except exceptions.CosmosHttpResponseError as e: return False, f"HTTP error: {str(e)} 🚨" except Exception as e: return False, f"Error: {str(e)} 😱" def update_record(container, updated_record): try: container.upsert_item(body=updated_record) return True, f"Updated {updated_record['id']} 🛠️" except exceptions.CosmosHttpResponseError as e: return False, f"HTTP error: {str(e)} 🚨" except Exception as e: return False, f"Error: {traceback.format_exc()} 😱" def delete_record(container, record): try: if "id" not in record: return False, "Record must contain an 'id' field. 🛑" doc_id = record["id"] if "delete_log" not in st.session_state: st.session_state.delete_log = [] st.session_state.delete_log.append(f"Attempting to delete document: {json.dumps(record, indent=2)}") partition_key_value = record.get("pk", doc_id) st.session_state.delete_log.append(f"Using ID and Partition Key: {partition_key_value}") container.delete_item(item=doc_id, partition_key=partition_key_value) success_msg = f"Record {doc_id} successfully deleted from Cosmos DB. 🗑️" st.session_state.delete_log.append(success_msg) return True, success_msg except exceptions.CosmosResourceNotFoundError: success_msg = f"Record {doc_id} not found in Cosmos DB (already deleted or never existed). 🗑️" st.session_state.delete_log.append(success_msg) return True, success_msg except exceptions.CosmosHttpResponseError as e: error_msg = f"HTTP error deleting {doc_id}: {str(e)}. 🚨" st.session_state.delete_log.append(error_msg) return False, error_msg except Exception as e: error_msg = f"Unexpected error deleting {doc_id}: {str(traceback.format_exc())}. 😱" st.session_state.delete_log.append(error_msg) return False, error_msg def save_to_cosmos_db(container, query, response1, response2): try: if container: timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f') unique_uuid = str(uuid.uuid4()) new_id = f"{timestamp}-{unique_uuid}" record = { "id": new_id, "pk": new_id, "name": new_id, "query": query, "response1": response1, "response2": response2, "timestamp": datetime.utcnow().isoformat(), "type": "ai_response", "version": "1.0" } container.create_item(body=record) st.success(f"Saved: {record['id']}") st.session_state.documents = get_documents(container) else: st.error("Cosmos container not initialized.") except Exception as e: st.error(f"Save error: {str(e)}") def archive_current_container(database_name, container_name, client): try: base_dir = "./cosmos_archive_current_container" if os.path.exists(base_dir): shutil.rmtree(base_dir) os.makedirs(base_dir) db_client = client.get_database_client(database_name) container_client = db_client.get_container_client(container_name) items = list(container_client.read_all_items()) container_dir = os.path.join(base_dir, container_name) os.makedirs(container_dir) for item in items: item_id = item.get('id', f"unknown_{datetime.now().strftime('%Y%m%d%H%M%S')}") with open(os.path.join(container_dir, f"{item_id}.json"), 'w') as f: json.dump(item, f, indent=2) archive_name = f"{container_name}_archive_{datetime.now().strftime('%Y%m%d%H%M%S')}" shutil.make_archive(archive_name, 'zip', base_dir) return get_download_link(f"{archive_name}.zip") except Exception as e: return f"Archive error: {str(e)} 😢" # ============================================================================= # ───────────── ADVANCED COSMOS FUNCTIONS ───────────── # ============================================================================= def create_new_container(database, container_id, partition_key_path, analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None): try: if analytical_storage_ttl is not None: container = database.create_container( id=container_id, partition_key=PartitionKey(path=partition_key_path), analytical_storage_ttl=analytical_storage_ttl, indexing_policy=indexing_policy, vector_embedding_policy=vector_embedding_policy ) else: container = database.create_container( id=container_id, partition_key=PartitionKey(path=partition_key_path), indexing_policy=indexing_policy, vector_embedding_policy=vector_embedding_policy ) except exceptions.CosmosHttpResponseError as e: if analytical_storage_ttl is not None and "analyticalStorageTtl" in str(e): try: container = database.create_container( id=container_id, partition_key=PartitionKey(path=partition_key_path), indexing_policy=indexing_policy, vector_embedding_policy=vector_embedding_policy ) except Exception as e2: st.error(f"Error creating container without analytical_storage_ttl: {str(e2)}") return None elif isinstance(e, exceptions.CosmosResourceExistsError): container = database.get_container_client(container_id) else: st.error(f"Error creating container: {str(e)}") return None return container def advanced_insert_item(container, item): try: container.upsert_item(item) return True, f"Item {item.get('id', '')} inserted. ➕" except Exception as e: return False, str(e) def advanced_update_item(container, item): try: container.upsert_item(item) return True, f"Item {item.get('id', '')} updated. ✏️" except Exception as e: return False, str(e) def advanced_delete_item(container, item_id, partition_key_value): try: container.delete_item(item=item_id, partition_key=partition_key_value) return True, f"Item {item_id} deleted. 🗑️" except Exception as e: return False, str(e) def vector_search(container, query_vector, vector_field, top=10, exact_search=False): query_vector_str = json.dumps(query_vector) query = f"""SELECT TOP {top} c.id, VectorDistance(c.{vector_field}, {query_vector_str}, {str(exact_search).lower()}, {{'dataType':'float32','distanceFunction':'cosine'}}) AS SimilarityScore FROM c ORDER BY SimilarityScore""" results = list(container.query_items(query=query, enable_cross_partition_query=True)) return results # ============================================================================= # ───────────── GITHUB FUNCTIONS ───────────── # ============================================================================= def download_github_repo(url, local_path): if os.path.exists(local_path): shutil.rmtree(local_path) Repo.clone_from(url, local_path) def create_zip_file(source_dir, output_filename): shutil.make_archive(output_filename, 'zip', source_dir) def create_repo(g, repo_name): user = g.get_user() return user.create_repo(repo_name) def push_to_github(local_path, repo, github_token): repo_url = f"https://{github_token}@github.com/{repo.full_name}.git" local_repo = Repo(local_path) if 'origin' in [remote.name for remote in local_repo.remotes]: origin = local_repo.remote('origin') origin.set_url(repo_url) else: origin = local_repo.create_remote('origin', repo_url) if not local_repo.heads: local_repo.git.checkout('-b', 'main') current_branch = 'main' else: current_branch = local_repo.active_branch.name local_repo.git.add(A=True) if local_repo.is_dirty(): local_repo.git.commit('-m', 'Initial commit') origin.push(refspec=f'{current_branch}:{current_branch}') # ============================================================================= # ───────────── FILE & MEDIA MANAGEMENT FUNCTIONS ───────────── # ============================================================================= def display_saved_files_in_sidebar(): all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True) st.sidebar.markdown("## 📁 Files") for file in all_files: col1, col2, col3 = st.sidebar.columns([6, 2, 1]) with col1: st.markdown(f"📄 {file}") with col2: st.sidebar.download_button( label="⬇️", data=open(file, 'rb').read(), file_name=file ) with col3: if st.sidebar.button("🗑", key=f"delete_{file}"): os.remove(file) st.rerun() def display_file_viewer(file_path): content = load_file(file_path) if content: st.markdown("### 📄 File Viewer") st.markdown(f"**{file_path}**") file_stats = os.stat(file_path) st.markdown(f"**Mod:** {datetime.fromtimestamp(file_stats.st_mtime).strftime('%Y-%m-%d %H:%M:%S')} | **Size:** {file_stats.st_size} bytes") st.markdown("---") st.markdown(content) st.download_button("⬇️", data=content, file_name=os.path.basename(file_path), mime="text/markdown") def display_file_editor(file_path): if 'file_content' not in st.session_state: st.session_state.file_content = {} if file_path not in st.session_state.file_content: content = load_file(file_path) if content is not None: st.session_state.file_content[file_path] = content else: return st.markdown("### ✏️ Edit File") st.markdown(f"**Editing:** {file_path}") md_tab, code_tab = st.tabs(["Markdown", "Code"]) with md_tab: st.markdown(st.session_state.file_content[file_path]) with code_tab: new_content = st.text_area("Edit:", value=st.session_state.file_content[file_path], height=400, key=f"editor_{hash(file_path)}") col1, col2 = st.columns([1, 5]) with col1: if st.button("💾 Save"): if save_file_content(file_path, new_content): st.session_state.file_content[file_path] = new_content st.success("Saved! 🎉") time.sleep(1) st.rerun() with col2: st.download_button("⬇️", data=new_content, file_name=os.path.basename(file_path), mime="text/markdown") def save_file_content(file_path, content): try: with open(file_path, 'w', encoding='utf-8') as file: file.write(content) return True except Exception as e: st.error(f"Save error: {str(e)}") return False def update_file_management_section(): if 'file_view_mode' not in st.session_state: st.session_state.file_view_mode = None if 'current_file' not in st.session_state: st.session_state.current_file = None if 'file_content' not in st.session_state: st.session_state.file_content = {} all_files = sorted(glob.glob("*.md"), reverse=True) st.sidebar.title("📁 Files") if st.sidebar.button("🗑 Delete All"): for file in all_files: os.remove(file) st.session_state.file_content = {} st.session_state.current_file = None st.session_state.file_view_mode = None st.rerun() if st.sidebar.button("⬇️ Download All"): zip_file = create_zip_of_files(all_files) st.sidebar.markdown(get_download_link(zip_file), unsafe_allow_html=True) for file in all_files: col1, col2, col3, col4 = st.sidebar.columns([1, 3, 1, 1]) with col1: if st.button("🌐", key=f"view_{file}"): st.session_state.current_file = file st.session_state.file_view_mode = 'view' if file not in st.session_state.file_content: content = load_file(file) if content is not None: st.session_state.file_content[file] = content st.rerun() with col2: st.markdown(get_download_link(file), unsafe_allow_html=True) with col3: if st.button("📂", key=f"edit_{file}"): st.session_state.current_file = file st.session_state.file_view_mode = 'edit' if file not in st.session_state.file_content: content = load_file(file) if content is not None: st.session_state.file_content[file] = content st.rerun() with col4: if st.button("🗑", key=f"delete_{file}"): os.remove(file) if file in st.session_state.file_content: del st.session_state.file_content[file] if st.session_state.current_file == file: st.session_state.current_file = None st.session_state.file_view_mode = None st.rerun() st.sidebar.markdown("---") st.sidebar.title("External Help Links") for link in external_links: st.sidebar.markdown(f"{link['emoji']} [{link['title']}]({link['url']})", unsafe_allow_html=True) if st.session_state.current_file: if st.session_state.file_view_mode == 'view': display_file_viewer(st.session_state.current_file) elif st.session_state.file_view_mode == 'edit': display_file_editor(st.session_state.current_file) # ============================================================================= # ───────────── VIDEO & AUDIO UI FUNCTIONS ───────────── # ============================================================================= def validate_and_preprocess_image(file_data, target_size=(576, 1024)): try: st.write("Preprocessing image...") if isinstance(file_data, bytes): img = Image.open(io.BytesIO(file_data)) elif hasattr(file_data, 'read'): if hasattr(file_data, 'seek'): file_data.seek(0) img = Image.open(file_data) elif isinstance(file_data, Image.Image): img = file_data else: raise ValueError(f"Unsupported input: {type(file_data)}") if img.mode != 'RGB': img = img.convert('RGB') aspect_ratio = img.size[0] / img.size[1] if aspect_ratio > target_size[0] / target_size[1]: new_width = target_size[0] new_height = int(new_width / aspect_ratio) else: new_height = target_size[1] new_width = int(new_height * aspect_ratio) new_width = (new_width // 2) * 2 new_height = (new_height // 2) * 2 resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS) final_img = Image.new('RGB', target_size, (255, 255, 255)) paste_x = (target_size[0] - new_width) // 2 paste_y = (target_size[1] - new_height) // 2 final_img.paste(resized_img, (paste_x, paste_y)) return final_img except Exception as e: st.error(f"Image error: {str(e)}") return None def add_video_generation_ui(container): st.markdown("### 🎥 Video Gen") col1, col2 = st.columns([2, 1]) with col1: uploaded_file = st.file_uploader("Upload Image 🖼️", type=['png', 'jpg', 'jpeg']) with col2: st.markdown("#### Params") motion = st.slider("🌊 Motion", 1, 255, 127) fps = st.slider("🎬 FPS", 1, 30, 6) with st.expander("Advanced"): use_custom = st.checkbox("Custom Seed") seed = st.number_input("Seed", value=int(time.time() * 1000)) if use_custom else None if uploaded_file is not None: try: file_data = uploaded_file.read() preview1, preview2 = st.columns(2) with preview1: st.write("Original") st.image(Image.open(io.BytesIO(file_data)), use_column_width=True) with preview2: proc_img = validate_and_preprocess_image(io.BytesIO(file_data)) if proc_img: st.write("Processed") st.image(proc_img, use_column_width=True) else: st.error("Preprocess failed") return if st.button("🎥 Generate"): with st.spinner("Generating video..."): with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file: proc_img.save(temp_file.name, format='PNG') try: client = Client("awacke1/stable-video-diffusion", hf_token=os.environ.get("HUGGINGFACE_TOKEN")) result = client.predict( image=temp_file.name, seed=seed if seed is not None else int(time.time() * 1000), randomize_seed=seed is None, motion_bucket_id=motion, fps_id=fps, api_name="/video" ) if result and isinstance(result, tuple) and len(result) >= 1: video_path = result[0].get('video') if isinstance(result[0], dict) else None if video_path and os.path.exists(video_path): video_filename = f"generated_video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4" shutil.copy(video_path, video_filename) st.success(f"Video generated! 🎉") st.video(video_filename) if container: video_record = { "id": generate_unique_id(), "pk": generate_unique_id(), "type": "generated_video", "filename": video_filename, "seed": seed if seed is not None else "random", "motion": motion, "fps": fps, "timestamp": datetime.now().isoformat() } success, message = insert_record(container, video_record) if success: st.success("DB record saved!") else: st.error(f"DB error: {message}") else: st.error("Invalid result format") else: st.error("No result returned") except Exception as e: st.error(f"Video gen error: {str(e)}") finally: try: os.unlink(temp_file.name) st.write("Temp file removed") except Exception as e: st.warning(f"Cleanup error: {str(e)}") except Exception as e: st.error(f"Upload error: {str(e)}") # ============================================================================= # ───────────── AI SAMPLES SIDEBAR (Full Text & RAG/Vector Search Demos) ───────────── # ============================================================================= def display_ai_samples(): st.sidebar.markdown("### 🤖 AI Samples") st.sidebar.info("🚀 Get started with our AI samples! Time free access to get started today.") with st.sidebar.expander("🔍 Full Text Search Examples"): st.markdown(""" **FullTextContains Example** ```sql SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "bicycle")