# Page-header residue from the hosting site: "Spaces: Sleeping".
import base64
import json
import os
import shutil
import tempfile
import traceback
import uuid  # For generating unique IDs
from datetime import datetime
from itertools import islice
from urllib.parse import quote  # For encoding URLs

import pandas as pd
import streamlit as st
from azure.cosmos import CosmosClient, exceptions
from git import Repo
from github import Github
from gradio_client import Client  # For connecting to Gradio apps
# π Welcome to our fun-filled Cosmos DB and GitHub Integration app! | |
# Page setup: render the app using the wide layout.
st.set_page_config(layout="wide")

# Cosmos DB connection settings. The endpoint is fixed; the database name,
# container name and access key are read from environment variables.
ENDPOINT = "https://acae-afd.documents.azure.com:443/"
DATABASE_NAME = os.getenv("COSMOS_DATABASE_NAME")
CONTAINER_NAME = os.getenv("COSMOS_CONTAINER_NAME")
Key = os.getenv("Key")  # Access key for the Cosmos DB account

# Base URL of this app; used when building links that point back at it.
# Change this to your app's URL.
LOCAL_APP_URL = "http://localhost:8501"
# π€ OpenAI configuration | |
#openai.api_key = os.environ.get("OPENAI_API_KEY") | |
#MODEL = "gpt-3.5-turbo" # Replace with your desired model | |
# π GitHub configuration | |
def download_github_repo(url, local_path):
    """Clone the repository at ``url`` into ``local_path``.

    If ``local_path`` already exists it is removed first, so the clone always
    lands in a fresh directory.
    """
    already_present = os.path.exists(local_path)
    if already_present:
        shutil.rmtree(local_path)
    Repo.clone_from(url, local_path)
def create_zip_file(source_dir, output_filename):
    """Zip the contents of ``source_dir``.

    ``output_filename`` is the archive path *without* the ``.zip`` suffix;
    ``shutil.make_archive`` appends the extension itself.
    """
    shutil.make_archive(base_name=output_filename, format='zip', root_dir=source_dir)
def create_repo(g, repo_name):
    """Create a repository named ``repo_name`` for the user behind ``g``.

    ``g`` is the GitHub client object; the new repository object returned by
    ``create_repo`` on its user is passed straight back to the caller.
    """
    return g.get_user().create_repo(repo_name)
def push_to_github(local_path, repo, github_token):
    """Push the working copy at ``local_path`` to the GitHub repository ``repo``.

    The ``origin`` remote is pointed at a token-authenticated HTTPS URL built
    from ``repo.full_name``. All files are staged, committed if anything is
    pending, and the current branch is pushed under the same name.
    """
    authed_url = f"https://{github_token}@github.com/{repo.full_name}.git"
    working_copy = Repo(local_path)

    # Reuse an existing 'origin' remote when there is one, otherwise add it.
    remote_names = {remote.name for remote in working_copy.remotes}
    if 'origin' not in remote_names:
        origin = working_copy.create_remote('origin', authed_url)
    else:
        origin = working_copy.remote('origin')
        origin.set_url(authed_url)

    # A repository without any branch yet gets a fresh 'main' branch.
    if working_copy.heads:
        current_branch = working_copy.active_branch.name
    else:
        working_copy.git.checkout('-b', 'main')
        current_branch = 'main'

    working_copy.git.add(A=True)
    if working_copy.is_dirty():
        working_copy.git.commit('-m', 'Initial commit')

    origin.push(refspec=f'{current_branch}:{current_branch}')
def get_base64_download_link(file_path, file_name):
    """Return an HTML anchor that downloads ``file_path`` as ``file_name``.

    The file is read in binary mode and embedded in the link as a base64
    ``data:application/zip`` URI.
    """
    with open(file_path, "rb") as source:
        payload = base64.b64encode(source.read()).decode()
    return f'<a href="data:application/zip;base64,{payload}" download="{file_name}">β¬οΈ Download {file_name}</a>'
# π§ New functions for dynamic sidebar navigation | |
def get_databases(client):
    """Return the ``id`` of every database listed by ``client``."""
    database_ids = []
    for database in client.list_databases():
        database_ids.append(database['id'])
    return database_ids
def get_containers(database):
    """Return the ``id`` of every container listed by ``database``."""
    container_ids = []
    for entry in database.list_containers():
        container_ids.append(entry['id'])
    return container_ids
def get_documents(container, limit=None):
    """Return documents from ``container``, newest first (ordered by ``_ts``).

    Args:
        container: Cosmos container client to query.
        limit: Maximum number of documents to return. ``None`` (the default)
            returns every document.

    Returns:
        A list of document dicts.
    """
    query = "SELECT * FROM c ORDER BY c._ts DESC"
    results = container.query_items(
        query=query,
        enable_cross_partition_query=True,
        # This only sizes each result page; iterating the result still walks
        # every page, so the total is capped separately below.
        max_item_count=limit,
    )
    # A negative limit is treated as "no cap" instead of making islice raise.
    cap = limit if limit is not None and limit >= 0 else None
    return list(islice(results, cap))
# π Cosmos DB functions | |
def insert_record(container, record):
    """Create ``record`` in ``container``.

    Returns:
        A ``(success, message)`` tuple. Errors are reported through the
        message rather than raised.
    """
    try:
        container.create_item(body=record)
    except exceptions.CosmosHttpResponseError as http_err:
        return False, f"HTTP error occurred: {str(http_err)} π¨"
    except Exception as err:
        return False, f"An unexpected error occurred: {str(err)} π±"
    else:
        return True, "Record inserted successfully! π"
def update_record(container, updated_record):
    """Upsert ``updated_record`` into ``container``.

    Returns:
        A ``(success, message)`` tuple. HTTP errors are reported with their
        text; any other failure is reported with the full traceback.
    """
    try:
        container.upsert_item(body=updated_record)
        # Built inside the try so a record without 'id' is reported, not raised.
        outcome = (True, f"Record with id {updated_record['id']} successfully updated. π οΈ")
    except exceptions.CosmosHttpResponseError as http_err:
        outcome = (False, f"HTTP error occurred: {str(http_err)} π¨")
    except Exception:
        outcome = (False, f"An unexpected error occurred: {traceback.format_exc()} π±")
    return outcome
def delete_record(container, name, id):
    """Delete the item ``id`` from ``container``.

    ``name`` is only used in the success message.

    Returns:
        A ``(success, message)`` tuple. Errors are reported through the
        message rather than raised.
    """
    try:
        # NOTE(review): the id doubles as the partition key value here, which
        # presumably assumes the container is partitioned on /id - confirm.
        container.delete_item(item=id, partition_key=id)
        outcome = (True, f"Successfully deleted record with name: {name} and id: {id} ποΈ")
    except exceptions.CosmosResourceNotFoundError:
        outcome = (False, f"Record with id {id} not found. It may have been already deleted. π΅οΈββοΈ")
    except exceptions.CosmosHttpResponseError as http_err:
        outcome = (False, f"HTTP error occurred: {str(http_err)} π¨")
    except Exception:
        outcome = (False, f"An unexpected error occurred: {traceback.format_exc()} π±")
    return outcome
# π² Function to generate a unique UUID | |
def generate_unique_id():
    """Return a new random (version 4) UUID as a string."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
# π¦ Function to archive current container | |
def archive_current_container(database_name, container_name, client):
    """Export every item of a container as a zip and return a download link.

    Each item is written to ``<container_name>/<id>.json`` inside the archive.
    All intermediate files live in a private temporary directory that is
    removed before returning, so nothing is left in the working directory and
    simultaneous exports cannot clobber each other.

    Args:
        database_name: Name of the database holding the container.
        container_name: Name of the container to export.
        client: Cosmos client used to reach the database.

    Returns:
        An HTML ``<a>`` download link on success, or an error message string
        on failure (callers tell the two apart by the ``<a`` prefix).
    """
    try:
        db_client = client.get_database_client(database_name)
        container_client = db_client.get_container_client(container_name)
        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        archive_name = f"{container_name}_archive_{stamp}"

        with tempfile.TemporaryDirectory() as workspace:
            staging_dir = os.path.join(workspace, "cosmos_archive_current_container")
            container_dir = os.path.join(staging_dir, container_name)
            os.makedirs(container_dir)

            for position, item in enumerate(container_client.read_all_items()):
                # The position keeps fallback names unique, so several items
                # without an id no longer overwrite one another.
                item_id = item.get('id', f"unknown_{stamp}_{position}")
                item_path = os.path.join(container_dir, f"{item_id}.json")
                with open(item_path, 'w', encoding='utf-8') as f:
                    json.dump(item, f, indent=2)

            zip_path = shutil.make_archive(os.path.join(workspace, archive_name), 'zip', staging_dir)
            # The link embeds the zip bytes, so the file can be deleted afterwards.
            return get_base64_download_link(zip_path, f"{archive_name}.zip")
    except Exception as e:
        return f"An error occurred while archiving data: {str(e)} π’"
# π Search Glossary function | |
def search_glossary(query):
    """Run ``query`` against the ArXiv RAG Gradio Space and render the results.

    Two endpoints of the same Space are called: ``/ask_llm`` for a summary and
    ``/update_with_rag_md`` for paper references. Each result is written to
    the page as it arrives.

    Returns:
        The summary followed by the first two elements of the references
        response, concatenated into one string.
    """
    st.markdown(f"- {query}")

    # Both endpoints live on the same Space, so one connection serves both.
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    # Run 1 - paper summary via the LLM endpoint.
    response2 = client.predict(
        query,                 # str in 'parameter_13' Textbox component
        "google/gemma-7b-it",  # LLM Model Dropdown component
        True,                  # Stream output Checkbox component
        api_name="/ask_llm"
    )
    st.write('π Run of Multi-Agent System Paper Summary Spec is Complete')
    st.markdown(response2)

    # Run 2 - paper references via the RAG endpoint.
    response1 = client.predict(
        query,
        10,
        "Semantic Search - up to 10 Mar 2024",   # Search Source Dropdown component
        "mistralai/Mixtral-8x7B-Instruct-v0.1",  # LLM Model Dropdown component
        api_name="/update_with_rag_md"
    )
    st.write('π Run of Multi-Agent System Paper References is Complete')

    responseall = response2 + response1[0] + response1[1]
    st.markdown(responseall)
    return responseall
# π Function to process text input | |
def process_text(text_input):
    """Send ``text_input`` to the chat model and render the exchange.

    The user message and the assistant reply are appended to
    ``st.session_state.messages``, shown as chat messages, and the reply is
    saved to a markdown file. Nothing happens for empty input.
    """
    if not text_input:
        return

    # `openai` and `MODEL` appear in this file only as commented-out
    # configuration. Report that explicitly instead of failing with NameError.
    llm = globals().get("openai")
    model_name = globals().get("MODEL")
    if llm is None or model_name is None:
        st.error("OpenAI is not configured: `openai` and `MODEL` must be defined before text queries can be answered.")
        return

    if 'messages' not in st.session_state:
        st.session_state.messages = []
    st.session_state.messages.append({"role": "user", "content": text_input})

    with st.chat_message("user"):
        st.markdown(text_input)

    with st.chat_message("assistant"):
        completion = llm.ChatCompletion.create(
            model=model_name,
            messages=[
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
            ],
            stream=False
        )
        return_text = completion.choices[0].message.content
        st.write("Assistant: " + return_text)
        # create_and_save_file derives the filename itself.
        create_and_save_file(return_text, file_type="md", prompt=text_input, is_image=False, should_save=True)
        st.session_state.messages.append({"role": "assistant", "content": return_text})
# π Function to generate a filename | |
def generate_filename(text, file_type, max_length=200):
    """Build a filesystem-friendly filename from ``text``.

    Every character that is not alphanumeric, a space, ``.`` or ``_`` becomes
    ``_``; runs of spaces then collapse into single underscores.

    Args:
        text: Source text for the name (for example a prompt or a title).
        file_type: Extension without the leading dot.
        max_length: Upper bound, in characters, for the whole filename. Long
            prompts would otherwise exceed common filesystem name limits.

    Returns:
        ``"<stem>.<file_type>"``; the stem falls back to ``"untitled"`` when
        ``text`` contains nothing usable.
    """
    sanitized = "".join(ch if ch.isalnum() or ch in (' ', '.', '_') else '_' for ch in text)
    stem = "_".join(sanitized.split())
    suffix = f".{file_type}"
    # Always leave room for at least one stem character.
    room = max(max_length - len(suffix), 1)
    stem = stem[:room] or "untitled"
    return f"{stem}{suffix}"
# π·οΈ Function to extract markdown title | |
def extract_markdown_title(content):
    """Return the text of the first line starting with ``#``, or ``None``.

    Leading ``#`` characters and surrounding whitespace are stripped from the
    returned title.
    """
    heading_lines = (line for line in content.splitlines() if line.startswith('#'))
    first_heading = next(heading_lines, None)
    if first_heading is None:
        return None
    return first_heading.lstrip('#').strip()
# πΎ Function to create and save a file | |
def create_and_save_file(content, file_type="md", prompt=None, is_image=False, should_save=True):
    """Pick a filename for ``content`` and write it to disk.

    The filename comes from ``prompt`` when given, otherwise from ``content``.
    For markdown files, the first heading in ``content`` (if any) takes
    precedence.

    Args:
        content: Text to save.
        file_type: Extension without the leading dot.
        prompt: Optional prompt; when provided it is written above the content.
        is_image: When true, ``content`` is written as-is without the prompt.
        should_save: When false, nothing is written.

    Returns:
        The filename written, or ``None`` when ``should_save`` is false.
    """
    if not should_save:
        return None

    # Step 1: derive a filename from the prompt, falling back to the content.
    filename = generate_filename(prompt if prompt else content, file_type)

    # Step 2: a markdown heading, when present, makes a better filename.
    if file_type == "md":
        title_from_content = extract_markdown_title(content)
        if title_from_content:
            filename = generate_filename(title_from_content, file_type)

    # Step 3: save the file. The prompt header is skipped when no prompt was
    # given, so the default prompt=None no longer raises TypeError.
    if is_image or prompt is None:
        payload = content
    else:
        payload = prompt + "\n\n" + content
    with open(filename, "w", encoding="utf-8") as f:
        f.write(payload)
    return filename
# π Let's modify the main app to be more fun! | |
def _init_session_state():
    """Seed every session-state key the app reads with its default value."""
    defaults = {
        'logged_in': False,
        'selected_records': [],
        'client': None,
        'selected_database': None,
        'selected_container': None,
        'selected_document_id': None,
        'current_index': 0,
        'cloned_doc': None,
    }
    for state_key, default in defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = default


def _run_query_from_params():
    """Process a ``?q=`` / ``?query=`` URL parameter, then stop the script run."""
    params = getattr(st, "query_params", None)
    if params is None:
        return
    raw_query = params.get('q') or params.get('query') or ''
    # st.query_params yields plain strings; indexing one with [0] would keep
    # only its first character. Lists are still accepted for older callers.
    query = raw_query[0] if isinstance(raw_query, (list, tuple)) else raw_query
    if not query:
        return
    try:
        process_text(query)
    except Exception as e:
        # Surface the failure instead of hiding it behind an empty markdown block.
        st.error(f"Failed to process query parameter: {str(e)}")
        return
    st.stop()  # Stop further execution


def _current_document(documents):
    """Return the document at the session's current index, clamped into range."""
    last_index = len(documents) - 1
    st.session_state.current_index = max(0, min(st.session_state.current_index, last_index))
    return documents[st.session_state.current_index]


def _render_navigation(total_docs, prev_key, next_key):
    """Render Previous/Next buttons that move ``current_index`` within bounds."""
    col_prev, col_next = st.columns([1, 1])
    with col_prev:
        if st.button("β¬ οΈ Previous", key=prev_key):
            if st.session_state.current_index > 0:
                st.session_state.current_index -= 1
                st.rerun()
    with col_next:
        if st.button("β‘οΈ Next", key=next_key):
            if st.session_state.current_index < total_docs - 1:
                st.session_state.current_index += 1
                st.rerun()


def _save_document(container, updated_doc):
    """Upsert ``updated_doc`` and report the outcome in the page."""
    success, message = update_record(container, updated_doc)
    if success:
        st.success(f"Document {updated_doc['id']} saved successfully.")
        st.session_state.selected_document_id = updated_doc['id']
        st.rerun()
    else:
        st.error(message)


def _render_markdown_view(documents):
    """Show the current document as JSON, with search links for its text values."""
    total_docs = len(documents)
    doc = _current_document(documents)
    st.markdown(f"#### Document ID: {doc.get('id', '')}")

    # Collect every string value in the document that contains a space.
    values_with_space = []

    def extract_values(obj):
        if isinstance(obj, dict):
            for value in obj.values():
                extract_values(value)
        elif isinstance(obj, list):
            for item in obj:
                extract_values(item)
        elif isinstance(obj, str):
            if ' ' in obj:
                values_with_space.append(obj)

    extract_values(doc)

    # (label, url-builder) pairs. A list keeps every entry even when two
    # labels happen to be identical, which a dict literal would not.
    search_links = [
        ("ππArXiv", lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}"),
        ("πAnalyst", lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix')}"),
        ("πPyCoder", lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix2')}"),
        ("π¬JSCoder", lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}-{quote('PromptPrefix3')}"),
        ("π ", lambda k: f"{LOCAL_APP_URL}/?q={quote(k)}"),
        ("π", lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}"),
        ("π", lambda k: f"https://www.google.com/search?q={quote(k)}"),
        ("βΆοΈ", lambda k: f"https://www.youtube.com/results?search_query={quote(k)}"),
        ("π", lambda k: f"https://www.bing.com/search?q={quote(k)}"),
        ("π₯", lambda k: f"https://www.youtube.com/results?search_query={quote(k)}"),
        ("π¦", lambda k: f"https://twitter.com/search?q={quote(k)}"),
    ]
    st.markdown("#### π Links for Extracted Texts")
    for term in values_with_space:
        links_md = ' '.join(f"[{emoji}]({url(term)})" for emoji, url in search_links)
        st.markdown(f"**{term}** <small>{links_md}</small>", unsafe_allow_html=True)

    # Show the document content as markdown.
    content = json.dumps(doc, indent=2)
    st.markdown(f"```json\n{content}\n```")

    _render_navigation(total_docs, 'prev_markdown', 'next_markdown')


def _render_code_editor_view(container, documents):
    """Show the current document in an editable text area with a save button."""
    total_docs = len(documents)
    doc = _current_document(documents)
    st.markdown(f"#### Document ID: {doc.get('id', '')}")
    doc_str = st.text_area("Edit Document", value=json.dumps(doc, indent=2), height=300, key=f'code_editor_{st.session_state.current_index}')

    _render_navigation(total_docs, 'prev_code', 'next_code')

    if st.button("πΎ Save Changes", key=f'save_button_{st.session_state.current_index}'):
        try:
            updated_doc = json.loads(doc_str)
        except json.JSONDecodeError as e:
            st.error(f"Invalid JSON: {str(e)} π«")
        else:
            _save_document(container, updated_doc)


def _render_edit_and_save_view(container, documents_to_display):
    """Show each displayed document in its own column with editable id and body."""
    st.markdown("#### Edit the document fields below:")
    cols = st.columns(len(documents_to_display))
    for idx, (col, doc) in enumerate(zip(cols, documents_to_display)):
        with col:
            st.markdown(f"##### Document ID: {doc.get('id', '')}")
            editable_id = st.text_input("ID", value=doc.get('id', ''), key=f'edit_id_{idx}')
            # The id is edited separately, so drop it from the JSON body.
            editable_doc = doc.copy()
            editable_doc.pop('id', None)
            doc_str = st.text_area("Document Content (in JSON format)", value=json.dumps(editable_doc, indent=2), height=300, key=f'doc_str_{idx}')
            if st.button("πΎ Save Changes", key=f'save_button_{idx}'):
                try:
                    updated_doc = json.loads(doc_str)
                except json.JSONDecodeError as e:
                    st.error(f"Invalid JSON: {str(e)} π«")
                else:
                    updated_doc['id'] = editable_id  # Include the possibly edited ID
                    _save_document(container, updated_doc)


def _render_clone_view(container, documents_to_display):
    """Offer a clone button per document and an editor for the pending clone."""
    st.markdown("#### Clone a document:")
    for idx, doc in enumerate(documents_to_display):
        st.markdown(f"##### Document ID: {doc.get('id', '')}")
        if st.button("π Clone Document", key=f'clone_button_{idx}'):
            cloned_doc = doc.copy()
            cloned_doc['id'] = generate_unique_id()  # The clone gets a fresh ID
            st.session_state.cloned_doc = cloned_doc
            st.session_state.cloned_doc_str = json.dumps(cloned_doc, indent=2)
            st.session_state.clone_mode = True
            st.rerun()

    if st.session_state.get('clone_mode', False):
        st.markdown("#### Edit Cloned Document:")
        cloned_doc_str = st.text_area("Cloned Document Content (in JSON format)", value=st.session_state.cloned_doc_str, height=300)
        if st.button("πΎ Save Cloned Document"):
            try:
                new_doc = json.loads(cloned_doc_str)
            except json.JSONDecodeError as e:
                st.error(f"Invalid JSON: {str(e)} π«")
            else:
                success, message = insert_record(container, new_doc)
                if success:
                    st.success(f"Cloned document saved with id: {new_doc['id']} π")
                    st.session_state.selected_document_id = new_doc['id']
                    st.session_state.clone_mode = False
                    st.session_state.cloned_doc = None
                    st.session_state.cloned_doc_str = ''
                    st.rerun()
                else:
                    st.error(message)


def _render_new_record_view(container):
    """Show a form that inserts a brand-new document."""
    st.markdown("#### Create a new document:")
    new_id = st.text_input("ID", value=generate_unique_id(), key='new_id')
    new_doc_str = st.text_area("Document Content (in JSON format)", value='{}', height=300)
    if st.button("β Create New Document"):
        try:
            new_doc = json.loads(new_doc_str)
        except json.JSONDecodeError as e:
            st.error(f"Invalid JSON: {str(e)} π«")
        else:
            new_doc['id'] = new_id  # Use the provided ID
            success, message = insert_record(container, new_doc)
            if success:
                st.success(f"New document created with id: {new_doc['id']} π")
                st.session_state.selected_document_id = new_doc['id']
                st.rerun()
            else:
                st.error(message)


def _render_container_documents(container):
    """Render the export button, the viewer/editor and the table for ``container``."""
    if st.button("π¦ Export Container Data"):
        download_link = archive_current_container(st.session_state.selected_database, st.session_state.selected_container, st.session_state.client)
        # archive_current_container returns either an <a> link or an error text.
        if download_link.startswith('<a'):
            st.markdown(download_link, unsafe_allow_html=True)
        else:
            st.error(download_link)

    documents = get_documents(container)
    if len(documents) > 5:
        documents_to_display = documents[:5]
        st.info("Showing top 5 most recent documents.")
    else:
        documents_to_display = documents
        st.info(f"Showing all {len(documents_to_display)} documents.")

    if documents_to_display:
        view_options = ['Show as Markdown', 'Show as Code Editor', 'Show as Edit and Save', 'Clone Document', 'New Record']
        selected_view = st.selectbox("Select Viewer/Editor", view_options, index=2)
        if selected_view == 'Show as Markdown':
            _render_markdown_view(documents)
        elif selected_view == 'Show as Code Editor':
            _render_code_editor_view(container, documents)
        elif selected_view == 'Show as Edit and Save':
            _render_edit_and_save_view(container, documents_to_display)
        elif selected_view == 'Clone Document':
            _render_clone_view(container, documents_to_display)
        elif selected_view == 'New Record':
            _render_new_record_view(container)
    else:
        st.sidebar.info("No documents found in this container. π")

    # Main content area: tabular view of the displayed documents.
    st.subheader(f"π Container: {st.session_state.selected_container}")
    if documents_to_display:
        df = pd.DataFrame(documents_to_display)
        st.dataframe(df)
    else:
        st.info("No documents to display. π§")


def _remove_path(path):
    """Delete ``path`` (a directory tree or a single file) if it exists."""
    if os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.exists(path):
        os.remove(path)


def _render_github_section():
    """Render the clone-as-zip and push-to-new-repository controls."""
    st.subheader("π GitHub Operations")
    github_token = os.environ.get("GITHUB")  # Read GitHub token from environment variable
    source_repo = st.text_input("Source GitHub Repository URL", value="https://github.com/AaronCWacker/AIExamples-8-24-Streamlit")
    new_repo_name = st.text_input("New Repository Name (for cloning)", value=f"AIExample-Clone-{datetime.now().strftime('%Y%m%d_%H%M%S')}")
    missing_config = "Please ensure GitHub token is set in environment variables and source repository URL is provided. πβ"

    col1, col2 = st.columns(2)
    with col1:
        if st.button("π₯ Clone Repository"):
            if github_token and source_repo:
                # Paths are fixed before the try so the cleanup can always see them.
                local_path = f"./temp_repo_{datetime.now().strftime('%Y%m%d%H%M%S')}"
                zip_filename = f"{new_repo_name}.zip"
                try:
                    download_github_repo(source_repo, local_path)
                    create_zip_file(local_path, zip_filename[:-4])
                    st.markdown(get_base64_download_link(zip_filename, zip_filename), unsafe_allow_html=True)
                    st.success("Repository cloned successfully! π")
                except Exception as e:
                    st.error(f"An error occurred: {str(e)} π’")
                finally:
                    _remove_path(local_path)
                    _remove_path(zip_filename)
            else:
                st.error(missing_config)
    with col2:
        if st.button("π€ Push to New Repository"):
            if github_token and source_repo:
                # Defined before the try: if creating the repository fails, the
                # cleanup must not hit an unbound name and mask the real error.
                local_path = f"./temp_repo_{datetime.now().strftime('%Y%m%d%H%M%S')}"
                try:
                    g = Github(github_token)
                    new_repo = create_repo(g, new_repo_name)
                    download_github_repo(source_repo, local_path)
                    push_to_github(local_path, new_repo, github_token)
                    st.success(f"Repository pushed successfully to {new_repo.html_url} π")
                except Exception as e:
                    st.error(f"An error occurred: {str(e)} π’")
                finally:
                    _remove_path(local_path)
            else:
                st.error(missing_config)


def main():
    """Run the Streamlit app: Cosmos DB browser/editor plus GitHub utilities.

    Flow: initialise session state, handle a ``?q=`` query parameter, log in
    using the ``Key`` environment value, then render the database/container
    navigator, the document views and the GitHub section. Cosmos and other
    errors raised while rendering are shown in the page instead of crashing.
    """
    st.title("πGitπCosmosπ« - Azure Cosmos DB and Github Agent")

    _init_session_state()
    _run_query_from_params()

    # Automatic login using the key from the environment.
    if Key:
        st.session_state.primary_key = Key
        st.session_state.logged_in = True
    else:
        st.error("Cosmos DB Key is not set in environment variables. πβ")
        return  # Can't proceed without a key

    if st.session_state.logged_in:
        try:
            if st.session_state.client is None:
                st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)

            # Sidebar: database selection. A change resets everything below it.
            st.sidebar.title("πGitπCosmosπ«ποΈNavigator")
            databases = get_databases(st.session_state.client)
            selected_db = st.sidebar.selectbox("ποΈ Select Database", databases)
            if selected_db != st.session_state.selected_database:
                st.session_state.selected_database = selected_db
                st.session_state.selected_container = None
                st.session_state.selected_document_id = None
                st.session_state.current_index = 0
                st.rerun()

            if st.session_state.selected_database:
                database = st.session_state.client.get_database_client(st.session_state.selected_database)
                containers = get_containers(database)
                selected_container = st.sidebar.selectbox("π Select Container", containers)
                if selected_container != st.session_state.selected_container:
                    st.session_state.selected_container = selected_container
                    st.session_state.selected_document_id = None
                    st.session_state.current_index = 0
                    st.rerun()

                if st.session_state.selected_container:
                    container = database.get_container_client(st.session_state.selected_container)
                    _render_container_documents(container)

            _render_github_section()
        except exceptions.CosmosHttpResponseError as e:
            st.error(f"Failed to connect to Cosmos DB. HTTP error: {str(e)} π¨")
        except Exception as e:
            st.error(f"An unexpected error occurred: {str(e)} π±")

    # Logout button: clears the session's selections and client.
    if st.session_state.logged_in and st.sidebar.button("πͺ Logout"):
        st.session_state.logged_in = False
        st.session_state.selected_records.clear()
        st.session_state.client = None
        st.session_state.selected_database = None
        st.session_state.selected_container = None
        st.session_state.selected_document_id = None
        st.session_state.current_index = 0
        st.rerun()


if __name__ == "__main__":
    main()