DeepResearchEvaluator

Running on CPU Upgrade

App Files Files Community

awacke1 committed on Jan 19

Commit

a484f78

verified ·

1 Parent(s): 2a5bffd

Create backup22-fulltoobigimho.app.py

Browse files

Files changed (1) hide show

backup22-fulltoobigimho.app.py +1869 -0

backup22-fulltoobigimho.app.py ADDED Viewed

	@@ -0,0 +1,1869 @@

+import streamlit as st
+import anthropic
+import openai
+import base64
+import cv2
+import glob
+import json
+import math
+import os
+import pytz
+import random
+import re
+import requests
+import textract
+import time
+import zipfile
+import plotly.graph_objects as go
+import streamlit.components.v1 as components
+from datetime import datetime
+from audio_recorder_streamlit import audio_recorder
+from bs4 import BeautifulSoup
+from collections import defaultdict, deque, Counter
+from dotenv import load_dotenv
+from gradio_client import Client
+from huggingface_hub import InferenceClient
+from io import BytesIO
+from PIL import Image
+from PyPDF2 import PdfReader
+from urllib.parse import quote
+from xml.etree import ElementTree as ET
+from openai import OpenAI
+import extra_streamlit_components as stx
+from streamlit.runtime.scriptrunner import get_script_run_ctx
+import asyncio
+import edge_tts
+from streamlit_marquee import streamlit_marquee
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+from typing import Dict, List, Optional, Tuple, Union
+# ─────────────────────────────────────────────────────────
+# 1. CORE CONFIGURATION & SETUP
+# ─────────────────────────────────────────────────────────
# Page-level Streamlit configuration: title, icon, wide layout, sidebar mode
# and the entries of the app menu. The title string doubles as the "About" text.
_APP_TITLE = "🚲TalkingAIResearcher🏆"
_MENU_ITEMS = {
    'Get Help': 'https://huggingface.co/awacke1',
    'Report a bug': 'https://huggingface.co/spaces/awacke1',
    'About': _APP_TITLE,
}
st.set_page_config(
    page_title=_APP_TITLE,
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items=_MENU_ITEMS,
)
# Read a local .env file (if any) so the os.getenv() lookups below can see it.
load_dotenv()
# English Edge TTS voices offered in the voice selector, grouped by locale.
# The first entry is used as the default voice below.
EDGE_TTS_VOICES = [
    # United States
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    # United Kingdom
    "en-GB-SoniaNeural",
    "en-GB-RyanNeural",
    # Australia
    "en-AU-NatashaNeural",
    "en-AU-WilliamNeural",
    # Canada
    "en-CA-ClaraNeural",
    "en-CA-LiamNeural",
]

# Default values copied into st.session_state for any key that is not set yet.
DEFAULT_SESSION_STATE = dict(
    # Marquee appearance and text
    marquee_settings={
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px",
    },
    # Text-to-speech preferences
    tts_voice=EDGE_TTS_VOICES[0],
    audio_format='mp3',
    # Conversation state
    transcript_history=[],
    chat_history=[],
    openai_model="gpt-4o-2024-05-13",
    messages=[],
    last_voice_input="",
    # File editing / browsing state
    editing_file=None,
    edit_new_name="",
    edit_new_content="",
    viewing_prefix=None,
    should_rerun=False,
    old_val=None,
    last_query="",
    marquee_content="🚀 Welcome to TalkingAIResearcher | 🤖 Your Research Assistant",
    # Feature toggles
    enable_audio=False,
    enable_download=False,
    enable_claude=True,
    # Per-session caches
    audio_cache={},
    paper_cache={},
    download_link_cache={},
    # Timing data written by PerformanceTimer
    performance_metrics=defaultdict(list),
    operation_timings=defaultdict(float),
)
# Seed the session: existing keys are left untouched, missing ones receive
# their default from DEFAULT_SESSION_STATE.
for key, value in DEFAULT_SESSION_STATE.items():
    if key in st.session_state:
        continue
    st.session_state[key] = value
# API keys and client configuration.
# Environment variables supply the initial values; for the OpenAI and Anthropic
# keys an entry in st.secrets, when present, takes precedence.
# NOTE(review): the Anthropic environment variable is 'ANTHROPIC_API_KEY_3'
# while the secret is 'ANTHROPIC_API_KEY' — confirm the different names are intended.
openai_api_key = (
    st.secrets['OPENAI_API_KEY']
    if 'OPENAI_API_KEY' in st.secrets
    else os.getenv('OPENAI_API_KEY', "")
)
anthropic_key = (
    st.secrets["ANTHROPIC_API_KEY"]
    if 'ANTHROPIC_API_KEY' in st.secrets
    else os.getenv('ANTHROPIC_API_KEY_3', "")
)
xai_key = os.getenv('xai', "")

# Configure both the module-level openai key and an explicit client object.
openai.api_key = openai_api_key
openai_client = OpenAI(
    api_key=openai.api_key,
    organization=os.getenv('OPENAI_ORG_ID'),
)

HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')
# Emoji shown next to a file, keyed by its extension (without the dot).
FILE_EMOJIS = dict(
    md="📝",
    mp3="🎵",
    wav="🔊",
    pdf="📄",
    txt="📋",
    json="📊",
    csv="📈",
)
+# ─────────────────────────────────────────────────────────
+# 2. PERFORMANCE MONITORING & TIMING
+# ─────────────────────────────────────────────────────────
class PerformanceTimer:
    """Time a ``with`` block and record the elapsed seconds in session state.

    On a clean exit the duration is stored as the latest value in
    ``st.session_state['operation_timings'][name]`` and appended to
    ``st.session_state['performance_metrics'][name]``. If the block raised,
    nothing is recorded and the exception propagates.

    ``start_time`` holds the ``time.time()`` value taken on entry (``None``
    before that).
    """

    def __init__(self, operation_name: str):
        self.operation_name, self.start_time = operation_name, None

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type:
            # The timed block failed: skip logging, let the exception through.
            return
        elapsed = time.time() - self.start_time
        label = self.operation_name
        st.session_state['operation_timings'][label] = elapsed
        st.session_state['performance_metrics'][label].append(elapsed)
def log_performance_metrics():
    """Display performance metrics in the sidebar.

    Shows the sum of the latest timing of every operation, a per-operation
    breakdown with percentages, and a bar chart of each operation's average
    duration over the recorded history.
    """
    # `pd` is not among the names imported at the top of the file, so the
    # chart code would raise NameError; import it where it is needed.
    import pandas as pd

    st.sidebar.markdown("### ⏱️ Performance Metrics")
    metrics = st.session_state['operation_timings']
    if not metrics:
        return

    total_time = sum(metrics.values())
    st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")

    # Timing breakdown. Guard the division: every recorded duration can be 0.0.
    for operation, duration in metrics.items():
        percentage = (duration / total_time) * 100 if total_time > 0 else 0.0
        st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")

    # Average duration per operation, skipping operations with no samples.
    history_data = [
        {"Operation": op, "Avg Time (s)": sum(times) / len(times)}
        for op, times in st.session_state['performance_metrics'].items()
        if times
    ]
    if history_data:
        st.sidebar.markdown("### 📈 Timing History")
        chart_data = pd.DataFrame(history_data)
        st.sidebar.bar_chart(chart_data.set_index("Operation"))
+# ─────────────────────────────────────────────────────────
+# 3. OPTIMIZED AUDIO GENERATION
+# ─────────────────────────────────────────────────────────
def clean_for_speech(text: str) -> str:
    """Strip markdown, code blocks and URLs from ``text`` for TTS output.

    The order of the steps matters:

    * Fenced code blocks are removed first. Once the backticks are stripped
      as "emphasis markers" the fences can no longer be recognised.
    * Markdown links are reduced to their label before URLs are removed.
    * Parenthesised URLs are removed before bare URLs, otherwise the bare-URL
      pattern swallows the closing parenthesis and leaves a dangling "(".
    * Whitespace is collapsed last so gaps left by the removals disappear.

    Inline code keeps its text; only the backticks are dropped.

    Returns:
        The cleaned, single-line string with no leading or trailing whitespace.
    """
    with PerformanceTimer("text_cleaning"):
        # Remove fenced code blocks while their ``` markers still exist.
        text = re.sub(r'```[\s\S]*?```', '', text)
        # [label](target) -> label
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
        # Remove URLs: parenthesised ones first, then bare ones.
        text = re.sub(r'\(https?://[^\)]+\)', '', text)
        text = re.sub(r'https?://\S+', '', text)
        # Remove header markers and emphasis/inline-code markers.
        text = re.sub(r'#+ ', '', text)
        text = re.sub(r'[*_~`]', '', text)
        # Drop end-of-sequence tokens.
        text = text.replace("</s>", " ")
        # Collapse all whitespace (including newlines) into single spaces.
        text = re.sub(r'\s+', ' ', text)
        return text.strip()
async def async_edge_tts_generate(
    text: str,
    voice: str,
    rate: int = 0,
    pitch: int = 0,
    file_format: str = "mp3"
) -> Tuple[Optional[str], float]:
    """Generate speech for ``text`` with Edge TTS, caching results per session.

    Args:
        text: Raw text; it is passed through ``clean_for_speech`` first.
        voice: Edge TTS voice name.
        rate: Speaking-rate offset, sent as a signed percentage (e.g. "+0%").
        pitch: Pitch offset, sent as a signed Hz value (e.g. "+0Hz").
        file_format: Extension used for the output filename.

    Returns:
        ``(filename, seconds)``. ``seconds`` is 0 for a cache hit. The result
        is ``(None, 0)`` when the cleaned text is empty or generation fails
        (the error is shown via ``st.error``).
    """
    import hashlib

    with PerformanceTimer("tts_generation") as timer:
        # Clean and validate text
        text = clean_for_speech(text)
        if not text.strip():
            return None, 0

        # Key the cache on a digest of the whole text: using only a prefix
        # would make different texts with the same opening share one file.
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        cache_key = f"{digest}_{voice}_{rate}_{pitch}_{file_format}"
        cached_file = st.session_state['audio_cache'].get(cache_key)
        # Only trust a cache entry whose file still exists on disk.
        if cached_file and os.path.exists(cached_file):
            return cached_file, 0

        try:
            rate_str = f"{rate:+d}%"
            pitch_str = f"{pitch:+d}Hz"
            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

            # Timestamp plus a random suffix keeps filenames unique.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"

            # NOTE(review): only the extension depends on file_format here —
            # confirm the saved data really is WAV when "wav" is requested.
            await communicate.save(filename)

            st.session_state['audio_cache'][cache_key] = filename
            return filename, time.time() - timer.start_time
        except Exception as e:
            st.error(f"Error generating audio: {str(e)}")
            return None, 0
async def async_save_qa_with_audio(
    question: str,
    answer: str,
    voice: Optional[str] = None
) -> Tuple[str, Optional[str], float, float]:
    """Save a Q&A pair as markdown and, if enabled, generate audio for it.

    Args:
        question: The question text.
        answer: The answer text.
        voice: TTS voice; defaults to ``st.session_state['tts_voice']``.

    Returns:
        ``(md_file, audio_file, md_time, audio_time)``. ``audio_file`` is
        ``None`` and ``audio_time`` is 0 when
        ``st.session_state['enable_audio']`` is falsy.
    """
    voice = voice or st.session_state['tts_voice']
    with PerformanceTimer("qa_save"):
        # Save markdown and measure how long that took.
        md_start = time.time()
        md_file = create_file(question, answer, "md")
        md_time = time.time() - md_start

        # Generate audio only when the feature toggle is on.
        audio_file = None
        audio_time = 0
        if st.session_state['enable_audio']:
            audio_text = f"{question}\n\nAnswer: {answer}"
            audio_file, audio_time = await async_edge_tts_generate(
                audio_text,
                voice=voice,
                file_format=st.session_state['audio_format']
            )
        return md_file, audio_file, md_time, audio_time
def create_download_link_with_cache(
    file_path: str,
    file_type: str = "mp3"
) -> str:
    """Build a base64 ``data:`` download link for a file, cached per session.

    Args:
        file_path: Path of the file to embed.
        file_type: "mp3", "wav" or "md" select a specific MIME type and icon;
            anything else falls back to ``application/octet-stream``.

    Returns:
        An HTML ``<a>`` element, or "" if the file could not be read (the
        error is shown via ``st.error``).
    """
    import html

    # (MIME type, icon) per supported file type.
    link_styles = {
        "mp3": ("audio/mpeg", "🎵"),
        "wav": ("audio/wav", "🔊"),
        "md": ("text/markdown", "📝"),
    }
    with PerformanceTimer("download_link_generation"):
        # The link text depends on file_type, so it must be part of the key;
        # otherwise the same path requested with another type would get the
        # wrong MIME type back from the cache.
        cache = st.session_state['download_link_cache']
        cache_key = f"dl_{file_type}_{file_path}"
        if cache_key in cache:
            return cache[cache_key]
        try:
            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            # The name is interpolated into HTML, so escape it.
            filename = html.escape(os.path.basename(file_path), quote=True)
            mime_type, icon = link_styles.get(
                file_type, ("application/octet-stream", "⬇️")
            )
            link = (
                f'<a href="data:{mime_type};base64,{b64}" '
                f'download="{filename}">{icon} Download {filename}</a>'
            )
            cache[cache_key] = link
            return link
        except Exception as e:
            st.error(f"Error creating download link: {str(e)}")
            return ""
+# ---
def display_voice_tab():
    """Display the voice input tab: TTS settings, message box and chat history.

    The selected voice and audio format are stored in
    ``st.session_state['tts_voice']`` / ``['audio_format']``; changing either
    triggers a rerun. A non-empty message is passed to ``process_voice_input``
    when "Send" is pressed.
    """
    st.subheader("🎤 Voice Input")
    # Voice Settings Section
    st.markdown("### 🎤 Voice Settings")
    captionFemale='Top: 🌸 **Aria** – 🎶 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**'
    captionMale='Bottom: 🌟 **Guy** – 🛠️ **Ryan** – 🎻 **William** – 🌟 **Liam**'
    # Only show the group picture if the asset exists, so a missing image
    # does not abort rendering of the whole tab.
    voices_image = 'Group Picture - Voices.png'
    if os.path.exists(voices_image):
        st.sidebar.image(voices_image, caption=captionFemale + ' - ' + captionMale)
    st.sidebar.markdown("""
    # 🎙️ Voice Character Agent Selector 🎭
        1. Female:
        - 🌸 **Aria** – Female: 🌟 The voice of elegance and creativity, perfect for soothing storytelling or inspiring ideas.
        - 🎶 **Jenny** – Female: 💖 Sweet and friendly, she’s the go-to for warm, conversational tones.
        - 🌺 **Sonia** – Female: 💃 Bold and confident, ideal for commanding attention and delivering with flair.
        - 🌌 **Natasha** – Female: ✨ Enigmatic and sophisticated, Natasha is great for a touch of mystery and charm.
        - 🌷 **Clara** – Female: 🎀 Cheerful and gentle, perfect for nurturing, empathetic conversations.
        ---
        2. Male:
        - 🌟 **Guy** – Male: 🎩 Sophisticated and versatile, a natural fit for clear and authoritative delivery.
        - 🛠️ **Ryan** – Male: 🤝 Down-to-earth and approachable, ideal for friendly and casual exchanges.
        - 🎻 **William** – Male: 📚 Classic and refined, perfect for a scholarly or thoughtful tone.
        - 🌟 **Liam** – Male: ⚡ Energetic and upbeat, great for dynamic, engaging interactions.
    """)

    # Fall back to the first voice if the stored one is not in the list;
    # list.index() would raise ValueError otherwise.
    current_voice = st.session_state['tts_voice']
    voice_index = (
        EDGE_TTS_VOICES.index(current_voice)
        if current_voice in EDGE_TTS_VOICES
        else 0
    )
    selected_voice = st.selectbox(
        "Select TTS Voice:",
        options=EDGE_TTS_VOICES,
        index=voice_index
    )

    # Audio Format Selection: preselect the stored format instead of always MP3.
    st.markdown("### 🔊 Audio Format")
    format_options = ["MP3", "WAV"]
    stored_format = str(st.session_state['audio_format']).upper()
    format_index = (
        format_options.index(stored_format)
        if stored_format in format_options
        else 0
    )
    selected_format = st.radio(
        "Choose Audio Format:",
        options=format_options,
        index=format_index
    )

    # Update session state if settings change
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()
    if selected_format.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = selected_format.lower()
        st.rerun()

    # Text Input Area (flattened to one line)
    user_text = st.text_area("💬 Message:", height=100)
    user_text = user_text.strip().replace('\n', ' ')

    # Send Button: do not forward an empty message.
    if st.button("📨 Send"):
        if user_text:
            process_voice_input(user_text)
        else:
            st.warning("Please enter a message.")

    # Chat History
    st.subheader("📜 Chat History")
    for c in st.session_state.chat_history:
        st.write("**You:**", c["user"])
        st.write("**Response:**", c["claude"])
def display_arxiv_tab():
    """Render the ArXiv search tab.

    Shows the query box and option checkboxes. When a query is present and
    "Run Search" is pressed, the query is stored in
    ``st.session_state.last_query`` and ``perform_ai_lookup`` is called with
    the chosen options. With "Full Transcript" ticked the result is also
    written out through ``create_file``.
    """
    st.subheader("🔍 Query ArXiv")
    q = st.text_input("🔍 Query:", key="arxiv_query")

    st.markdown("### 🎛 Options")
    left, right = st.columns(2)
    with left:
        vocal_summary = st.checkbox(
            "🎙 Short Audio", value=True, key="option_vocal_summary"
        )
        extended_refs = st.checkbox(
            "📜 Long Refs", value=False, key="option_extended_refs"
        )
    with right:
        titles_summary = st.checkbox(
            "🔖 Titles Only", value=True, key="option_titles_summary"
        )
        full_audio = st.checkbox(
            "📚 Full Audio", value=False, key="option_full_audio"
        )
    full_transcript = st.checkbox(
        "🧾 Full Transcript", value=False, key="option_full_transcript"
    )

    # The search button is only rendered once a query has been typed.
    if not q:
        return
    if not st.button("🔍 Run Search"):
        return

    st.session_state.last_query = q
    lookup_options = dict(
        vocal_summary=vocal_summary,
        extended_refs=extended_refs,
        titles_summary=titles_summary,
        full_audio=full_audio,
    )
    # The lookup returns a pair; the second element is not used here.
    result, _timings = perform_ai_lookup(q, **lookup_options)
    if full_transcript:
        create_file(q, result, "md")
def display_media_tab():
    """Render the media gallery: audio, image and video files found by
    globbing the current working directory, one sub-tab per media type."""

    def _glob_all(*patterns):
        # Concatenate matches pattern by pattern, keeping the pattern order.
        return [path for pattern in patterns for path in glob.glob(pattern)]

    st.header("📸 Media Gallery")
    audio_tab, image_tab, video_tab = st.tabs(["🎵 Audio", "🖼 Images", "🎥 Video"])

    # Audio: a player plus a download link inside an expander per file.
    with audio_tab:
        st.subheader("🎵 Audio Files")
        audio_files = _glob_all("*.mp3", "*.wav")
        if not audio_files:
            st.write("No audio files found.")
        for audio_file in audio_files:
            with st.expander(os.path.basename(audio_file)):
                st.audio(audio_file)
                ext = os.path.splitext(audio_file)[1].replace('.', '')
                dl_link = get_download_link(audio_file, file_type=ext)
                st.markdown(dl_link, unsafe_allow_html=True)

    # Images: laid out round-robin over a user-selected number of columns.
    with image_tab:
        st.subheader("🖼 Image Files")
        image_files = _glob_all("*.png", "*.jpg", "*.jpeg")
        if not image_files:
            st.write("No images found.")
        else:
            cols = st.slider("Columns:", 1, 5, 3, key="cols_images")
            image_cols = st.columns(cols)
            for position, img_file in enumerate(image_files):
                with image_cols[position % cols]:
                    try:
                        st.image(Image.open(img_file), use_column_width=True)
                    except Exception as e:
                        st.error(f"Error loading image {img_file}: {str(e)}")

    # Video: a player inside an expander per file.
    with video_tab:
        st.subheader("🎥 Video Files")
        video_files = _glob_all("*.mp4", "*.mov", "*.avi")
        if not video_files:
            st.write("No videos found.")
        for video_file in video_files:
            with st.expander(os.path.basename(video_file)):
                st.video(video_file)
def display_editor_tab():
    """Display the text editor tab: create, edit, save and delete ``*.md``
    files in the current working directory.

    New filenames come straight from user input, so they are passed through
    ``sanitize_filename`` before being used as a path. That helper replaces
    path separators, so the file cannot be written outside the working
    directory.
    """
    st.subheader("📝 Text Editor")
    # File Management Section
    st.markdown("### 📂 File Management")
    # File Selection
    md_files = glob.glob("*.md")
    selected_file = st.selectbox(
        "Select file to edit:",
        ["New File"] + md_files,
        key="file_selector"
    )
    # Edit Area
    if selected_file == "New File":
        new_filename = st.text_input("New filename (without extension):")
        file_content = st.text_area("Content:", height=300)
        if st.button("💾 Save File"):
            # Sanitize before touching the filesystem; the result may be empty
            # (e.g. the input consisted only of dots and spaces).
            safe_name = sanitize_filename(new_filename) if new_filename else ""
            if safe_name:
                try:
                    with open(f"{safe_name}.md", 'w', encoding='utf-8') as f:
                        f.write(file_content)
                    st.success(f"File {safe_name}.md saved successfully!")
                    st.session_state.should_rerun = True
                except Exception as e:
                    st.error(f"Error saving file: {str(e)}")
            else:
                st.warning("Please enter a filename.")
    else:
        try:
            # Load existing file content
            with open(selected_file, 'r', encoding='utf-8') as f:
                file_content = f.read()
            # Edit existing file
            edited_content = st.text_area(
                "Edit content:",
                value=file_content,
                height=300
            )
            col1, col2 = st.columns(2)
            with col1:
                if st.button("💾 Save Changes"):
                    try:
                        with open(selected_file, 'w', encoding='utf-8') as f:
                            f.write(edited_content)
                        st.success("Changes saved successfully!")
                    except Exception as e:
                        st.error(f"Error saving changes: {str(e)}")
            with col2:
                if st.button("🗑 Delete File"):
                    try:
                        os.remove(selected_file)
                        st.success(f"File {selected_file} deleted successfully!")
                        st.session_state.should_rerun = True
                    except Exception as e:
                        st.error(f"Error deleting file: {str(e)}")
        except Exception as e:
            st.error(f"Error loading file {selected_file}: {str(e)}")
def display_settings_tab():
    """Display the application settings tab.

    Theme, colour and cache-size widgets are rendered but their values are
    not used by this function. "Clear Cache" resets the three session caches.
    API keys are only ever shown masked, because the page content is sent to
    whoever is using the app.
    """

    def _masked(secret: str) -> str:
        # Show at most the last four characters of a sufficiently long key.
        if not secret:
            return "(not set)"
        tail = secret[-4:] if len(secret) > 8 else ""
        return "•" * 8 + tail

    st.subheader("⚙️ Settings")
    # General Settings
    st.markdown("### 🔧 General Settings")
    # Theme Selection
    theme = st.selectbox(
        "Color Theme:",
        ["Dark", "Light", "Custom"],
        index=0
    )
    if theme == "Custom":
        st.color_picker("Primary Color:", "#1E1E1E")
        st.color_picker("Secondary Color:", "#2D2D2D")
    # Performance Settings
    st.markdown("### ⚡ Performance Settings")
    # Cache size slider: displayed only, the chosen value is not read here.
    st.slider(
        "Maximum Cache Size (MB):",
        0, 1000, 100
    )
    if st.button("Clear Cache"):
        st.session_state['audio_cache'] = {}
        st.session_state['paper_cache'] = {}
        st.session_state['download_link_cache'] = {}
        st.success("Cache cleared successfully!")
    # API Settings
    st.markdown("### 🔑 API Settings")
    # Show/hide API keys (masked, read-only)
    show_keys = st.checkbox("Show API Keys")
    if show_keys:
        st.text_input("OpenAI API Key:", value=_masked(openai_api_key), disabled=True)
        st.text_input("Anthropic API Key:", value=_masked(anthropic_key), disabled=True)
    # Save Settings
    if st.button("💾 Save Settings"):
        st.success("Settings saved successfully!")
        st.session_state.should_rerun = True
# (MIME type, icon) for each supported download file type.
_DOWNLOAD_LINK_TYPES = {
    "zip": ("application/zip", "📂"),
    "mp3": ("audio/mpeg", "🎵"),
    "wav": ("audio/wav", "🔊"),
    "md": ("text/markdown", "📝"),
    "pdf": ("application/pdf", "📄"),
    "txt": ("text/plain", "📋"),
    "json": ("application/json", "📊"),
    "csv": ("text/csv", "📈"),
    "png": ("image/png", "🖼"),
    "jpg": ("image/jpeg", "🖼"),
    "jpeg": ("image/jpeg", "🖼"),
}


def get_download_link(file: str, file_type: str = "zip") -> str:
    """
    Convert a file to base64 and return an HTML link for download.

    Args:
        file: Path of the file to embed.
        file_type: Case-insensitive type key selecting MIME type and icon;
            unknown types fall back to ``application/octet-stream``.

    Returns:
        An ``<a>`` element with a ``data:`` URL on success, otherwise a red
        ``<p>`` element describing the error. File names and error text are
        HTML-escaped because the result is rendered with
        ``unsafe_allow_html=True`` by callers in this file.
    """
    import html

    try:
        with open(file, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        # Escape the display name: it lands in an attribute and in text.
        filename = html.escape(os.path.basename(file), quote=True)
        mime_type, emoji = _DOWNLOAD_LINK_TYPES.get(
            file_type.lower(),
            ("application/octet-stream", "⬇️")
        )
        return (
            f'<a href="data:{mime_type};base64,{b64}" '
            f'download="{filename}">{emoji} Download {filename}</a>'
        )
    except FileNotFoundError:
        return f"<p style='color: red'>❌ File not found: {html.escape(str(file))}</p>"
    except Exception as e:
        return f"<p style='color: red'>❌ Error creating download link: {html.escape(str(e))}</p>"
def play_and_download_audio(file_path: str, file_type: str = "mp3"):
    """
    Show an audio player and a download link for ``file_path``.

    An empty path produces a warning and a non-existent path an error;
    in both cases nothing else is rendered. Any exception raised while
    rendering is reported with ``st.error``.
    """
    if not file_path:
        st.warning("No audio file provided.")
    elif not os.path.exists(file_path):
        st.error(f"Audio file not found: {file_path}")
    else:
        try:
            st.audio(file_path)
            # The download link is raw HTML, hence unsafe_allow_html.
            st.markdown(
                get_download_link(file_path, file_type=file_type),
                unsafe_allow_html=True,
            )
        except Exception as e:
            st.error(f"Error playing audio: {str(e)}")
def get_file_info(file_path: str) -> dict:
    """
    Collect metadata about a file from ``os.stat``.

    Returns a dict with the keys ``name``, ``path``, ``size`` (bytes),
    ``modified`` and ``created`` (naive ``datetime`` objects built from
    ``st_mtime`` / ``st_ctime``), ``type`` (lower-cased extension without the
    dot), ``size_fmt`` (B / KB / MB string) and ``modified_fmt`` /
    ``created_fmt`` (``YYYY-MM-DD HH:MM:SS``).

    On any error the message is shown via ``st.error`` and ``None`` is
    returned.
    """

    def _human_size(num_bytes):
        # Bytes below 1 KiB, one-decimal KB below 1 MiB, one-decimal MB above.
        if num_bytes < 1024:
            return f"{num_bytes} B"
        if num_bytes < 1024 * 1024:
            return f"{num_bytes/1024:.1f} KB"
        return f"{num_bytes/(1024*1024):.1f} MB"

    stamp_format = "%Y-%m-%d %H:%M:%S"
    try:
        stats = os.stat(file_path)
        modified_at = datetime.fromtimestamp(stats.st_mtime)
        created_at = datetime.fromtimestamp(stats.st_ctime)
        extension = os.path.splitext(file_path)[1]
        return {
            'name': os.path.basename(file_path),
            'path': file_path,
            'size': stats.st_size,
            'modified': modified_at,
            'created': created_at,
            'type': extension.lower().strip('.'),
            'size_fmt': _human_size(stats.st_size),
            'modified_fmt': modified_at.strftime(stamp_format),
            'created_fmt': created_at.strftime(stamp_format),
        }
    except Exception as e:
        st.error(f"Error getting file info: {str(e)}")
        return None
def sanitize_filename(filename: str) -> str:
    """
    Make ``filename`` safe to use as a single path component.

    Each of the characters ``< > : " / \\ | ? *`` becomes an underscore,
    surrounding dots and spaces are trimmed, and names longer than 255
    characters are shortened by cutting the stem while keeping the extension.
    """
    max_length = 255
    replacements = str.maketrans({ch: '_' for ch in '<>:"/\\|?*'})

    cleaned = filename.translate(replacements).strip('. ')
    if len(cleaned) <= max_length:
        return cleaned

    # Too long: trim the stem so that stem + extension fits the limit.
    stem, ext = os.path.splitext(cleaned)
    keep = max_length - len(ext)
    return stem[:keep] + ext
def create_file_with_metadata(filename: str, content: str, metadata: dict = None):
    """
    Write ``content`` to a sanitized version of ``filename``.

    When ``metadata`` is a non-empty dict it is serialized as indented JSON
    and placed in front of the content between two ``---`` lines.

    Returns the sanitized filename that was written, or ``None`` after
    reporting the error with ``st.error``.
    """
    try:
        target = sanitize_filename(filename)

        # Create the parent directory if the sanitized name has one.
        parent_dir = os.path.dirname(target) or '.'
        os.makedirs(parent_dir, exist_ok=True)

        body = content
        if metadata:
            header = json.dumps(metadata, indent=2)
            body = f"---\n{header}\n---\n{content}"

        with open(target, 'w', encoding='utf-8') as out:
            out.write(body)
        return target
    except Exception as e:
        st.error(f"Error creating file: {str(e)}")
        return None
def read_file_with_metadata(filename: str) -> tuple:
    """
    Read a text file and split off a leading JSON metadata header.

    A header is recognised when the file starts with a ``---`` line and a
    later ``---`` line closes it, with valid JSON in between. In that case
    ``(content_after_header, metadata)`` is returned. Otherwise the result
    is ``(entire_content, None)``.

    Read errors are reported with ``st.error`` and yield ``(None, None)``.
    """
    opener, closer = '---\n', '\n---\n'
    try:
        with open(filename, 'r', encoding='utf-8') as handle:
            raw = handle.read()

        if not raw.startswith(opener):
            return raw, None

        # Look for the closing marker after the opening line.
        header_end = raw.find(closer, len(opener))
        if header_end == -1:
            return raw, None

        try:
            metadata = json.loads(raw[len(opener):header_end])
        except json.JSONDecodeError:
            # Not valid JSON: treat the whole file as plain content.
            return raw, None
        return raw[header_end + len(closer):], metadata
    except Exception as e:
        st.error(f"Error reading file: {str(e)}")
        return None, None
def archive_files(file_paths: list, archive_name: str = None) -> str:
    """
    Bundle the given files into a deflate-compressed zip archive.

    Paths that do not exist are skipped. Each file is stored under its base
    name only. Without ``archive_name`` the archive is called
    ``archive_<YYYYmmdd_HHMMSS>.zip``.

    Returns the archive path, or ``None`` after reporting the error with
    ``st.error``.
    """
    try:
        if not archive_name:
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            archive_name = f"archive_{stamp}.zip"

        with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as bundle:
            for source in file_paths:
                if not os.path.exists(source):
                    continue
                bundle.write(source, os.path.basename(source))
        return archive_name
    except Exception as e:
        st.error(f"Error creating archive: {str(e)}")
        return None
def list_files_by_type(directory: str = ".",
                      extensions: list = None,
                      recursive: bool = False) -> dict:
    """
    Group the files of ``directory`` by extension using glob patterns.

    ``extensions`` defaults to md, mp3, wav, pdf, txt, json and csv. With
    ``recursive=True`` the pattern ``**/*.<ext>`` is used instead of
    ``*.<ext>``. Extensions without any match are left out of the result.

    Returns ``{extension: [paths]}``, or ``{}`` after reporting an error
    with ``st.error``.
    """
    try:
        wanted = (
            ['md', 'mp3', 'wav', 'pdf', 'txt', 'json', 'csv']
            if extensions is None
            else extensions
        )
        stem = "**/*" if recursive else "*"
        found = (
            (ext, glob.glob(os.path.join(directory, f"{stem}.{ext}"),
                            recursive=recursive))
            for ext in wanted
        )
        return {ext: paths for ext, paths in found if paths}
    except Exception as e:
        st.error(f"Error listing files: {str(e)}")
        return {}
def get_central_time() -> datetime:
    """Return the current time as an aware datetime in the 'US/Central' zone."""
    return datetime.now(pytz.timezone('US/Central'))
def format_timestamp_prefix() -> str:
    """Return the current Central time as ``MM_DD_YY_HH_MM_AM/PM`` (12-hour clock)."""
    return get_central_time().strftime("%m_%d_%y_%I_%M_%p")
def get_formatted_time(dt: datetime = None,
                      timezone: str = 'US/Central',
                      include_timezone: bool = True,
                      include_seconds: bool = False) -> str:
    """
    Format a datetime as ``YYYY-MM-DD HH:MM[:SS] AM/PM [TZ]``.

    With no ``dt`` the current time in ``timezone`` is used. A naive ``dt``
    is localized to ``timezone``. An aware ``dt`` is formatted as it is.
    """
    if dt is None:
        dt = datetime.now(pytz.timezone(timezone))
    elif dt.tzinfo is None:
        dt = pytz.timezone(timezone).localize(dt)

    # Assemble the strftime pattern from its optional pieces.
    pieces = ["%Y-%m-%d %I:%M"]
    if include_seconds:
        pieces.append(":%S")
    pieces.append(" %p")
    if include_timezone:
        pieces.append(" %Z")
    return dt.strftime("".join(pieces))
def parse_timestamp(timestamp_str: str,
                   timezone: str = 'US/Central') -> Optional[datetime]:
    """
    Parse ``timestamp_str`` by trying a fixed list of formats in order.

    The first format that matches wins, so an ambiguous slash date is read
    as US month/day before the day/month format is tried. The parsed value
    is localized to ``timezone`` when it carries no tzinfo.

    Returns the aware datetime, or ``None`` after reporting the problem
    with ``st.error`` (including the case that no format matched).
    """
    known_formats = (
        "%m_%d_%y_%I_%M_%p",  # Standard app format
        "%Y-%m-%d %I:%M %p",  # Common 12-hour format
        "%Y-%m-%d %H:%M",     # 24-hour format
        "%m/%d/%y %I:%M %p",  # US date format
        "%d/%m/%y %I:%M %p",  # European date format
    )
    try:
        parsed = None
        for fmt in known_formats:
            try:
                parsed = datetime.strptime(timestamp_str, fmt)
            except ValueError:
                continue
            break

        if parsed is None:
            raise ValueError(f"Could not parse timestamp: {timestamp_str}")

        if parsed.tzinfo is None:
            parsed = pytz.timezone(timezone).localize(parsed)
        return parsed
    except Exception as e:
        st.error(f"Error parsing timestamp: {str(e)}")
        return None
def get_time_ago(dt: datetime) -> str:
    """
    Describe how long ago ``dt`` was, e.g. "2 hours ago" or "3 days ago".

    The current time is taken in ``dt``'s own tzinfo. Anything under a
    minute is "just now"; larger spans use whole minutes, hours, days,
    weeks, 30-day months or 365-day years. Errors are reported with
    ``st.error`` and produce "unknown time ago".
    """
    # (upper bound in seconds, seconds per unit, unit label)
    ladder = (
        (3600, 60, "minute"),
        (86400, 3600, "hour"),
        (604800, 86400, "day"),
        (2592000, 604800, "week"),
        (31536000, 2592000, "month"),
    )
    try:
        elapsed = (datetime.now(dt.tzinfo) - dt).total_seconds()
        if elapsed < 60:
            return "just now"

        for ceiling, unit_seconds, label in ladder:
            if elapsed < ceiling:
                break
        else:
            # Beyond every bound in the ladder: count in years.
            unit_seconds, label = 31536000, "year"

        count = int(elapsed / unit_seconds)
        plural = "" if count == 1 else "s"
        return f"{count} {label}{plural} ago"
    except Exception as e:
        st.error(f"Error calculating time ago: {str(e)}")
        return "unknown time ago"
def format_duration(seconds: float) -> str:
    """
    Format a duration in seconds to a human-readable string.

    Examples: "250ms", "12.5s", "2m 30.0s", "1h 15m". Seconds are omitted
    once the duration reaches an hour, and zero components are left out.

    The value is rounded to the displayed precision *before* it is split
    into units, so rounding can never produce "1000ms", "60.0s", "1m 60.0s"
    or a trailing "0.0s".

    Returns "invalid duration" for negative input. Any error is reported
    with ``st.error`` and yields "unknown duration".
    """
    try:
        if seconds < 0:
            return "invalid duration"

        # Sub-second values are shown in milliseconds, unless they round up
        # to a full second.
        if seconds < 1:
            millis = f"{seconds * 1000:.0f}"
            if millis != "1000":
                return f"{millis}ms"

        # Round to tenths first; values that round up to a minute fall through.
        tenths = round(seconds, 1)
        if tenths < 60:
            return f"{tenths:.1f}s"

        # Integer arithmetic on tenths of a second avoids float remainders.
        total_tenths = round(tenths * 10)
        hours, rest = divmod(total_tenths, 36000)
        minutes, sec_tenths = divmod(rest, 600)

        parts = []
        if hours > 0:
            parts.append(f"{hours}h")
        if minutes > 0:
            parts.append(f"{minutes}m")
        if sec_tenths > 0 and hours == 0:  # Only show seconds if less than an hour
            parts.append(f"{sec_tenths / 10:.1f}s")
        return " ".join(parts)
    except Exception as e:
        st.error(f"Error formatting duration: {str(e)}")
        return "unknown duration"
async def create_paper_audio_files(papers: List[Dict], input_question: str):
    """Generate one audio file per paper, named from a timestamp and the title.

    For every paper the title, authors and summary are cleaned with
    ``clean_for_speech`` and synthesized with Edge TTS using the voice and
    audio format stored in ``st.session_state``. On success the paper dict
    gets ``full_audio`` (the file name) and, when downloads are enabled,
    ``download_base64``. On failure a warning is shown and both fields are
    reset.

    NOTE(review): a second ``create_paper_audio_files`` is defined later in
    this module and rebinds the name, so this version is shadowed once the
    module has finished loading.

    Args:
        papers: Paper dicts with ``title``, ``authors`` and ``summary`` keys;
            mutated in place.
        input_question: Unused here; kept for interface compatibility.
    """
    async def synthesize(text, filename):
        # Save the spoken text to ``filename`` and hand the name back.
        communicate = edge_tts.Communicate(text, st.session_state['tts_voice'])
        await communicate.save(filename)
        return filename

    with PerformanceTimer("paper_audio_generation"):
        pending = []
        for paper in papers:
            try:
                # Prepare text for audio generation
                audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
                audio_text = clean_for_speech(audio_text)
                # Create sanitized title for filename
                safe_title = paper['title'].lower()
                safe_title = re.sub(r'[^\w\s-]', '', safe_title)  # Remove special chars
                safe_title = re.sub(r'\s+', '_', safe_title)      # Replace spaces with underscores
                safe_title = safe_title[:100]                     # Limit length
                timestamp = format_timestamp_prefix()
                filename = f"{timestamp}_{safe_title}.{st.session_state['audio_format']}"
                pending.append((paper, synthesize(audio_text, filename)))
            except Exception as e:
                # ``.get`` keeps the handler itself from raising when the
                # failure was a missing 'title' key.
                st.warning(f"Error preparing audio for paper {paper.get('title', 'unknown')}: {str(e)}")
                continue
        # The coroutines are awaited one after another, in paper order.
        for paper, job in pending:
            try:
                audio_file = await job
                if audio_file:
                    paper['full_audio'] = audio_file
                    if st.session_state['enable_download']:
                        paper['download_base64'] = create_download_link_with_cache(
                            audio_file,
                            st.session_state['audio_format']
                        )
            except Exception as e:
                st.warning(f"Error generating audio for paper {paper.get('title', 'unknown')}: {str(e)}")
                paper['full_audio'] = None
                paper['download_base64'] = ''
+# ─────────────────────────────────────────────────────────
+# 4. PAPER PROCESSING & DISPLAY
+# ─────────────────────────────────────────────────────────
def parse_arxiv_refs(ref_text: str, max_papers: int = 20) -> List[Dict[str, str]]:
    """Parse the reference text returned by the Arxiv search into paper dicts.

    A line containing exactly two ``|`` characters starts a new paper and is
    read as ``date | title | ...``; an ``https://arxiv.org/...`` link on that
    line becomes the URL (otherwise a ``paper_<n>`` placeholder is used). The
    first non-empty line after a header is stored as the authors, and every
    further non-empty line is appended to the summary.

    Args:
        ref_text: Raw reference text; empty or ``None`` yields ``[]``.
        max_papers: Maximum number of papers to return (default 20).

    Returns:
        A list of dicts with the keys ``date``, ``title``, ``url``,
        ``authors``, ``summary``, ``full_audio`` (``None``) and
        ``download_base64`` (empty string).
    """
    if not ref_text:
        return []
    with PerformanceTimer("parse_refs"):
        results = []
        current_paper = {}
        for i, line in enumerate(ref_text.split('\n')):
            try:
                if line.count('|') == 2:
                    # A header line closes the paper collected so far.
                    if current_paper:
                        results.append(current_paper)
                        current_paper = {}
                        if len(results) >= max_papers:
                            break
                    header_parts = line.strip('* ').split('|')
                    date = header_parts[0].strip()
                    title = header_parts[1].strip()
                    url_match = re.search(r'(https://arxiv\.org/\S+)', line)
                    url = url_match.group(1) if url_match else f"paper_{len(results)}"
                    current_paper = {
                        'date': date,
                        'title': title,
                        'url': url,
                        'authors': '',
                        'summary': '',
                        'full_audio': None,
                        'download_base64': '',
                    }
                elif current_paper:
                    text = line.strip('* ')
                    if not text:
                        # Blank lines carry no content; skipping them avoids
                        # padding the summary with stray spaces.
                        continue
                    if not current_paper['authors']:
                        current_paper['authors'] = text
                    elif current_paper['summary']:
                        current_paper['summary'] += ' ' + text
                    else:
                        current_paper['summary'] = text
            except Exception as e:
                st.warning(f"Error parsing line {i}: {str(e)}")
                continue
        # Add the paper still being collected when the input ended.
        if current_paper:
            results.append(current_paper)
        return results[:max_papers]
async def create_paper_audio_files(papers: List[Dict], input_question: str):
    """Generate an audio file for each paper and attach it to the paper dict.

    The title, authors and summary of each paper are cleaned with
    ``clean_for_speech`` and passed to ``async_edge_tts_generate`` together
    with the voice and audio format stored in ``st.session_state``. On
    success the paper gets ``full_audio`` and, when downloads are enabled,
    ``download_base64``. On failure a warning is shown and both fields are
    reset.

    Args:
        papers: Paper dicts with ``title``, ``authors`` and ``summary`` keys;
            mutated in place.
        input_question: Unused here; kept for interface compatibility.
    """
    with PerformanceTimer("paper_audio_generation"):
        pending = []
        for paper in papers:
            try:
                # Prepare text for audio generation
                audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
                audio_text = clean_for_speech(audio_text)
                # presumably returns an awaitable yielding (file, seconds) — confirm against its definition
                job = async_edge_tts_generate(
                    audio_text,
                    voice=st.session_state['tts_voice'],
                    file_format=st.session_state['audio_format']
                )
                pending.append((paper, job))
            except Exception as e:
                # ``.get`` keeps the handler itself from raising when the
                # failure was a missing 'title' key.
                st.warning(f"Error preparing audio for paper {paper.get('title', 'unknown')}: {str(e)}")
                continue
        # The jobs are awaited one after another, in paper order.
        for paper, job in pending:
            try:
                audio_file, _gen_time = await job
                if audio_file:
                    paper['full_audio'] = audio_file
                    if st.session_state['enable_download']:
                        paper['download_base64'] = create_download_link_with_cache(
                            audio_file,
                            st.session_state['audio_format']
                        )
            except Exception as e:
                st.warning(f"Error generating audio for paper {paper.get('title', 'unknown')}: {str(e)}")
                paper['full_audio'] = None
                paper['download_base64'] = ''
def initialize_marquee_settings():
    """Store the default marquee style in session state unless one already exists."""
    if 'marquee_settings' in st.session_state:
        return
    defaults = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    }
    st.session_state['marquee_settings'] = defaults
def get_marquee_settings():
    """Return the marquee settings from session state, creating defaults first if absent."""
    initialize_marquee_settings()
    current = st.session_state['marquee_settings']
    return current
def update_marquee_settings_ui():
    """Render the marquee colour pickers and sliders in the sidebar.

    The widget values are written back to
    ``st.session_state['marquee_settings']`` (``background``, ``color``,
    ``font-size`` and ``animationDuration``).
    """
    st.sidebar.markdown("### 🎯 Marquee Settings")
    # Make sure the settings exist before reading them: this function can run
    # before anything else has initialised the session-state entry.
    initialize_marquee_settings()
    # Create two columns for settings
    cols = st.sidebar.columns(2)
    # Column 1: Color settings
    with cols[0]:
        bg_color = st.color_picker(
            "🎨 Background",
            st.session_state['marquee_settings']["background"],
            key="bg_color_picker"
        )
        text_color = st.color_picker(
            "✍️ Text Color",
            st.session_state['marquee_settings']["color"],
            key="text_color_picker"
        )
    # Column 2: Size and speed settings
    with cols[1]:
        # Slider arguments are (min, max, initial value).
        font_size = st.slider(
            "📏 Font Size",
            10, 24, 14,
            key="font_size_slider"
        )
        duration = st.slider(
            "⏱️ Animation Speed",
            1, 20, 20,
            key="duration_slider"
        )
    # Update session state with new settings
    st.session_state['marquee_settings'].update({
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s"
    })
def display_marquee(text: str, settings: dict, key_suffix: str = ""):
    """Render ``text`` as a scrolling marquee styled by ``settings``.

    Text longer than 280 characters is cut to 280 and suffixed with "...".
    ``key_suffix`` is appended to the component key ``marquee_``. An empty
    ``st.write`` follows the marquee as spacing.
    """
    if len(text) > 280:
        shown = text[:280] + "..."
    else:
        shown = text
    component_key = f"marquee_{key_suffix}"
    streamlit_marquee(content=shown, **settings, key=component_key)
    # Add spacing after marquee
    st.write("")
def create_paper_links_md(papers: list) -> str:
    """Build a markdown document with a numbered list of paper titles linked to their URLs.

    Each paper must provide ``title`` and ``url``. The result starts with a
    "# Paper Links" heading followed by one numbered entry per paper.
    """
    entries = [
        f"{number}. **{paper['title']}** — [Arxiv]({paper['url']})"
        for number, paper in enumerate(papers, start=1)
    ]
    return "\n".join(["# Paper Links\n", *entries])
def apply_custom_styling():
    """Inject the app's CSS overrides through an HTML-enabled markdown call."""
    stylesheet = """
        <style>
            .main {
                background: linear-gradient(to right, #1a1a1a, #2d2d2d);
                color: #fff;
            }
            .stMarkdown {
                font-family: 'Helvetica Neue', sans-serif;
            }
            .stButton>button {
                margin-right: 0.5rem;
            }
            .streamlit-marquee {
                margin: 1rem 0;
                border-radius: 4px;
            }
            .st-emotion-cache-1y4p8pa {
                padding: 1rem;
            }
        </style>
    """
    st.markdown(stylesheet, unsafe_allow_html=True)
def display_performance_metrics(timings: dict):
    """Show per-operation timings in the sidebar.

    ``timings`` maps an operation name to its duration in seconds. The total
    is printed first, then each operation with its share of the total and a
    progress bar of that share.
    """
    sidebar = st.sidebar
    sidebar.markdown("### ⏱️ Performance Metrics")
    total_time = sum(timings.values())
    sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")
    sidebar.markdown("#### Operation Breakdown")
    for operation, duration in timings.items():
        # Guard against division by zero when nothing was timed.
        if total_time > 0:
            percentage = (duration / total_time) * 100
        else:
            percentage = 0
        sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")
        sidebar.progress(percentage / 100)
def display_papers(papers: List[Dict], marquee_settings: Dict):
    """Render the papers in two tabs: a detailed list and a three-column grid.

    The list view shows a marquee and an expanded details panel per paper,
    including an audio player and download link when present. The grid view
    shows a compact card per paper with links and optional audio.
    """
    with PerformanceTimer("paper_display"):
        st.write("## 📚 Research Papers")
        list_tab, grid_tab = st.tabs(["📋 List View", "📊 Grid View"])
        with list_tab:
            for index, paper in enumerate(papers, start=1):
                # Marquee banner with the title and a shortened author list
                banner = f"📄 {paper['title']} | 👤 {paper['authors'][:120]}"
                display_marquee(banner, marquee_settings, key_suffix=f"paper_{index}")
                with st.expander(f"{index}. 📄 {paper['title']}", expanded=True):
                    # The PDF link is derived from the abstract URL
                    pdf_url = paper['url'].replace('/abs/', '/pdf/')
                    details = f"""
                        **Date:** {paper['date']}
                        **Title:** {paper['title']}
                        **Links:** 📄 [Abstract]({paper['url']}) | 📑 [PDF]({pdf_url})
                    """
                    st.markdown(details)
                    st.markdown(f"**Authors:** {paper['authors']}")
                    st.markdown(f"**Summary:** {paper['summary']}")
                    if not paper.get('full_audio'):
                        continue
                    st.write("🎧 Paper Audio Summary")
                    st.audio(paper['full_audio'])
                    if paper['download_base64']:
                        st.markdown(paper['download_base64'], unsafe_allow_html=True)
        with grid_tab:
            grid_columns = st.columns(3)
            for position, paper in enumerate(papers):
                # Papers are dealt round-robin across the three columns
                with grid_columns[position % 3]:
                    card = f"""
                        ### 📄 {paper['title'][:50]}...
                        **Date:** {paper['date']}
                        [Abstract]({paper['url']}) | [PDF]({paper['url'].replace('/abs/', '/pdf/')})
                    """
                    st.markdown(card)
                    if paper.get('full_audio'):
                        st.audio(paper['full_audio'])
def display_papers_in_sidebar(papers: List[Dict]):
    """List the papers in the sidebar with date and text filters.

    Each paper gets a collapsed expander with its date, links, and shortened
    authors and summary. Audio is only rendered after the paper's
    "Load Audio" button has set its flag in session state.
    """
    with PerformanceTimer("sidebar_display"):
        sidebar = st.sidebar
        sidebar.title("📚 Papers Overview")
        filter_date = sidebar.date_input("Filter by date:", None)
        search_term = sidebar.text_input("Search papers:", "")
        shown = papers
        if filter_date:
            # Keep papers whose date string contains the chosen day
            shown = [
                entry for entry in shown
                if filter_date.strftime("%Y-%m-%d") in entry['date']
            ]
        if search_term:
            needle = search_term.lower()
            shown = [
                entry for entry in shown
                if needle in entry['title'].lower() or needle in entry['authors'].lower()
            ]
        for number, paper in enumerate(shown, start=1):
            # Per-paper flag remembering whether audio was requested
            paper_key = f"paper_{paper['url']}"
            if paper_key not in st.session_state:
                st.session_state[paper_key] = False
            with sidebar.expander(f"{number}. {paper['title'][:50]}...", expanded=False):
                st.markdown(f"**Date:** {paper['date']}")
                pdf_url = paper['url'].replace('/abs/', '/pdf/')
                st.markdown(f"📄 [Abstract]({paper['url']}) | 📑 [PDF]({pdf_url})")
                st.markdown(f"**Authors:** {paper['authors'][:100]}...")
                if paper['summary']:
                    st.markdown(f"**Summary:** {paper['summary'][:200]}...")
                if not paper['full_audio']:
                    continue
                if st.button("🎵 Load Audio", key=f"btn_{paper_key}"):
                    st.session_state[paper_key] = True
                if st.session_state[paper_key]:
                    st.audio(paper['full_audio'])
                    if paper['download_base64']:
                        st.markdown(paper['download_base64'], unsafe_allow_html=True)
+# ─────────────────────────────────────────────────────────
+# 5. FILE MANAGEMENT & HISTORY
+# ─────────────────────────────────────────────────────────
def create_file(prompt: str, response: str, file_type: str = "md") -> str:
    """Write the prompt and response to a new file under ``generated_files``.

    The file name comes from ``generate_filename``. Markdown files get
    "# Query" / "# Response" headings; other types get the two texts
    separated by a blank line.

    Returns:
        The path of the written file, or "" (after reporting through
        ``st.error``) if anything fails.
    """
    with PerformanceTimer("file_creation"):
        try:
            filename = generate_filename(prompt.strip(), response.strip(), file_type)
            os.makedirs("generated_files", exist_ok=True)
            filepath = os.path.join("generated_files", filename)
            if file_type == "md":
                body = f"# Query\n{prompt}\n\n# Response\n{response}"
            else:
                body = f"{prompt}\n\n{response}"
            with open(filepath, 'w', encoding='utf-8') as handle:
                handle.write(body)
            return filepath
        except Exception as e:
            st.error(f"Error creating file: {str(e)}")
            return ""
def get_high_info_terms(text: str, top_n: int = 10) -> List[str]:
    """Return the ``top_n`` most frequent informative terms in ``text``.

    Terms are lower-cased single words (hyphenated words count as one) and
    two-word phrases. Stop words are discarded, and so is any phrase that
    contains a stop word, so filler such as "of the" cannot outrank real
    content. Terms of three characters or fewer are ignored.

    Args:
        text: Text to analyse.
        top_n: Maximum number of terms to return.

    Returns:
        Terms ordered by descending frequency; ties keep first-seen order,
        with single words ahead of phrases.
    """
    # Common English stop words to filter out
    stop_words = {
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
        'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'over',
        'after', 'this', 'that', 'these', 'those', 'what', 'which'
    }
    words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
    unigrams = [word for word in words if word not in stop_words and len(word) > 3]
    bi_grams = [
        f"{first} {second}"
        for first, second in zip(words, words[1:])
        if first not in stop_words and second not in stop_words
    ]
    # The length filter also applies to phrases, e.g. it drops "a b"-sized ones.
    bi_grams = [phrase for phrase in bi_grams if len(phrase) > 3]
    counter = Counter(unigrams + bi_grams)
    return [term for term, _freq in counter.most_common(top_n)]
def clean_text_for_filename(text: str) -> str:
    """Reduce ``text`` to an underscore-joined slug for use in file names.

    The text is lower-cased, characters other than word characters,
    whitespace and hyphens are removed, and words that are three characters
    or shorter or in the ignore list are dropped. The result is capped at
    200 characters.
    """
    ignored = set([
        'the', 'and', 'for', 'with', 'this', 'that', 'what', 'which',
        'where', 'when', 'why', 'how', 'who', 'whom', 'whose', 'ai',
        'library', 'function', 'method', 'class', 'object', 'variable'
    ])
    normalized = re.sub(r'[^\w\s-]', '', text.lower())
    kept = []
    for word in normalized.split():
        if len(word) <= 3 or word in ignored:
            continue
        kept.append(word)
    return '_'.join(kept)[:200]
def generate_filename(prompt: str, response: str, file_type: str = "md",
                     max_length: int = 200) -> str:
    """Build a descriptive file name from a timestamp and the content.

    The name is the timestamp prefix, then up to five high-information terms
    taken from the first 500 characters of prompt plus response, then a
    cleaned snippet of the first 40 characters of each. Duplicate parts are
    dropped and the name is trimmed so the whole file name, including prefix
    and extension, fits in ``max_length``.
    """
    prefix = format_timestamp_prefix() + "_"
    # High-information terms from the start of the combined text
    leading_text = (prompt + " " + response)[:500]
    info_terms = get_high_info_terms(leading_text, top_n=5)
    # Short cleaned snippet of prompt and response
    snippet_cleaned = clean_text_for_filename((prompt[:40] + " " + response[:40]).strip())
    # ``dict.fromkeys`` drops duplicates and keeps first-seen order
    unique_parts = list(dict.fromkeys(info_terms + [snippet_cleaned]))
    full_name = '_'.join(unique_parts).strip('_')
    # Space left after the prefix, the dot and the extension
    leftover_chars = max_length - len(prefix) - len(file_type) - 1
    if len(full_name) > leftover_chars:
        full_name = full_name[:leftover_chars]
    return f"{prefix}{full_name}.{file_type}"
def create_zip_of_files(md_files: List[str], mp3_files: List[str],
                       wav_files: List[str], input_question: str) -> Optional[str]:
    """Bundle the given markdown and audio files into a descriptively named zip.

    ``readme.md`` and empty markdown files are left out. The archive name is
    built from a timestamp and up to five high-information terms taken from
    the markdown contents, the audio file names and ``input_question``. Files
    are stored flat under their base names.

    Returns:
        The zip file name, or ``None`` if there is nothing to archive or an
        error occurs (reported through ``st.error``).
    """
    with PerformanceTimer("zip_creation"):
        kept_md = []
        for path in md_files:
            if os.path.basename(path).lower() != 'readme.md' and os.path.getsize(path) > 0:
                kept_md.append(path)
        all_files = kept_md + mp3_files + wav_files
        if not all_files:
            return None
        try:
            # Collect the text that drives the archive name
            texts = []
            for path in all_files:
                if path.endswith('.md'):
                    with open(path, 'r', encoding='utf-8') as handle:
                        texts.append(handle.read())
                elif path.endswith(('.mp3', '.wav')):
                    stem = os.path.splitext(os.path.basename(path))[0]
                    texts.append(stem.replace('_', ' '))
            texts.append(input_question)
            info_terms = get_high_info_terms(" ".join(texts), top_n=10)
            timestamp = format_timestamp_prefix()
            name_text = '-'.join(info_terms[:5])
            zip_name = f"archive_{timestamp}_{name_text[:50]}.zip"
            with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as archive:
                for path in all_files:
                    archive.write(path, os.path.basename(path))
            return zip_name
        except Exception as e:
            st.error(f"Error creating zip archive: {str(e)}")
            return None
+# ─────────────────────────────────────────────────────────
+# 6. OPTIMIZED AI LOOKUP & PROCESSING
+# ─────────────────────────────────────────────────────────
def perform_ai_lookup(q: str, vocal_summary: bool = True,
                     extended_refs: bool = False,
                     titles_summary: bool = True,
                     full_audio: bool = False) -> Tuple[str, Dict[str, float]]:
    """Run the full lookup for a query: Claude answer, saved Q&A audio, Arxiv papers.

    Steps, in order:
      1. On the first call in a session, render the operation checkboxes in
         the sidebar and store their values in ``st.session_state``.
      2. If Claude is enabled, send ``q`` to the Anthropic API and show the reply.
      3. Save the question/answer via ``async_save_qa_with_audio`` and, if
         audio is enabled, play the resulting audio.
      4. If Claude is enabled, query the Arxiv RAG Space, parse the returned
         references, write a links file, generate per-paper audio and
         display the papers in the page and the sidebar.

    Args:
        q: The user query.
        vocal_summary, extended_refs, titles_summary, full_audio: Accepted
            for interface compatibility; not read by this function.

    Returns:
        ``(result, timings)``: the Claude reply text ("" when Claude is
        disabled, an error notice when the call failed) and a dict mapping
        step names to seconds.
    """
    with PerformanceTimer("total_lookup") as total_timer:
        timings = {}
        # Add operation controls if not present
        # NOTE(review): these checkboxes are created only while
        # 'operation_controls' is missing from session state, so they are not
        # rendered again on later reruns — confirm this is intended.
        if 'operation_controls' not in st.session_state:
            st.sidebar.markdown("### 🔧 Operation Controls")
            st.session_state['enable_claude'] = st.sidebar.checkbox(
                "Enable Claude Search",
                value=st.session_state['enable_claude']
            )
            st.session_state['enable_audio'] = st.sidebar.checkbox(
                "Generate Audio",
                value=st.session_state['enable_audio']
            )
            st.session_state['enable_download'] = st.sidebar.checkbox(
                "Create Download Links",
                value=st.session_state['enable_download']
            )
            st.session_state['operation_controls'] = True
        result = ""
        # 1. Claude API (if enabled)
        if st.session_state['enable_claude']:
            with PerformanceTimer("claude_api") as claude_timer:
                try:
                    client = anthropic.Anthropic(api_key=anthropic_key)
                    response = client.messages.create(
                        model="claude-3-sonnet-20240229",
                        max_tokens=1000,
                        messages=[{"role": "user", "content": q}]
                    )
                    st.write("Claude's reply 🧠:")
                    st.markdown(response.content[0].text)
                    result = response.content[0].text
                    # presumably PerformanceTimer exposes ``start_time`` — confirm against its definition
                    timings['claude_api'] = time.time() - claude_timer.start_time
                except Exception as e:
                    st.error(f"Error with Claude API: {str(e)}")
                    result = "Error occurred during Claude API call"
                    timings['claude_api'] = 0
        # 2. Async save and audio generation
        async def process_results():
            with PerformanceTimer("results_processing") as proc_timer:
                md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(
                    q, result
                )
                timings['markdown_save'] = md_time
                timings['audio_generation'] = audio_time
                if audio_file and st.session_state['enable_audio']:
                    st.subheader("📝 Main Response Audio")
                    st.audio(audio_file)
                    if st.session_state['enable_download']:
                        st.markdown(
                            create_download_link_with_cache(
                                audio_file,
                                st.session_state['audio_format']
                            ),
                            unsafe_allow_html=True
                        )
        # Run async operations
        asyncio.run(process_results())
        # 3. Arxiv RAG with performance tracking
        if st.session_state['enable_claude']:
            with PerformanceTimer("arxiv_rag") as rag_timer:
                try:
                    st.write('Running Arxiv RAG with Claude inputs.')
                    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
                    # Only the first element of the prediction is used as the reference text.
                    refs = client.predict(
                        q,
                        10,
                        "Semantic Search",
                        "mistralai/Mixtral-8x7B-Instruct-v0.1",
                        api_name="/update_with_rag_md"
                    )[0]
                    timings['arxiv_rag'] = time.time() - rag_timer.start_time
                    # Process papers asynchronously
                    papers = parse_arxiv_refs(refs)
                    if papers:
                        with PerformanceTimer("paper_processing") as paper_timer:
                            async def process_papers():
                                # Create minimal links page; the file is written for its side effect
                                paper_links = create_paper_links_md(papers)
                                create_file(q, paper_links, "md")
                                st.markdown(paper_links)
                                # Generate audio and display papers
                                await create_paper_audio_files(papers, q)
                                display_papers(papers, get_marquee_settings())
                                display_papers_in_sidebar(papers)
                            asyncio.run(process_papers())
                            timings['paper_processing'] = time.time() - paper_timer.start_time
                    else:
                        st.warning("No papers found in the response.")
                except Exception as e:
                    st.error(f"Error during Arxiv RAG: {str(e)}")
                    # Record 0 only when the search itself failed; a failure in
                    # the later paper processing must not erase the measured time.
                    timings.setdefault('arxiv_rag', 0)
        return result, timings
def process_voice_input(text: str):
    """Run the AI lookup for ``text``, save the result and show the generated files.

    Empty input only triggers a warning. Otherwise the lookup result is saved
    with ``save_qa_with_audio`` and the markdown and audio files are offered
    in two columns. Any exception is reported through ``st.error``.
    """
    if not text:
        st.warning("Please provide some input text.")
        return
    with PerformanceTimer("voice_processing"):
        try:
            st.subheader("🔍 Search Results")
            lookup_options = dict(
                vocal_summary=True,
                extended_refs=False,
                titles_summary=True,
                full_audio=True,
            )
            result, _timings = perform_ai_lookup(text, **lookup_options)
            # Persist the question and answer as markdown plus audio
            md_file, audio_file = save_qa_with_audio(text, result)
            st.subheader("📝 Generated Files")
            md_column, audio_column = st.columns(2)
            with md_column:
                st.write(f"📄 Markdown: {os.path.basename(md_file)}")
                st.markdown(get_download_link(md_file, "md"), unsafe_allow_html=True)
            with audio_column:
                if audio_file:
                    st.write(f"🎵 Audio: {os.path.basename(audio_file)}")
                    play_and_download_audio(audio_file, st.session_state['audio_format'])
        except Exception as e:
            st.error(f"Error processing voice input: {str(e)}")
+# ─────────────────────────────────────────────────────────
+# 7. SIDEBAR AND FILE HISTORY
+# ─────────────────────────────────────────────────────────
def display_file_history_in_sidebar():
    """Show the generated ``.md``/``.mp3``/``.wav`` files in the sidebar.

    Files are read from the current working directory. The sidebar offers
    "Delete All" and "Zip All" buttons, a name search and a type filter, then
    one expander per matching file (newest first) with its modification
    time, size, a preview or audio player, and a download link.
    """
    with PerformanceTimer("file_history"):
        st.sidebar.markdown("---")
        st.sidebar.markdown("### 📂 File History")
        # Gather all files
        md_files = glob.glob("*.md")
        mp3_files = glob.glob("*.mp3")
        wav_files = glob.glob("*.wav")
        all_files = md_files + mp3_files + wav_files
        if not all_files:
            st.sidebar.write("No files found.")
            return
        # Add file management controls
        col1, col2 = st.sidebar.columns(2)
        with col1:
            if st.button("🗑 Delete All"):
                try:
                    for f in all_files:
                        os.remove(f)
                    st.session_state.should_rerun = True
                    st.success("All files deleted successfully.")
                except Exception as e:
                    st.error(f"Error deleting files: {str(e)}")
                # Drop the paths that were just removed: the listing below
                # stats every entry and would fail on a deleted file.
                all_files = [f for f in all_files if os.path.exists(f)]
        with col2:
            if st.button("⬇️ Zip All"):
                zip_name = create_zip_of_files(
                    md_files,
                    mp3_files,
                    wav_files,
                    st.session_state.get('last_query', '')
                )
                if zip_name:
                    st.sidebar.markdown(
                        get_download_link(zip_name, "zip"),
                        unsafe_allow_html=True
                    )
        # Add file filtering options
        st.sidebar.markdown("### 🔍 Filter Files")
        file_search = st.sidebar.text_input("Search files:", "")
        file_type_filter = st.sidebar.multiselect(
            "File types:",
            ["Markdown", "Audio"],
            default=["Markdown", "Audio"]
        )
        # Sort files by modification time, newest first
        all_files.sort(key=os.path.getmtime, reverse=True)
        # Filter files based on search and type
        filtered_files = []
        for f in all_files:
            if file_search.lower() in f.lower():
                ext = os.path.splitext(f)[1].lower()
                if (("Markdown" in file_type_filter and ext == ".md") or
                    ("Audio" in file_type_filter and ext in [".mp3", ".wav"])):
                    filtered_files.append(f)
        # Display filtered files
        for f in filtered_files:
            fname = os.path.basename(f)
            ext = os.path.splitext(fname)[1].lower().strip('.')
            emoji = FILE_EMOJIS.get(ext, '📦')
            # Get file metadata
            mod_time = datetime.fromtimestamp(os.path.getmtime(f))
            time_str = mod_time.strftime("%Y-%m-%d %H:%M:%S")
            file_size = os.path.getsize(f) / 1024  # Size in KB
            with st.sidebar.expander(f"{emoji} {fname}"):
                st.write(f"**Modified:** {time_str}")
                st.write(f"**Size:** {file_size:.1f} KB")
                if ext == "md":
                    try:
                        # Preview at most the first 200 characters on one line
                        with open(f, "r", encoding="utf-8") as file_in:
                            snippet = file_in.read(200).replace("\n", " ")
                        if len(snippet) == 200:
                            snippet += "..."
                        st.write(snippet)
                        st.markdown(
                            get_download_link(f, file_type="md"),
                            unsafe_allow_html=True
                        )
                    except Exception as e:
                        st.error(f"Error reading markdown file: {str(e)}")
                elif ext in ["mp3", "wav"]:
                    st.audio(f)
                    st.markdown(
                        get_download_link(f, file_type=ext),
                        unsafe_allow_html=True
                    )
                else:
                    st.markdown(get_download_link(f), unsafe_allow_html=True)
+# ─────────────────────────────────────────────────────────
+# 8. MAIN APPLICATION
+# ─────────────────────────────────────────────────────────
+def main():
+    """Main application entry point with enhanced UI and error handling."""
+    try:
+        # 1. Setup marquee UI in sidebar
+        update_marquee_settings_ui()
+        marquee_settings = get_marquee_settings()
+        # 2. Display welcome marquee
+        display_marquee(
+            st.session_state['marquee_content'],
+            {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
+            key_suffix="welcome"
+        )
+        # 3. Main action tabs
+        tab_main = st.radio(
+            "Action:",
+            ["🎤 Voice", "📸 Media", "🔍 ArXiv", "📝 Editor"],
+            horizontal=True
+        )
+        # Custom component usage
+        mycomponent = components.declare_component(
+            "mycomponent",
+            path="mycomponent"
+        )
+        val = mycomponent(my_input_value="Hello")
+        if val:
+            # Process input value
+            val_stripped = val.replace('\\n', ' ')
+            edited_input = st.text_area(
+                "✏️ Edit Input:",
+                value=val_stripped,
+                height=100
+            )
+            # Model selection and options
+            run_option = st.selectbox("Model:", ["Arxiv"])
+            col1, col2 = st.columns(2)
+            with col1:
+                #autorun = st.checkbox("⚙ AutoRun", value=True)
+                autorun = st.checkbox("⚙ AutoRun", value=False)
+            with col2:
+                full_audio = st.checkbox("📚 FullAudio", value=False)
+            # Check for input changes
+            input_changed = (val != st.session_state.old_val)
+            if autorun and input_changed:
+                st.session_state.old_val = val
+                st.session_state.last_query = edited_input
+                result, timings = perform_ai_lookup(
+                    edited_input,
+                    vocal_summary=True,
+                    extended_refs=False,
+                    titles_summary=True,
+                    full_audio=full_audio
+                )
+                # Display performance metrics
+                display_performance_metrics(timings)
+            else:
+                if st.button("▶ Run"):
+                    st.session_state.old_val = val
+                    st.session_state.last_query = edited_input
+                    result, timings = perform_ai_lookup(
+                        edited_input,
+                        vocal_summary=True,
+                        extended_refs=False,
+                        titles_summary=True,
+                        full_audio=full_audio
+                    )
+                    # Display performance metrics
+                    display_performance_metrics(timings)
+        # Tab-specific content
+        if tab_main == "🔍 ArXiv":
+            display_arxiv_tab()
+        elif tab_main == "🎤 Voice":
+            display_voice_tab()
+        elif tab_main == "📸 Media":
+            display_media_tab()
+        elif tab_main == "📝 Editor":
+            display_editor_tab()
+        # Display file history
+        display_file_history_in_sidebar()
+        # Apply styling
+        apply_custom_styling()
+        # Check for rerun
+        if st.session_state.should_rerun:
+            st.session_state.should_rerun = False
+            st.rerun()
+    except Exception as e:
+        st.error(f"An error occurred in the main application: {str(e)}")
+        st.info("Please try refreshing the page or contact support if the issue persists.")
+# Script entry point: call main() only when this module is run as __main__.
+if __name__ == "__main__":
+    main()