awacke1 commited on
Commit
a484f78
·
verified ·
1 Parent(s): 2a5bffd

Create backup22-fulltoobigimho.app.py

Browse files
Files changed (1) hide show
  1. backup22-fulltoobigimho.app.py +1869 -0
backup22-fulltoobigimho.app.py ADDED
@@ -0,0 +1,1869 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic
3
+ import openai
4
+ import base64
5
+ import cv2
6
+ import glob
7
+ import json
8
+ import math
9
+ import os
10
+ import pytz
11
+ import random
12
+ import re
13
+ import requests
14
+ import textract
15
+ import time
16
+ import zipfile
17
+ import plotly.graph_objects as go
18
+ import streamlit.components.v1 as components
19
+ from datetime import datetime
20
+ from audio_recorder_streamlit import audio_recorder
21
+ from bs4 import BeautifulSoup
22
+ from collections import defaultdict, deque, Counter
23
+ from dotenv import load_dotenv
24
+ from gradio_client import Client
25
+ from huggingface_hub import InferenceClient
26
+ from io import BytesIO
27
+ from PIL import Image
28
+ from PyPDF2 import PdfReader
29
+ from urllib.parse import quote
30
+ from xml.etree import ElementTree as ET
31
+ from openai import OpenAI
32
+ import extra_streamlit_components as stx
33
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
34
+ import asyncio
35
+ import edge_tts
36
+ from streamlit_marquee import streamlit_marquee
37
+ from concurrent.futures import ThreadPoolExecutor
38
+ from functools import partial
39
+ from typing import Dict, List, Optional, Tuple, Union
40
+
41
+ # ─────────────────────────────────────────────────────────
42
+ # 1. CORE CONFIGURATION & SETUP
43
+ # ─────────────────────────────────────────────────────────
44
# Page chrome: tab title/icon, wide layout and the entries of the top-right menu.
# This is the first Streamlit call the script makes.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto",
    page_title="🚲TalkingAIResearcherπŸ†",
    page_icon="πŸš²πŸ†",
    menu_items=dict([
        ('Get Help', 'https://huggingface.co/awacke1'),
        ('Report a bug', 'https://huggingface.co/spaces/awacke1'),
        ('About', "🚲TalkingAIResearcherπŸ†"),
    ]),
)

# Load variables from a local .env file (when present) into os.environ.
load_dotenv()
57
+
58
+ # Available English voices for Edge TTS
59
EDGE_TTS_VOICES = (
    ["en-US-AriaNeural", "en-US-GuyNeural", "en-US-JennyNeural"]  # United States
    + ["en-GB-SoniaNeural", "en-GB-RyanNeural"]                   # Great Britain
    + ["en-AU-NatashaNeural", "en-AU-WilliamNeural"]              # Australia
    + ["en-CA-ClaraNeural", "en-CA-LiamNeural"]                   # Canada
)
70
+
71
+ # Session state initialization with default values
72
DEFAULT_SESSION_STATE = {
    # --- marquee appearance ---
    'marquee_settings': {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    },
    # --- speech / chat ---
    'tts_voice': EDGE_TTS_VOICES[0],
    'audio_format': 'mp3',
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4o-2024-05-13",
    'messages': [],
    'last_voice_input': "",
    # --- file editing / navigation ---
    'editing_file': None,
    'edit_new_name': "",
    'edit_new_content': "",
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
    'last_query': "",
    'marquee_content': "πŸš€ Welcome to TalkingAIResearcher | πŸ€– Your Research Assistant",
    # --- feature toggles ---
    'enable_audio': False,
    'enable_download': False,
    'enable_claude': True,
    # --- caches ---
    'audio_cache': {},
    'paper_cache': {},
    'download_link_cache': {},
    # --- timing data written by PerformanceTimer ---
    'performance_metrics': defaultdict(list),
    'operation_timings': defaultdict(float)
}

# Seed only the keys that are missing so values already in the session survive reruns.
for state_key in DEFAULT_SESSION_STATE:
    if state_key in st.session_state:
        continue
    st.session_state[state_key] = DEFAULT_SESSION_STATE[state_key]
110
+
111
+ # API Keys and Configuration
112
# Environment variables are the baseline source for credentials.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
# Prefer the historical ANTHROPIC_API_KEY_3 name, but also accept the plain
# ANTHROPIC_API_KEY name that the secrets lookup below uses.
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3') or os.getenv('ANTHROPIC_API_KEY', "")
xai_key = os.getenv('xai', "")

# Streamlit secrets override the environment when they are available.
# Touching st.secrets can raise when no secrets file is configured, so treat
# that case as "no secrets" rather than crashing at import time.
try:
    if 'OPENAI_API_KEY' in st.secrets:
        openai_api_key = st.secrets['OPENAI_API_KEY']
    if 'ANTHROPIC_API_KEY' in st.secrets:
        anthropic_key = st.secrets["ANTHROPIC_API_KEY"]
except Exception:
    # No secrets store available: keep the values read from the environment.
    pass

openai.api_key = openai_api_key
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')
125
+
126
+ # File type emojis for display
127
# Maps a file extension (no dot) to the icon shown next to files of that type.
FILE_EMOJIS = dict(
    md="πŸ“",
    mp3="🎡",
    wav="πŸ”Š",
    pdf="πŸ“„",
    txt="πŸ“‹",
    json="πŸ“Š",
    csv="πŸ“ˆ",
)
136
+
137
+ # ─────────────────────────────────────────────────────────
138
+ # 2. PERFORMANCE MONITORING & TIMING
139
+ # ─────────────────────────────────────────────────────────
140
+
141
class PerformanceTimer:
    """Time a ``with`` block and record the elapsed seconds in session state.

    On a clean exit the duration is stored under ``operation_name`` in
    ``st.session_state['operation_timings']`` (overwriting the previous value)
    and appended to ``st.session_state['performance_metrics'][operation_name]``.
    Nothing is recorded when the block raises; the exception propagates.
    """

    def __init__(self, operation_name: str):
        self.start_time = None  # set by __enter__ (time.time() value)
        self.operation_name = operation_name

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type:
            # The timed block failed: skip logging, do not suppress the error.
            return
        elapsed = time.time() - self.start_time
        label = self.operation_name
        st.session_state['operation_timings'][label] = elapsed
        st.session_state['performance_metrics'][label].append(elapsed)
156
+
157
def log_performance_metrics():
    """Render timing information in the sidebar.

    Shows the latest duration of every operation recorded in
    ``st.session_state['operation_timings']`` with its share of the total,
    followed by a bar chart of the average duration per operation taken from
    ``st.session_state['performance_metrics']``.
    """
    # Local import: the module never imports pandas at file level, which made
    # the chart branch fail with NameError on `pd`.
    import pandas as pd

    st.sidebar.markdown("### ⏱️ Performance Metrics")

    metrics = st.session_state['operation_timings']
    if metrics:
        total_time = sum(metrics.values())
        st.sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")

        # Create timing breakdown
        for operation, duration in metrics.items():
            # Guard against a zero total (all operations took ~0s).
            percentage = (duration / total_time) * 100 if total_time else 0.0
            st.sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")

        # Show timing history chart
        if st.session_state['performance_metrics']:
            history_data = [
                {"Operation": op, "Avg Time (s)": sum(times) / len(times)}
                for op, times in st.session_state['performance_metrics'].items()
                if times  # only operations that have at least one sample
            ]

            if history_data:
                st.sidebar.markdown("### πŸ“ˆ Timing History")
                chart_data = pd.DataFrame(history_data)
                st.sidebar.bar_chart(chart_data.set_index("Operation"))
183
+
184
+ # ─────────────────────────────────────────────────────────
185
+ # 3. OPTIMIZED AUDIO GENERATION
186
+ # ─────────────────────────────────────────────────────────
187
+
188
def clean_for_speech(text: str) -> str:
    """Strip markdown, code and URLs from ``text`` so it can be read aloud.

    Args:
        text: Raw (possibly markdown) text.

    Returns:
        The cleaned text with whitespace collapsed to single spaces and
        leading/trailing whitespace removed.
    """
    with PerformanceTimer("text_cleaning"):
        # Code has to be removed FIRST: the emphasis pass below deletes
        # backticks, after which these patterns could never match and the
        # code itself would be spoken.
        text = re.sub(r'```[\s\S]*?```', '', text)
        text = re.sub(r'`[^`]*`', '', text)

        # Remove markdown formatting
        text = re.sub(r'#+ ', '', text)  # Remove headers
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  # Keep link label only

        # Remove URLs. Parenthesized URLs go first; otherwise the bare-URL
        # pattern eats the closing ")" and leaves a dangling "(".
        text = re.sub(r'\(https?://[^\)]+\)', '', text)
        text = re.sub(r'https?://\S+', '', text)

        # Remove emphasis markers (after URL removal so URLs are still intact)
        text = re.sub(r'[*_~`]', '', text)

        # Drop the end-of-sequence marker, then collapse all whitespace
        # (including newlines) so no double spaces are left behind.
        text = text.replace("</s>", " ")
        text = re.sub(r'\s+', ' ', text)

        return text.strip()
212
+
213
async def async_edge_tts_generate(
    text: str,
    voice: str,
    rate: int = 0,
    pitch: int = 0,
    file_format: str = "mp3"
) -> Tuple[Optional[str], float]:
    """Generate speech for ``text`` with Edge TTS, caching results per session.

    Args:
        text: Text to speak; it is passed through ``clean_for_speech`` first.
        voice: Edge TTS voice name.
        rate: Speaking-rate adjustment in percent (sent as e.g. ``+0%``).
        pitch: Pitch adjustment in Hz (sent as e.g. ``+0Hz``).
        file_format: Extension used for the output file name.

    Returns:
        ``(filename, seconds)``. ``filename`` is ``None`` when the cleaned text
        is empty or generation failed; ``seconds`` is 0 for cache hits, empty
        input and failures.
    """
    import hashlib  # local import: only needed for the cache key

    with PerformanceTimer("tts_generation") as timer:
        # Clean and validate text
        text = clean_for_speech(text)
        if not text.strip():
            return None, 0

        # Key on a digest of the FULL text. Keying on text[:100] returned the
        # wrong audio for different texts that share their first 100 characters.
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        cache_key = f"{digest}_{voice}_{rate}_{pitch}_{file_format}"
        cached_file = st.session_state['audio_cache'].get(cache_key)
        # Only trust the cache while the file is still on disk.
        if cached_file and os.path.exists(cached_file):
            return cached_file, 0

        try:
            # Generate audio
            rate_str = f"{rate:+d}%"
            pitch_str = f"{pitch:+d}Hz"
            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

            # Generate unique filename
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"

            # NOTE(review): only the extension depends on file_format here;
            # confirm what container edge_tts actually writes for "wav".
            await communicate.save(filename)

            # Cache result
            st.session_state['audio_cache'][cache_key] = filename
            return filename, time.time() - timer.start_time

        except Exception as e:
            st.error(f"Error generating audio: {str(e)}")
            return None, 0
252
+
253
async def async_save_qa_with_audio(
    question: str,
    answer: str,
    voice: Optional[str] = None
) -> Tuple[str, Optional[str], float, float]:
    """Save a question/answer pair as markdown and optionally generate audio.

    Args:
        question: The question text.
        answer: The answer text.
        voice: TTS voice; defaults to ``st.session_state['tts_voice']``.

    Returns:
        ``(md_file, audio_file, md_time, audio_time)`` where ``audio_file`` is
        ``None`` and ``audio_time`` is 0 when audio is disabled via
        ``st.session_state['enable_audio']``.
    """
    voice = voice or st.session_state['tts_voice']

    with PerformanceTimer("qa_save"):
        # Save markdown (the unused pre-built "combined_text" string was dropped;
        # create_file receives question and answer separately).
        md_start = time.time()
        md_file = create_file(question, answer, "md")
        md_time = time.time() - md_start

        # Generate audio if enabled
        audio_file = None
        audio_time = 0
        if st.session_state['enable_audio']:
            audio_text = f"{question}\n\nAnswer: {answer}"
            audio_file, audio_time = await async_edge_tts_generate(
                audio_text,
                voice=voice,
                file_format=st.session_state['audio_format']
            )

        return md_file, audio_file, md_time, audio_time
280
+
281
def create_download_link_with_cache(
    file_path: str,
    file_type: str = "mp3"
) -> str:
    """Build (and cache per session) an HTML download link for a file.

    The file is embedded as a base64 ``data:`` URI.

    Args:
        file_path: Path of the file to offer for download.
        file_type: ``"mp3"``, ``"wav"`` or ``"md"`` select a specific MIME type
            and icon; anything else uses ``application/octet-stream``.

    Returns:
        The ``<a>`` element as a string, or ``""`` when the file cannot be read
        (an error is shown via ``st.error``).
    """
    import html  # local import: used to escape the file name for HTML

    # (MIME type, icon) per supported type.
    link_styles = {
        "mp3": ("audio/mpeg", "🎡"),
        "wav": ("audio/wav", "πŸ”Š"),
        "md": ("text/markdown", "πŸ“"),
    }

    with PerformanceTimer("download_link_generation"):
        try:
            # The key includes the requested type plus the file's mtime/size so
            # an edited file or a different file_type never reuses a stale link.
            stat_result = os.stat(file_path)
            cache_key = (
                f"dl_{file_path}_{file_type}_"
                f"{stat_result.st_mtime_ns}_{stat_result.st_size}"
            )
            link_cache = st.session_state['download_link_cache']
            if cache_key in link_cache:
                return link_cache[cache_key]

            with open(file_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()

            # File names can contain quotes/angle brackets; escape them because
            # the link is rendered as raw HTML.
            filename = html.escape(os.path.basename(file_path), quote=True)
            mime_type, icon = link_styles.get(
                file_type, ("application/octet-stream", "⬇️")
            )
            link = (
                f'<a href="data:{mime_type};base64,{b64}" '
                f'download="{filename}">{icon} Download {filename}</a>'
            )

            # Cache and return
            link_cache[cache_key] = link
            return link

        except Exception as e:
            st.error(f"Error creating download link: {str(e)}")
            return ""
314
+
315
+
316
+
317
+ # ---
318
def display_voice_tab():
    """Render the voice tab: voice/format pickers, message box and chat history.

    Selections are written to ``st.session_state['tts_voice']`` and
    ``st.session_state['audio_format']``; a change triggers ``st.rerun()``.
    Pressing Send passes the single-line message to ``process_voice_input``.
    """
    st.subheader("🎀 Voice Input")

    # Voice Settings Section
    st.markdown("### 🎀 Voice Settings")
    captionFemale='Top: 🌸 **Aria** – 🎢 **Jenny** – 🌺 **Sonia** – 🌌 **Natasha** – 🌷 **Clara**'
    captionMale='Bottom: 🌟 **Guy** – πŸ› οΈ **Ryan** – 🎻 **William** – 🌟 **Liam**'
    voices_image = 'Group Picture - Voices.png'
    # The picture is decoration only; a missing asset must not take the tab down.
    if os.path.exists(voices_image):
        st.sidebar.image(voices_image, caption=captionFemale + ' - ' + captionMale)
    st.sidebar.markdown("""

    # πŸŽ™οΈ Voice Character Agent Selector 🎭
    1. Female:
    - 🌸 **Aria** – Female: 🌟 The voice of elegance and creativity, perfect for soothing storytelling or inspiring ideas.
    - 🎢 **Jenny** – Female: πŸ’– Sweet and friendly, she’s the go-to for warm, conversational tones.
    - 🌺 **Sonia** – Female: πŸ’ƒ Bold and confident, ideal for commanding attention and delivering with flair.
    - 🌌 **Natasha** – Female: ✨ Enigmatic and sophisticated, Natasha is great for a touch of mystery and charm.
    - 🌷 **Clara** – Female: πŸŽ€ Cheerful and gentle, perfect for nurturing, empathetic conversations.
    ---
    2. Male:
    - 🌟 **Guy** – Male: 🎩 Sophisticated and versatile, a natural fit for clear and authoritative delivery.
    - πŸ› οΈ **Ryan** – Male: 🀝 Down-to-earth and approachable, ideal for friendly and casual exchanges.
    - 🎻 **William** – Male: πŸ“š Classic and refined, perfect for a scholarly or thoughtful tone.
    - 🌟 **Liam** – Male: ⚑ Energetic and upbeat, great for dynamic, engaging interactions.

    """)

    # Fall back to the first voice if the stored one is not in the list
    # (list.index would raise ValueError otherwise).
    stored_voice = st.session_state['tts_voice']
    voice_index = EDGE_TTS_VOICES.index(stored_voice) if stored_voice in EDGE_TTS_VOICES else 0
    selected_voice = st.selectbox(
        "Select TTS Voice:",
        options=EDGE_TTS_VOICES,
        index=voice_index
    )

    # Audio Format Selection
    st.markdown("### πŸ”Š Audio Format")
    format_options = ["MP3", "WAV"]
    # Start from the stored format. A fixed index of 0 silently reset a saved
    # "wav" preference to "mp3" whenever the widget was re-created.
    stored_format = str(st.session_state['audio_format']).upper()
    format_index = format_options.index(stored_format) if stored_format in format_options else 0
    selected_format = st.radio(
        "Choose Audio Format:",
        options=format_options,
        index=format_index
    )

    # Update session state if settings change
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()
    if selected_format.lower() != st.session_state['audio_format']:
        st.session_state['audio_format'] = selected_format.lower()
        st.rerun()

    # Text Input Area
    user_text = st.text_area("πŸ’¬ Message:", height=100)
    user_text = user_text.strip().replace('\n', ' ')

    # Send Button
    if st.button("πŸ“¨ Send"):
        if user_text:
            process_voice_input(user_text)
        else:
            # Do not forward an empty message.
            st.warning("Please enter a message.")

    # Chat History
    st.subheader("πŸ“œ Chat History")
    for c in st.session_state.chat_history:
        st.write("**You:**", c["user"])
        st.write("**Response:**", c["claude"])
379
+
380
def display_arxiv_tab():
    """Render the ArXiv tab: query box, option checkboxes and the search button.

    When a query is present and the button is pressed, the query is stored in
    ``st.session_state.last_query`` and ``perform_ai_lookup`` is run with the
    chosen options; with "Full Transcript" ticked the result is also written
    through ``create_file``.
    """
    st.subheader("πŸ” Query ArXiv")
    query = st.text_input("πŸ” Query:", key="arxiv_query")

    # Options Section
    st.markdown("### πŸŽ› Options")
    left, right = st.columns(2)

    with left:
        want_short_audio = st.checkbox(
            "πŸŽ™ Short Audio", value=True, key="option_vocal_summary")
        want_long_refs = st.checkbox(
            "πŸ“œ Long Refs", value=False, key="option_extended_refs")

    with right:
        want_titles = st.checkbox(
            "πŸ”– Titles Only", value=True, key="option_titles_summary")
        want_full_audio = st.checkbox(
            "πŸ“š Full Audio", value=False, key="option_full_audio")

    want_transcript = st.checkbox(
        "🧾 Full Transcript", value=False, key="option_full_transcript")

    # The button is only rendered once a query has been typed.
    if not (query and st.button("πŸ” Run Search")):
        return

    st.session_state.last_query = query
    lookup_options = {
        "vocal_summary": want_short_audio,
        "extended_refs": want_long_refs,
        "titles_summary": want_titles,
        "full_audio": want_full_audio,
    }
    result, timings = perform_ai_lookup(query, **lookup_options)

    if want_transcript:
        create_file(query, result, "md")
416
+
417
def display_media_tab():
    """Render the media gallery: audio, image and video files found in the
    current working directory, each in its own sub-tab."""
    st.header("πŸ“Έ Media Gallery")

    # One sub-tab per media kind
    audio_tab, image_tab, video_tab = st.tabs(["🎡 Audio", "πŸ–Ό Images", "πŸŽ₯ Video"])

    # Audio: player plus download link per file
    with audio_tab:
        st.subheader("🎡 Audio Files")
        found_audio = glob.glob("*.mp3") + glob.glob("*.wav")

        if not found_audio:
            st.write("No audio files found.")
        else:
            for path in found_audio:
                with st.expander(os.path.basename(path)):
                    st.audio(path)
                    suffix = os.path.splitext(path)[1].replace('.', '')
                    st.markdown(
                        get_download_link(path, file_type=suffix),
                        unsafe_allow_html=True,
                    )

    # Images: grid whose column count comes from a slider
    with image_tab:
        st.subheader("πŸ–Ό Image Files")
        found_images = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")

        if not found_images:
            st.write("No images found.")
        else:
            column_count = st.slider("Columns:", 1, 5, 3, key="cols_images")
            grid = st.columns(column_count)

            for position, image_path in enumerate(found_images):
                # Fill the grid row by row.
                with grid[position % column_count]:
                    try:
                        st.image(Image.open(image_path), use_column_width=True)
                    except Exception as e:
                        st.error(f"Error loading image {image_path}: {str(e)}")

    # Video: one expander with a player per file
    with video_tab:
        st.subheader("πŸŽ₯ Video Files")
        found_videos = glob.glob("*.mp4") + glob.glob("*.mov") + glob.glob("*.avi")

        if not found_videos:
            st.write("No videos found.")
        else:
            for clip in found_videos:
                with st.expander(os.path.basename(clip)):
                    st.video(clip)
469
+
470
def display_editor_tab():
    """Render the markdown editor: create a new ``.md`` file, or edit/delete an
    existing one found in the current working directory.

    Creating or deleting a file sets ``st.session_state.should_rerun``.
    """
    st.subheader("πŸ“ Text Editor")

    # File Management Section
    st.markdown("### πŸ“‚ File Management")

    # File Selection
    md_files = glob.glob("*.md")
    selected_file = st.selectbox(
        "Select file to edit:",
        ["New File"] + md_files,
        key="file_selector"
    )

    # Edit Area
    if selected_file == "New File":
        new_filename = st.text_input("New filename (without extension):")
        file_content = st.text_area("Content:", height=300)

        if st.button("πŸ’Ύ Save File"):
            requested = new_filename.strip()
            # Tolerate a typed extension instead of producing "name.md.md".
            if requested.lower().endswith(".md"):
                requested = requested[:-3]
            # The name is user input: sanitize it so path separators cannot be
            # used to write outside the working directory.
            safe_name = sanitize_filename(requested) if requested else ""

            if safe_name:
                try:
                    with open(f"{safe_name}.md", 'w', encoding='utf-8') as f:
                        f.write(file_content)
                    st.success(f"File {safe_name}.md saved successfully!")
                    st.session_state.should_rerun = True
                except Exception as e:
                    st.error(f"Error saving file: {str(e)}")
            else:
                st.warning("Please enter a filename.")
    else:
        try:
            # Load existing file content
            with open(selected_file, 'r', encoding='utf-8') as f:
                file_content = f.read()

            # Edit existing file
            edited_content = st.text_area(
                "Edit content:",
                value=file_content,
                height=300
            )

            col1, col2 = st.columns(2)
            with col1:
                if st.button("πŸ’Ύ Save Changes"):
                    try:
                        with open(selected_file, 'w', encoding='utf-8') as f:
                            f.write(edited_content)
                        st.success("Changes saved successfully!")
                    except Exception as e:
                        st.error(f"Error saving changes: {str(e)}")

            with col2:
                if st.button("πŸ—‘ Delete File"):
                    try:
                        os.remove(selected_file)
                        st.success(f"File {selected_file} deleted successfully!")
                        st.session_state.should_rerun = True
                    except Exception as e:
                        st.error(f"Error deleting file: {str(e)}")

        except Exception as e:
            st.error(f"Error loading file {selected_file}: {str(e)}")
535
+
536
def display_settings_tab():
    """Render the settings tab: theme picker, cache controls and API key status.

    "Clear Cache" empties the audio, paper and download-link caches in session
    state. "Save Settings" only shows a confirmation and sets
    ``st.session_state.should_rerun``; the theme, colour and cache-size widgets
    are displayed but their values are not stored anywhere by this function.
    """

    def _masked(secret: str) -> str:
        """Return a display-safe form of a credential (never the full value)."""
        if not secret:
            return "(not set)"
        # Reveal the last four characters only when the key is long enough that
        # this does not give away a meaningful part of it.
        tail = secret[-4:] if len(secret) > 8 else ""
        return "*" * 8 + tail

    st.subheader("βš™οΈ Settings")

    # General Settings
    st.markdown("### πŸ”§ General Settings")

    # Theme Selection
    theme = st.selectbox(
        "Color Theme:",
        ["Dark", "Light", "Custom"],
        index=0
    )

    if theme == "Custom":
        st.color_picker("Primary Color:", "#1E1E1E")
        st.color_picker("Secondary Color:", "#2D2D2D")

    # Performance Settings
    st.markdown("### ⚑ Performance Settings")

    # Cache Settings
    cache_size = st.slider(
        "Maximum Cache Size (MB):",
        0, 1000, 100
    )

    if st.button("Clear Cache"):
        st.session_state['audio_cache'] = {}
        st.session_state['paper_cache'] = {}
        st.session_state['download_link_cache'] = {}
        st.success("Cache cleared successfully!")

    # API Settings
    st.markdown("### πŸ”‘ API Settings")

    # Show/hide API keys. These are the server's own credentials, so only a
    # masked form is ever sent to the browser.
    show_keys = st.checkbox("Show API Keys")
    if show_keys:
        st.text_input("OpenAI API Key:", value=_masked(openai_api_key), disabled=True)
        st.text_input("Anthropic API Key:", value=_masked(anthropic_key), disabled=True)

    # Save Settings
    if st.button("πŸ’Ύ Save Settings"):
        st.success("Settings saved successfully!")
        st.session_state.should_rerun = True
582
+
583
+
584
+
585
def get_download_link(file: str, file_type: str = "zip") -> str:
    """Return an HTML download link that embeds ``file`` as a base64 data URI.

    Args:
        file: Path of the file to embed.
        file_type: Extension key (case-insensitive) used to pick the MIME type
            and icon; unknown keys fall back to ``application/octet-stream``.

    Returns:
        An ``<a>`` element on success, or a red ``<p>`` element describing the
        problem when the file is missing or cannot be read. File names and
        error text are HTML-escaped because callers render the result as raw HTML.
    """
    import html  # local import: escaping of text placed into the markup

    # MIME type and icon per supported file type
    mime_types = {
        "zip": ("application/zip", "πŸ“‚"),
        "mp3": ("audio/mpeg", "🎡"),
        "wav": ("audio/wav", "πŸ”Š"),
        "md": ("text/markdown", "πŸ“"),
        "pdf": ("application/pdf", "πŸ“„"),
        "txt": ("text/plain", "πŸ“‹"),
        "json": ("application/json", "πŸ“Š"),
        "csv": ("text/csv", "πŸ“ˆ"),
        "png": ("image/png", "πŸ–Ό"),
        "jpg": ("image/jpeg", "πŸ–Ό"),
        "jpeg": ("image/jpeg", "πŸ–Ό")
    }

    try:
        with open(file, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()

        # Escaped once and used both as the download attribute and link text.
        filename = html.escape(os.path.basename(file), quote=True)

        mime_type, emoji = mime_types.get(
            file_type.lower(),
            ("application/octet-stream", "⬇️")
        )

        return f'<a href="data:{mime_type};base64,{b64}" download="{filename}">{emoji} Download {filename}</a>'

    except FileNotFoundError:
        return f"<p style='color: red'>❌ File not found: {html.escape(str(file))}</p>"
    except Exception as e:
        return f"<p style='color: red'>❌ Error creating download link: {html.escape(str(e))}</p>"
627
+
628
def play_and_download_audio(file_path: str, file_type: str = "mp3"):
    """Show an audio player and a download link for ``file_path``.

    Emits a warning when no path is given and an error when the path does not
    exist; in both cases nothing else is rendered.
    """
    # Guard clauses: nothing to play.
    if not file_path:
        st.warning("No audio file provided.")
        return
    if not os.path.exists(file_path):
        st.error(f"Audio file not found: {file_path}")
        return

    try:
        # Player first, download link underneath.
        st.audio(file_path)
        st.markdown(
            get_download_link(file_path, file_type=file_type),
            unsafe_allow_html=True,
        )
    except Exception as e:
        st.error(f"Error playing audio: {str(e)}")
651
+
652
def get_file_info(file_path: str) -> dict:
    """Collect name, path, size, timestamps and extension of a file.

    Returns:
        A dict with keys ``name``, ``path``, ``size``, ``modified``,
        ``created``, ``type``, ``size_fmt``, ``modified_fmt`` and
        ``created_fmt``; ``None`` (after ``st.error``) if the file cannot be
        stat'ed. ``created`` is taken from ``st_ctime``.
    """
    stamp_layout = "%Y-%m-%d %H:%M:%S"
    try:
        stat_result = os.stat(file_path)
        byte_count = stat_result.st_size
        modified_at = datetime.fromtimestamp(stat_result.st_mtime)
        ctime_at = datetime.fromtimestamp(stat_result.st_ctime)

        # Human-readable size: B below 1 KiB, KB below 1 MiB, MB otherwise.
        if byte_count >= 1024 * 1024:
            readable_size = f"{byte_count/(1024*1024):.1f} MB"
        elif byte_count >= 1024:
            readable_size = f"{byte_count/1024:.1f} KB"
        else:
            readable_size = f"{byte_count} B"

        return {
            'name': os.path.basename(file_path),
            'path': file_path,
            'size': byte_count,
            'modified': modified_at,
            'created': ctime_at,
            # extension, lower-cased, without the dot
            'type': os.path.splitext(file_path)[1].lower().strip('.'),
            'size_fmt': readable_size,
            'modified_fmt': modified_at.strftime(stamp_layout),
            'created_fmt': ctime_at.strftime(stamp_layout),
        }

    except Exception as e:
        st.error(f"Error getting file info: {str(e)}")
        return None
687
+
688
def sanitize_filename(filename: str) -> str:
    """Turn arbitrary text into a name that is safe to use as a single file name.

    Replaces ``< > : " / \\ | ? *`` and ASCII control characters with ``_``,
    strips leading/trailing spaces and dots, and limits the result to 255
    characters while keeping the extension where possible.

    Args:
        filename: Proposed file name (not a path: separators are replaced).

    Returns:
        The sanitized name; ``"untitled"`` if nothing usable remains.
    """
    max_length = 255

    # Remove or replace unsafe characters. Control characters (including NUL
    # and newlines) are included because open() rejects or mangles them.
    filename = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)

    # Remove leading/trailing spaces and dots
    filename = filename.strip('. ')

    # An empty name cannot be opened; hand back a usable placeholder instead.
    if not filename:
        return "untitled"

    # Limit length (reserving space for extension)
    if len(filename) > max_length:
        name, ext = os.path.splitext(filename)
        if len(ext) >= max_length:
            # Pathological "extension": plain truncation is the only option.
            filename = filename[:max_length]
        else:
            filename = name[:max_length - len(ext)] + ext
        # Truncation may expose a trailing dot/space again.
        filename = filename.rstrip('. ') or "untitled"

    return filename
706
+
707
def create_file_with_metadata(filename: str, content: str, metadata: dict = None):
    """Write ``content`` to a sanitized file name, optionally prefixed by a
    JSON metadata header fenced with ``---`` lines.

    Returns:
        The sanitized file name that was written, or ``None`` (after
        ``st.error``) if writing failed.
    """
    try:
        target = sanitize_filename(filename)

        # NOTE(review): sanitize_filename replaces path separators, so the
        # dirname here is expected to be empty and this creates '.' only.
        parent_dir = os.path.dirname(target) or '.'
        os.makedirs(parent_dir, exist_ok=True)

        # Header layout: '---', pretty-printed JSON, '---', then the content.
        if not metadata:
            payload = content
        else:
            header_json = json.dumps(metadata, indent=2)
            payload = f"---\n{header_json}\n---\n{content}"

        with open(target, 'w', encoding='utf-8') as out:
            out.write(payload)

        return target

    except Exception as e:
        st.error(f"Error creating file: {str(e)}")
        return None
738
+
739
def read_file_with_metadata(filename: str) -> tuple:
    """Read a text file and split off a leading ``---``-fenced JSON header.

    Returns:
        ``(content, metadata)``. ``metadata`` is ``None`` and ``content`` is the
        whole file when there is no header, the closing fence is missing, or
        the header is not valid JSON. ``(None, None)`` after ``st.error`` if the
        file cannot be read.
    """
    fence_open = '---\n'
    fence_close = '\n---\n'
    try:
        with open(filename, 'r', encoding='utf-8') as handle:
            raw = handle.read()

        if not raw.startswith(fence_open):
            return raw, None

        # Search for the closing fence after the opening one.
        close_at = raw.find(fence_close, len(fence_open))
        if close_at == -1:
            return raw, None

        try:
            parsed = json.loads(raw[len(fence_open):close_at])
        except json.JSONDecodeError:
            # Not a JSON header after all: treat the file as plain content.
            return raw, None

        return raw[close_at + len(fence_close):], parsed

    except Exception as e:
        st.error(f"Error reading file: {str(e)}")
        return None, None
765
+
766
def archive_files(file_paths: list, archive_name: str = None) -> str:
    """Create a zip archive containing the given files, stored flat.

    Args:
        file_paths: Paths to add; paths that do not exist are skipped.
        archive_name: Output path; defaults to ``archive_<timestamp>.zip``.

    Returns:
        The archive path, or ``None`` (after ``st.error``) on failure.
    """
    try:
        # Generate archive name if not provided
        if not archive_name:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            archive_name = f"archive_{timestamp}.zip"

        archive_abs = os.path.abspath(archive_name)
        used_names = set()

        with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zf:
            for file_path in file_paths:
                if not os.path.exists(file_path):
                    continue
                # Never try to add the archive that is currently being written.
                if os.path.abspath(file_path) == archive_abs:
                    continue

                # Entries are stored by basename; two inputs with the same
                # basename would otherwise become duplicate zip members, so
                # later ones get a numeric suffix.
                base = os.path.basename(file_path)
                stem, ext = os.path.splitext(base)
                arcname = base
                counter = 1
                while arcname in used_names:
                    arcname = f"{stem}_{counter}{ext}"
                    counter += 1
                used_names.add(arcname)

                zf.write(file_path, arcname)

        return archive_name

    except Exception as e:
        st.error(f"Error creating archive: {str(e)}")
        return None
788
+
789
def list_files_by_type(directory: str = ".",
                       extensions: list = None,
                       recursive: bool = False) -> dict:
    """Glob ``directory`` for files with the given extensions.

    Args:
        directory: Folder to search.
        extensions: Extensions without the dot; defaults to
            md, mp3, wav, pdf, txt, json and csv.
        recursive: Also descend into sub-folders when true.

    Returns:
        ``{extension: [paths]}`` containing only extensions with at least one
        match; ``{}`` (after ``st.error``) on failure.
    """
    try:
        if extensions is None:
            wanted = ['md', 'mp3', 'wav', 'pdf', 'txt', 'json', 'csv']
        else:
            wanted = extensions

        # "**/*" lets glob walk sub-folders when recursive=True.
        stem = "**/*" if recursive else "*"

        grouped = {}
        for suffix in wanted:
            search = os.path.join(directory, f"{stem}.{suffix}")
            found = glob.glob(search, recursive=recursive)
            if found:
                grouped[suffix] = found

        return grouped

    except Exception as e:
        st.error(f"Error listing files: {str(e)}")
        return {}
815
+
816
+
817
+
818
+
819
+
820
+
821
def get_central_time() -> datetime:
    """Return the current time as a timezone-aware datetime in 'US/Central'."""
    return datetime.now(pytz.timezone('US/Central'))
825
+
826
def format_timestamp_prefix() -> str:
    """Return the current US Central time as ``MM_dd_yy_hh_mm_AM/PM``
    (12-hour clock), e.g. for use as a file-name prefix."""
    return get_central_time().strftime("%m_%d_%y_%I_%M_%p")
830
+
831
def get_formatted_time(dt: datetime = None,
                       timezone: str = 'US/Central',
                       include_timezone: bool = True,
                       include_seconds: bool = False) -> str:
    """Format a datetime as ``YYYY-MM-DD hh:mm[:ss] AM/PM [TZ]``.

    Args:
        dt: Value to format. ``None`` means "now" in ``timezone``; a naive value
            is localized to ``timezone``; an aware value is used as-is.
        timezone: pytz zone name used for the two cases above.
        include_timezone: Append the zone abbreviation (``%Z``).
        include_seconds: Include ``:SS`` after the minutes.
    """
    if dt is None:
        dt = datetime.now(pytz.timezone(timezone))
    elif dt.tzinfo is None:
        dt = pytz.timezone(timezone).localize(dt)

    # Assemble the strftime pattern from its optional pieces.
    pieces = [
        "%Y-%m-%d %I:%M",
        ":%S" if include_seconds else "",
        " %p",
        " %Z" if include_timezone else "",
    ]
    return dt.strftime("".join(pieces))
854
+
855
def parse_timestamp(timestamp_str: str,
                    timezone: str = 'US/Central') -> Optional[datetime]:
    """
    Parse a timestamp written in one of the formats this app knows about.

    Formats are tried in order and the first one that matches wins. The
    result is localized to ``timezone`` when it carries no tzinfo. Any
    failure is reported through ``st.error`` and ``None`` is returned.
    """
    known_formats = (
        "%m_%d_%y_%I_%M_%p",   # Standard app format
        "%Y-%m-%d %I:%M %p",   # Common 12-hour format
        "%Y-%m-%d %H:%M",      # 24-hour format
        "%m/%d/%y %I:%M %p",   # US date format
        "%d/%m/%y %I:%M %p",   # European date format
    )

    try:
        for fmt in known_formats:
            try:
                parsed = datetime.strptime(timestamp_str, fmt)
            except ValueError:
                continue
            break
        else:
            # No format matched.
            raise ValueError(f"Could not parse timestamp: {timestamp_str}")

        if parsed.tzinfo is None:
            parsed = pytz.timezone(timezone).localize(parsed)

        return parsed

    except Exception as e:
        st.error(f"Error parsing timestamp: {str(e)}")
        return None
892
+
893
def get_time_ago(dt: datetime) -> str:
    """
    Describe how long ago ``dt`` was, e.g. "2 hours ago" or "3 days ago".

    The current time is taken in ``dt``'s own tzinfo. Anything under a minute
    is "just now". Failures are reported through ``st.error`` and yield
    "unknown time ago".
    """
    # (exclusive upper bound in seconds, seconds per unit, unit label)
    scale = (
        (3600, 60, "minute"),
        (86400, 3600, "hour"),
        (604800, 86400, "day"),
        (2592000, 604800, "week"),
        (31536000, 2592000, "month"),
    )
    try:
        elapsed = (datetime.now(dt.tzinfo) - dt).total_seconds()

        if elapsed < 60:
            return "just now"

        for ceiling, span, label in scale:
            if elapsed < ceiling:
                count = int(elapsed / span)
                break
        else:
            count, label = int(elapsed / 31536000), "year"

        plural = 's' if count != 1 else ''
        return f"{count} {label}{plural} ago"

    except Exception as e:
        st.error(f"Error calculating time ago: {str(e)}")
        return "unknown time ago"
928
+
929
def format_duration(seconds: float) -> str:
    """
    Format a duration in seconds as a short human-readable string.

    Examples: ``0.5 -> "500ms"``, ``30 -> "30.0s"``, ``150 -> "2m 30.0s"``,
    ``4500 -> "1h 15m"``. Seconds are omitted once the duration reaches an
    hour.

    The value is rounded to the displayed precision *before* it is split into
    units, so a carry moves into the next unit instead of producing output
    such as "1000ms", "60.0s" or "1m 60.0s".

    Returns "invalid duration" for negative input. Any failure is reported
    through ``st.error`` and yields "unknown duration".
    """
    try:
        if seconds < 0:
            return "invalid duration"

        if seconds < 1:
            millis = round(seconds * 1000)
            if millis < 1000:
                return f"{millis}ms"
            # 999.5ms and above rounds up to a full second; fall through.

        # Work at the displayed resolution (tenths of a second).
        total = round(seconds, 1)
        if total < 60:
            return f"{total:.1f}s"

        hours = int(total // 3600)
        minutes = int((total % 3600) // 60)
        secs = total % 60

        parts = []
        if hours > 0:
            parts.append(f"{hours}h")
        if minutes > 0:
            parts.append(f"{minutes}m")
        if secs > 0 and hours == 0:  # Only show seconds if less than an hour
            parts.append(f"{secs:.1f}s")

        return " ".join(parts)

    except Exception as e:
        st.error(f"Error formatting duration: {str(e)}")
        return "unknown duration"
963
+
964
+
965
+
966
+
967
+
968
+
969
async def create_paper_audio_files(papers: List[Dict], input_question: str):
    """
    Generate one audio file per paper, named ``<timestamp>_<title>.<format>``.

    Each paper dict is updated in place: ``full_audio`` receives the audio
    file name and, when downloads are enabled in session state,
    ``download_base64`` receives the download link markup. On failure both
    keys are reset (``None`` / ``''``) and a warning is shown.

    Args:
        papers: Paper dicts providing ``title``, ``authors`` and ``summary``.
        input_question: Not used by this implementation.

    NOTE(review): a second function with this same name is defined later in
    this file; if both stay at module level the later one replaces this one.
    """
    async def synthesize(text, filename):
        # Render `text` with the voice selected in session state.
        communicate = edge_tts.Communicate(text, st.session_state['tts_voice'])
        await communicate.save(filename)
        return filename

    def label(paper):
        # Safe title for messages: the failure may be the missing title itself.
        return paper.get('title', '<untitled>')

    with PerformanceTimer("paper_audio_generation"):
        tasks = []
        for paper in papers:
            try:
                # Prepare text for audio generation
                audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
                audio_text = clean_for_speech(audio_text)

                # Create sanitized title for filename
                safe_title = paper['title'].lower()
                safe_title = re.sub(r'[^\w\s-]', '', safe_title)  # Remove special chars
                safe_title = re.sub(r'\s+', '_', safe_title)      # Replace spaces with underscores
                safe_title = safe_title[:100]                     # Limit length

                timestamp = format_timestamp_prefix()
                filename = f"{timestamp}_{safe_title}.{st.session_state['audio_format']}"

                tasks.append((paper, synthesize(audio_text, filename)))

            except Exception as e:
                st.warning(f"Error preparing audio for paper {label(paper)}: {str(e)}")
                continue

        # The coroutines are awaited one after another (not concurrently), so
        # a failure only affects the paper it belongs to.
        for paper, task in tasks:
            try:
                audio_file = await task
                if audio_file:
                    paper['full_audio'] = audio_file
                    if st.session_state['enable_download']:
                        paper['download_base64'] = create_download_link_with_cache(
                            audio_file,
                            st.session_state['audio_format']
                        )
            except Exception as e:
                st.warning(f"Error generating audio for paper {label(paper)}: {str(e)}")
                paper['full_audio'] = None
                paper['download_base64'] = ''
1021
+
1022
+
1023
+
1024
+
1025
+
1026
+
1027
+
1028
+
1029
+
1030
+ # ─────────────────────────────────────────────────────────
1031
+ # 4. PAPER PROCESSING & DISPLAY
1032
+ # ─────────────────────────────────────────────────────────
1033
+
1034
def parse_arxiv_refs(ref_text: str) -> List[Dict[str, str]]:
    """
    Turn the reference text into a list of paper dicts (at most 20).

    A line containing exactly two ``|`` characters starts a new paper
    (``date | title | ...``). The first following line becomes the authors,
    and every later line is appended to the summary. Each dict also carries
    ``url`` (the first arxiv link on the header line, or a ``paper_<n>``
    placeholder), ``full_audio`` and ``download_base64``.
    """
    if not ref_text:
        return []

    with PerformanceTimer("parse_refs"):
        papers = []
        entry = {}

        for idx, raw in enumerate(ref_text.split('\n')):
            try:
                if raw.count('|') == 2:
                    # Header line: flush the paper collected so far.
                    if entry:
                        papers.append(entry)
                        if len(papers) >= 20:  # Limit to 20 papers
                            break

                    fields = raw.strip('* ').split('|')
                    date = fields[0].strip()
                    title = fields[1].strip()
                    found = re.search(r'(https://arxiv.org/\S+)', raw)
                    url = found.group(1) if found else f"paper_{len(papers)}"

                    entry = {
                        'date': date,
                        'title': title,
                        'url': url,
                        'authors': '',
                        'summary': '',
                        'full_audio': None,
                        'download_base64': '',
                    }

                elif entry:
                    # Continuation line: authors first, then summary text.
                    text = raw.strip('* ')
                    if not entry['authors']:
                        entry['authors'] = text
                    elif entry['summary']:
                        entry['summary'] += ' ' + text
                    else:
                        entry['summary'] = text

            except Exception as e:
                st.warning(f"Error parsing line {idx}: {str(e)}")
                continue

        # Flush the last paper being collected.
        if entry:
            papers.append(entry)

        return papers[:20]  # Never hand back more than 20 papers
1090
+
1091
async def create_paper_audio_files(papers: List[Dict], input_question: str):
    """
    Generate an audio rendition for every paper via ``async_edge_tts_generate``.

    Each paper dict is updated in place: ``full_audio`` receives the generated
    file and, when downloads are enabled in session state,
    ``download_base64`` receives the download link markup. On failure both
    keys are reset (``None`` / ``''``) and a warning is shown.

    Args:
        papers: Paper dicts providing ``title``, ``authors`` and ``summary``.
        input_question: Not used by this implementation.
    """
    def label(paper):
        # Safe title for messages: the failure may be the missing title itself.
        return paper.get('title', '<untitled>')

    with PerformanceTimer("paper_audio_generation"):
        tasks = []
        for paper in papers:
            try:
                # Prepare text for audio generation
                audio_text = f"{paper['title']} by {paper['authors']}. {paper['summary']}"
                audio_text = clean_for_speech(audio_text)

                # Create (but do not yet run) the generation coroutine
                task = async_edge_tts_generate(
                    audio_text,
                    voice=st.session_state['tts_voice'],
                    file_format=st.session_state['audio_format']
                )
                tasks.append((paper, task))

            except Exception as e:
                st.warning(f"Error preparing audio for paper {label(paper)}: {str(e)}")
                continue

        # The coroutines are awaited one after another (not concurrently), so
        # a failure only affects the paper it belongs to.
        for paper, task in tasks:
            try:
                audio_file, gen_time = await task
                if audio_file:
                    paper['full_audio'] = audio_file
                    if st.session_state['enable_download']:
                        paper['download_base64'] = create_download_link_with_cache(
                            audio_file,
                            st.session_state['audio_format']
                        )
            except Exception as e:
                st.warning(f"Error generating audio for paper {label(paper)}: {str(e)}")
                paper['full_audio'] = None
                paper['download_base64'] = ''
1128
+
1129
+
1130
def initialize_marquee_settings():
    """Seed ``st.session_state['marquee_settings']`` with defaults if absent."""
    if 'marquee_settings' in st.session_state:
        return

    defaults = {
        "background": "#1E1E1E",
        "color": "#FFFFFF",
        "font-size": "14px",
        "animationDuration": "20s",
        "width": "100%",
        "lineHeight": "35px"
    }
    st.session_state['marquee_settings'] = defaults
1141
+
1142
def get_marquee_settings():
    """Return the marquee settings dict from session state, creating defaults first if needed."""
    initialize_marquee_settings()
    settings = st.session_state['marquee_settings']
    return settings
1146
+
1147
def update_marquee_settings_ui():
    """
    Render the marquee controls in the sidebar and store the chosen values.

    Two colour pickers (background, text) and two sliders (font size in px,
    animation duration in seconds) are shown; their values are written back
    into ``st.session_state['marquee_settings']``.
    """
    # Make sure the settings dict exists: this function reads it below and
    # may be called before anything else has initialised it.
    initialize_marquee_settings()

    st.sidebar.markdown("### 🎯 Marquee Settings")

    # Create two columns for settings
    cols = st.sidebar.columns(2)

    # Column 1: Color settings
    with cols[0]:
        bg_color = st.color_picker(
            "🎨 Background",
            st.session_state['marquee_settings']["background"],
            key="bg_color_picker"
        )

        text_color = st.color_picker(
            "✍️ Text Color",
            st.session_state['marquee_settings']["color"],
            key="text_color_picker"
        )

    # Column 2: Size and speed settings
    with cols[1]:
        font_size = st.slider(
            "πŸ“ Font Size",
            10, 24, 14,
            key="font_size_slider"
        )

        duration = st.slider(
            "⏱️ Animation Speed",
            1, 20, 20,
            key="duration_slider"
        )

    # Update session state with new settings
    st.session_state['marquee_settings'].update({
        "background": bg_color,
        "color": text_color,
        "font-size": f"{font_size}px",
        "animationDuration": f"{duration}s"
    })
1193
+
1194
def display_marquee(text: str, settings: dict, key_suffix: str = ""):
    """
    Render ``text`` as a marquee styled by ``settings``.

    Text longer than 280 characters is cut and suffixed with "...". The
    widget key is ``marquee_<key_suffix>``. An empty ``st.write`` follows the
    marquee as spacing.
    """
    if len(text) > 280:
        shown = text[:280] + "..."
    else:
        shown = text

    streamlit_marquee(
        content=shown,
        **settings,
        key=f"marquee_{key_suffix}"
    )

    # Spacer below the marquee
    st.write("")
1208
+
1209
def create_paper_links_md(papers: list) -> str:
    """Build a numbered markdown list linking each paper's title to its URL."""
    header = "# Paper Links\n"
    entries = [
        f"{position}. **{paper['title']}** β€” [Arxiv]({paper['url']})"
        for position, paper in enumerate(papers, start=1)
    ]
    return "\n".join([header] + entries)
1215
+
1216
def apply_custom_styling():
    """Inject the app's CSS overrides through an unsafe-HTML markdown block."""
    css_block = """
    <style>
    .main {
        background: linear-gradient(to right, #1a1a1a, #2d2d2d);
        color: #fff;
    }
    .stMarkdown {
        font-family: 'Helvetica Neue', sans-serif;
    }
    .stButton>button {
        margin-right: 0.5rem;
    }
    .streamlit-marquee {
        margin: 1rem 0;
        border-radius: 4px;
    }
    .st-emotion-cache-1y4p8pa {
        padding: 1rem;
    }
    </style>
    """
    st.markdown(css_block, unsafe_allow_html=True)
1239
+
1240
def display_performance_metrics(timings: dict):
    """
    Show timing information in the sidebar.

    Writes the summed duration, then for every operation its duration, its
    share of the total (0 when the total is not positive) and a progress bar
    filled to that share.
    """
    sidebar = st.sidebar
    sidebar.markdown("### ⏱️ Performance Metrics")

    total_time = sum(timings.values())
    sidebar.write(f"**Total Processing Time:** {total_time:.2f}s")

    sidebar.markdown("#### Operation Breakdown")
    for operation, duration in timings.items():
        if total_time > 0:
            percentage = (duration / total_time) * 100
        else:
            percentage = 0
        sidebar.write(f"**{operation}:** {duration:.2f}s ({percentage:.1f}%)")

        # Bar filled to the operation's share of the total
        sidebar.progress(percentage / 100)
1256
+
1257
+
1258
+
1259
+
1260
def display_papers(papers: List[Dict], marquee_settings: Dict):
    """
    Render the papers in two tabs: a detailed list and a three-column grid.

    List view shows, per paper, a marquee (title and first 120 characters of
    the authors), an expanded panel with date, title, abstract/PDF links,
    authors and summary, plus an audio player and download link when
    present. Grid view shows a shortened title, date, links and the player.
    """
    with PerformanceTimer("paper_display"):
        st.write("## πŸ“š Research Papers")

        list_tab, grid_tab = st.tabs(["πŸ“‹ List View", "πŸ“Š Grid View"])

        with list_tab:
            for idx, paper in enumerate(papers, start=1):
                banner = f"πŸ“„ {paper['title']} | πŸ‘€ {paper['authors'][:120]}"
                display_marquee(banner, marquee_settings, key_suffix=f"paper_{idx}")

                with st.expander(f"{idx}. πŸ“„ {paper['title']}", expanded=True):
                    # The PDF link is the abstract link with /abs/ swapped for /pdf/
                    pdf_url = paper['url'].replace('/abs/', '/pdf/')

                    st.markdown(f"""
                    **Date:** {paper['date']}
                    **Title:** {paper['title']}
                    **Links:** πŸ“„ [Abstract]({paper['url']}) | πŸ“‘ [PDF]({pdf_url})
                    """)
                    st.markdown(f"**Authors:** {paper['authors']}")
                    st.markdown(f"**Summary:** {paper['summary']}")

                    if not paper.get('full_audio'):
                        continue
                    st.write("🎧 Paper Audio Summary")
                    st.audio(paper['full_audio'])
                    if paper['download_base64']:
                        st.markdown(paper['download_base64'], unsafe_allow_html=True)

        with grid_tab:
            columns = st.columns(3)
            for pos, paper in enumerate(papers):
                # Fill the three columns round-robin
                with columns[pos % 3]:
                    st.markdown(f"""
                    ### πŸ“„ {paper['title'][:50]}...
                    **Date:** {paper['date']}
                    [Abstract]({paper['url']}) | [PDF]({paper['url'].replace('/abs/', '/pdf/')})
                    """)
                    if paper.get('full_audio'):
                        st.audio(paper['full_audio'])
1307
+
1308
def display_papers_in_sidebar(papers: List[Dict]):
    """
    List the papers in the sidebar with date and text filters.

    Each paper gets a collapsed expander with date, links and previews of
    authors and summary. Audio is shown only after its "Load Audio" button
    has been pressed; that choice is kept in session state under
    ``paper_<url>``.
    """
    with PerformanceTimer("sidebar_display"):
        st.sidebar.title("πŸ“š Papers Overview")

        filter_date = st.sidebar.date_input("Filter by date:", None)
        search_term = st.sidebar.text_input("Search papers:", "")

        visible = papers
        if filter_date:
            visible = [
                paper for paper in visible
                if filter_date.strftime("%Y-%m-%d") in paper['date']
            ]
        if search_term:
            needle = search_term.lower()
            visible = [
                paper for paper in visible
                if needle in paper['title'].lower()
                or needle in paper['authors'].lower()
            ]

        for idx, paper in enumerate(visible, start=1):
            # Session flag remembering whether this paper's audio was requested
            paper_key = f"paper_{paper['url']}"
            if paper_key not in st.session_state:
                st.session_state[paper_key] = False

            with st.sidebar.expander(f"{idx}. {paper['title'][:50]}...", expanded=False):
                st.markdown(f"**Date:** {paper['date']}")

                pdf_url = paper['url'].replace('/abs/', '/pdf/')
                st.markdown(f"πŸ“„ [Abstract]({paper['url']}) | πŸ“‘ [PDF]({pdf_url})")

                st.markdown(f"**Authors:** {paper['authors'][:100]}...")
                if paper['summary']:
                    st.markdown(f"**Summary:** {paper['summary'][:200]}...")

                if not paper['full_audio']:
                    continue

                if st.button("🎡 Load Audio", key=f"btn_{paper_key}"):
                    st.session_state[paper_key] = True

                if st.session_state[paper_key]:
                    st.audio(paper['full_audio'])
                    if paper['download_base64']:
                        st.markdown(paper['download_base64'], unsafe_allow_html=True)
1356
+
1357
+ # ─────────────────────────────────────────────────────────
1358
+ # 5. FILE MANAGEMENT & HISTORY
1359
+ # ─────────────────────────────────────────────────────────
1360
+
1361
def create_file(prompt: str, response: str, file_type: str = "md") -> str:
    """
    Save a prompt/response pair under ``generated_files/``.

    Markdown files get ``# Query`` / ``# Response`` headings; other types get
    the two texts separated by a blank line. Returns the path written, or
    ``""`` after reporting the error through ``st.error``.
    """
    with PerformanceTimer("file_creation"):
        try:
            filename = generate_filename(prompt.strip(), response.strip(), file_type)

            # Make sure the target directory is there
            os.makedirs("generated_files", exist_ok=True)
            filepath = os.path.join("generated_files", filename)

            if file_type == "md":
                body = f"# Query\n{prompt}\n\n# Response\n{response}"
            else:
                body = f"{prompt}\n\n{response}"

            with open(filepath, 'w', encoding='utf-8') as handle:
                handle.write(body)

            return filepath

        except Exception as e:
            st.error(f"Error creating file: {str(e)}")
            return ""
1384
+
1385
+ def get_high_info_terms(text: str, top_n: int = 10) -> List[str]:
1386
+ """Extract most informative terms from text."""
1387
+ # Common English stop words to filter out
1388
+ stop_words = set([
1389
+ 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
1390
+ 'for', 'of', 'with', 'by', 'from', 'up', 'about', 'into', 'over',
1391
+ 'after', 'the', 'this', 'that', 'these', 'those', 'what', 'which'
1392
+ ])
1393
+
1394
+ # Extract words and bi-grams
1395
+ words = re.findall(r'\b\w+(?:-\w+)*\b', text.lower())
1396
+ bi_grams = [' '.join(pair) for pair in zip(words, words[1:])]
1397
+
1398
+ # Combine and filter terms
1399
+ combined = words + bi_grams
1400
+ filtered = [term for term in combined
1401
+ if term not in stop_words
1402
+ and len(term.split()) <= 2
1403
+ and len(term) > 3]
1404
+
1405
+ # Count and return top terms
1406
+ counter = Counter(filtered)
1407
+ return [term for term, freq in counter.most_common(top_n)]
1408
+
1409
def clean_text_for_filename(text: str) -> str:
    """
    Reduce ``text`` to an underscore-joined slug of at most 200 characters.

    The text is lower-cased, characters other than word characters,
    whitespace and hyphens are removed, and words that are stop words or
    have three characters or fewer are dropped.
    """
    skip = {
        'the', 'and', 'for', 'with', 'this', 'that', 'what', 'which',
        'where', 'when', 'why', 'how', 'who', 'whom', 'whose', 'ai',
        'library', 'function', 'method', 'class', 'object', 'variable'
    }

    stripped = re.sub(r'[^\w\s-]', '', text.lower())
    kept = []
    for word in stripped.split():
        if len(word) > 3 and word not in skip:
            kept.append(word)

    slug = '_'.join(kept)
    return slug[:200]
1426
+
1427
def generate_filename(prompt: str, response: str, file_type: str = "md",
                      max_length: int = 200) -> str:
    """
    Build ``<timestamp>_<terms>_<snippet>.<file_type>`` from the content.

    The stem combines the top five informative terms from the first 500
    characters of prompt + response with a cleaned snippet of the first 40
    characters of each. Duplicate parts are dropped (first occurrence wins)
    and the stem is cut so the whole name fits in ``max_length``.
    """
    stamp = format_timestamp_prefix() + "_"

    keywords = get_high_info_terms((prompt + " " + response)[:500], top_n=5)
    excerpt = clean_text_for_filename((prompt[:40] + " " + response[:40]).strip())

    # dict.fromkeys drops repeats while keeping first-seen order
    ordered_parts = list(dict.fromkeys(keywords + [excerpt]))

    stem = '_'.join(ordered_parts).strip('_')
    budget = max_length - len(stamp) - len(file_type) - 1
    if len(stem) > budget:
        stem = stem[:budget]

    return f"{stamp}{stem}.{file_type}"
1457
+
1458
def create_zip_of_files(md_files: List[str], mp3_files: List[str],
                        wav_files: List[str], input_question: str) -> Optional[str]:
    """
    Bundle the given files into ``archive_<timestamp>_<terms>.zip``.

    README.md and empty markdown files are left out. The archive name is
    derived from informative terms found in the markdown contents, the audio
    file names and ``input_question``. Files are stored under their base
    names. Returns the archive name, or ``None`` when there is nothing to
    archive or an error occurred (reported through ``st.error``).
    """
    with PerformanceTimer("zip_creation"):
        md_files = [
            path for path in md_files
            if os.path.basename(path).lower() != 'readme.md'
            and os.path.getsize(path) > 0
        ]

        bundle = md_files + mp3_files + wav_files
        if not bundle:
            return None

        try:
            # Collect text from which the archive name is derived
            texts = []
            for path in bundle:
                if path.endswith('.md'):
                    with open(path, 'r', encoding='utf-8') as handle:
                        texts.append(handle.read())
                elif path.endswith(('.mp3', '.wav')):
                    stem = os.path.splitext(os.path.basename(path))[0]
                    texts.append(stem.replace('_', ' '))
            texts.append(input_question)

            info_terms = get_high_info_terms(" ".join(texts), top_n=10)

            timestamp = format_timestamp_prefix()
            name_text = '-'.join(info_terms[:5])
            zip_name = f"archive_{timestamp}_{name_text[:50]}.zip"

            with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as archive:
                for path in bundle:
                    archive.write(path, os.path.basename(path))

            return zip_name

        except Exception as e:
            st.error(f"Error creating zip archive: {str(e)}")
            return None
1500
+
1501
+ # ─────────────────────────────────────────────────────────
1502
+ # 6. OPTIMIZED AI LOOKUP & PROCESSING
1503
+ # ─────────────────────────────────────────────────────────
1504
+
1505
def perform_ai_lookup(q: str, vocal_summary: bool = True,
                      extended_refs: bool = False,
                      titles_summary: bool = True,
                      full_audio: bool = False) -> Tuple[str, Dict[str, float]]:
    """
    Run the full lookup for query ``q`` and render the results.

    Steps, in order:
      1. If Claude is enabled in session state, send ``q`` to the Anthropic
         API and display the reply.
      2. Save the question/answer pair (and its audio) and show the audio
         player and download link when those options are enabled.
      3. If Claude is enabled, query the Arxiv RAG endpoint, parse the
         returned references, write a links file, generate per-paper audio
         and display the papers in the page and the sidebar.

    Args:
        q: The user's query text.
        vocal_summary, extended_refs, titles_summary, full_audio: Accepted
            for interface compatibility; not referenced in this body.

    Returns:
        ``(result, timings)`` where ``result`` is Claude's reply text (``""``
        when Claude is disabled, or an error message when the call failed)
        and ``timings`` maps step names to durations in seconds.
    """
    with PerformanceTimer("total_lookup") as total_timer:
        timings = {}

        # Add operation controls if not present.
        # NOTE(review): the checkboxes are only created while the
        # 'operation_controls' flag is absent, and their initial values read
        # session-state keys that must already exist — confirm those keys are
        # initialised elsewhere.
        if 'operation_controls' not in st.session_state:
            st.sidebar.markdown("### πŸ”§ Operation Controls")
            st.session_state['enable_claude'] = st.sidebar.checkbox(
                "Enable Claude Search",
                value=st.session_state['enable_claude']
            )
            st.session_state['enable_audio'] = st.sidebar.checkbox(
                "Generate Audio",
                value=st.session_state['enable_audio']
            )
            st.session_state['enable_download'] = st.sidebar.checkbox(
                "Create Download Links",
                value=st.session_state['enable_download']
            )
            st.session_state['operation_controls'] = True

        result = ""

        # 1. Claude API (if enabled)
        if st.session_state['enable_claude']:
            with PerformanceTimer("claude_api") as claude_timer:
                try:
                    client = anthropic.Anthropic(api_key=anthropic_key)
                    response = client.messages.create(
                        model="claude-3-sonnet-20240229",
                        max_tokens=1000,
                        messages=[{"role": "user", "content": q}]
                    )
                    st.write("Claude's reply 🧠:")
                    st.markdown(response.content[0].text)
                    result = response.content[0].text
                    timings['claude_api'] = time.time() - claude_timer.start_time
                except Exception as e:
                    # Failure is surfaced to the user and recorded as the result text.
                    st.error(f"Error with Claude API: {str(e)}")
                    result = "Error occurred during Claude API call"
                    timings['claude_api'] = 0

        # 2. Async save and audio generation
        async def process_results():
            # Closure over q, result and timings from the enclosing call.
            with PerformanceTimer("results_processing") as proc_timer:
                md_file, audio_file, md_time, audio_time = await async_save_qa_with_audio(
                    q, result
                )
                timings['markdown_save'] = md_time
                timings['audio_generation'] = audio_time

                if audio_file and st.session_state['enable_audio']:
                    st.subheader("πŸ“ Main Response Audio")
                    st.audio(audio_file)

                    if st.session_state['enable_download']:
                        st.markdown(
                            create_download_link_with_cache(
                                audio_file,
                                st.session_state['audio_format']
                            ),
                            unsafe_allow_html=True
                        )

        # Run async operations (runs even when Claude is disabled, with result == "")
        asyncio.run(process_results())

        # 3. Arxiv RAG with performance tracking
        if st.session_state['enable_claude']:
            with PerformanceTimer("arxiv_rag") as rag_timer:
                try:
                    st.write('Running Arxiv RAG with Claude inputs.')
                    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
                    # Only the first element of the prediction is used as the reference text.
                    refs = client.predict(
                        q,
                        10,
                        "Semantic Search",
                        "mistralai/Mixtral-8x7B-Instruct-v0.1",
                        api_name="/update_with_rag_md"
                    )[0]
                    timings['arxiv_rag'] = time.time() - rag_timer.start_time

                    # Process papers asynchronously
                    papers = parse_arxiv_refs(refs)
                    if papers:
                        with PerformanceTimer("paper_processing") as paper_timer:
                            async def process_papers():
                                # Create minimal links page
                                paper_links = create_paper_links_md(papers)
                                links_file = create_file(q, paper_links, "md")
                                st.markdown(paper_links)

                                # Generate audio and display papers
                                await create_paper_audio_files(papers, q)
                                display_papers(papers, get_marquee_settings())
                                display_papers_in_sidebar(papers)

                            asyncio.run(process_papers())
                            timings['paper_processing'] = time.time() - paper_timer.start_time
                    else:
                        st.warning("No papers found in the response.")
                except Exception as e:
                    # Any failure in the RAG step (including paper display) lands here.
                    st.error(f"Error during Arxiv RAG: {str(e)}")
                    timings['arxiv_rag'] = 0

        return result, timings
1615
+
1616
def process_voice_input(text: str):
    """
    Run a lookup for spoken/typed ``text`` and list the files it produced.

    Empty input only triggers a warning. Otherwise the lookup is performed,
    the question/answer pair is saved with audio, and the markdown and audio
    files are offered in two columns. Any error is reported via ``st.error``.
    """
    if not text:
        st.warning("Please provide some input text.")
        return

    with PerformanceTimer("voice_processing"):
        try:
            st.subheader("πŸ” Search Results")
            lookup_options = dict(
                vocal_summary=True,
                extended_refs=False,
                titles_summary=True,
                full_audio=True,
            )
            result, timings = perform_ai_lookup(text, **lookup_options)

            # Persist the exchange
            md_file, audio_file = save_qa_with_audio(text, result)

            st.subheader("πŸ“ Generated Files")
            left, right = st.columns(2)

            with left:
                st.write(f"πŸ“„ Markdown: {os.path.basename(md_file)}")
                st.markdown(get_download_link(md_file, "md"), unsafe_allow_html=True)

            with right:
                if audio_file:
                    st.write(f"🎡 Audio: {os.path.basename(audio_file)}")
                    play_and_download_audio(
                        audio_file,
                        st.session_state['audio_format']
                    )

        except Exception as e:
            st.error(f"Error processing voice input: {str(e)}")
1653
+
1654
+ # ─────────────────────────────────────────────────────────
1655
+ # 7. SIDEBAR AND FILE HISTORY
1656
+ # ─────────────────────────────────────────────────────────
1657
+
1658
def display_file_history_in_sidebar():
    """
    Show the generated .md/.mp3/.wav files of the working directory in the sidebar.

    Provides "Delete All" and "Zip All" buttons, a name search and a type
    filter, then lists the matching files newest first with modification
    time, size, a preview (markdown) or player (audio) and a download link.

    "Delete All" leaves README.md in place, matching the zip helper, which
    also excludes it. Files that no longer exist (for example just deleted)
    are dropped before the listing is built, so the listing never stats a
    missing file.
    """
    with PerformanceTimer("file_history"):
        st.sidebar.markdown("---")
        st.sidebar.markdown("### πŸ“‚ File History")

        # Gather all files
        md_files = glob.glob("*.md")
        mp3_files = glob.glob("*.mp3")
        wav_files = glob.glob("*.wav")
        all_files = md_files + mp3_files + wav_files

        if not all_files:
            st.sidebar.write("No files found.")
            return

        # Add file management controls
        col1, col2 = st.sidebar.columns(2)
        with col1:
            if st.button("πŸ—‘ Delete All"):
                try:
                    for f in all_files:
                        # Never remove the project README.
                        if os.path.basename(f).lower() == 'readme.md':
                            continue
                        os.remove(f)
                    st.session_state.should_rerun = True
                    st.success("All files deleted successfully.")
                except Exception as e:
                    st.error(f"Error deleting files: {str(e)}")

        with col2:
            if st.button("⬇️ Zip All"):
                zip_name = create_zip_of_files(
                    md_files,
                    mp3_files,
                    wav_files,
                    st.session_state.get('last_query', '')
                )
                if zip_name:
                    st.sidebar.markdown(
                        get_download_link(zip_name, "zip"),
                        unsafe_allow_html=True
                    )

        # Add file filtering options
        st.sidebar.markdown("### πŸ” Filter Files")
        file_search = st.sidebar.text_input("Search files:", "")
        file_type_filter = st.sidebar.multiselect(
            "File types:",
            ["Markdown", "Audio"],
            default=["Markdown", "Audio"]
        )

        # Drop anything removed since the glob above (e.g. by "Delete All");
        # getmtime/getsize below would raise on a missing file.
        all_files = [f for f in all_files if os.path.exists(f)]

        # Sort files by modification time, newest first
        all_files.sort(key=os.path.getmtime, reverse=True)

        # Filter files based on search and type
        filtered_files = []
        for f in all_files:
            if file_search.lower() in f.lower():
                ext = os.path.splitext(f)[1].lower()
                if (("Markdown" in file_type_filter and ext == ".md") or
                        ("Audio" in file_type_filter and ext in [".mp3", ".wav"])):
                    filtered_files.append(f)

        # Display filtered files
        for f in filtered_files:
            fname = os.path.basename(f)
            ext = os.path.splitext(fname)[1].lower().strip('.')
            emoji = FILE_EMOJIS.get(ext, 'πŸ“¦')

            # Get file metadata
            mod_time = datetime.fromtimestamp(os.path.getmtime(f))
            time_str = mod_time.strftime("%Y-%m-%d %H:%M:%S")
            file_size = os.path.getsize(f) / 1024  # Size in KB

            with st.sidebar.expander(f"{emoji} {fname}"):
                st.write(f"**Modified:** {time_str}")
                st.write(f"**Size:** {file_size:.1f} KB")

                if ext == "md":
                    try:
                        # Preview the first 200 characters on one line
                        with open(f, "r", encoding="utf-8") as file_in:
                            snippet = file_in.read(200).replace("\n", " ")
                        if len(snippet) == 200:
                            snippet += "..."
                        st.write(snippet)
                        st.markdown(
                            get_download_link(f, file_type="md"),
                            unsafe_allow_html=True
                        )
                    except Exception as e:
                        st.error(f"Error reading markdown file: {str(e)}")

                elif ext in ["mp3", "wav"]:
                    st.audio(f)
                    st.markdown(
                        get_download_link(f, file_type=ext),
                        unsafe_allow_html=True
                    )

                else:
                    # Defensive fallback; the filter above only admits md/mp3/wav.
                    st.markdown(get_download_link(f), unsafe_allow_html=True)
1759
+
1760
+ # ─────────────────────────────────────────────────────────
1761
+ # 8. MAIN APPLICATION
1762
+ # ─────────────────────────────────────────────────────────
1763
+
1764
def main():
    """Render one pass of the Streamlit app.

    Order of operations on every script run:

    1. Draw the marquee settings UI and the welcome marquee.
    2. Draw the top-level action selector (Voice / Media / ArXiv / Editor).
    3. Mount the ``mycomponent`` custom component; when it returns a truthy
       value, show an editable copy of it and either auto-run or manually
       run ``perform_ai_lookup`` on the edited text.
    4. Render the content for the selected action.
    5. Render the sidebar file history and apply custom styling.
    6. Trigger ``st.rerun()`` if ``st.session_state.should_rerun`` is set.

    Any ``Exception`` raised along the way is reported in the page with
    ``st.error`` / ``st.info`` instead of propagating.
    """
    # Tab labels are defined once so the radio options and the dispatch
    # comparisons below can never drift apart.
    tab_voice = "🎤 Voice"
    tab_media = "📸 Media"
    tab_arxiv = "🔍 ArXiv"
    tab_editor = "📝 Editor"

    try:
        # 1. Setup marquee UI in sidebar
        update_marquee_settings_ui()
        marquee_settings = get_marquee_settings()

        # 2. Display welcome marquee (user settings with size overrides)
        display_marquee(
            st.session_state['marquee_content'],
            {**marquee_settings, "font-size": "28px", "lineHeight": "50px"},
            key_suffix="welcome"
        )

        # 3. Main action tabs
        tab_main = st.radio(
            "Action:",
            [tab_voice, tab_media, tab_arxiv, tab_editor],
            horizontal=True
        )

        # Custom component usage
        mycomponent = components.declare_component(
            "mycomponent",
            path="mycomponent"
        )
        val = mycomponent(my_input_value="Hello")

        if val:
            # NOTE(review): this replaces the literal two-character sequence
            # backslash + "n", not real newline characters -- presumably the
            # component delivers escaped text; confirm against the component.
            val_stripped = val.replace('\\n', ' ')
            edited_input = st.text_area(
                "✏️ Edit Input:",
                value=val_stripped,
                height=100
            )

            # Model selection and options. The selected model is not read
            # anywhere below; the widget is rendered for display only.
            st.selectbox("Model:", ["Arxiv"])
            col1, col2 = st.columns(2)

            with col1:
                # AutoRun is off by default.
                autorun = st.checkbox("⚙ AutoRun", value=False)
            with col2:
                full_audio = st.checkbox("📚 FullAudio", value=False)

            # Compare against the last value a lookup was run for. `.get`
            # keeps the first run working even if `old_val` was never seeded
            # (a missing key simply counts as "changed").
            input_changed = (val != st.session_state.get("old_val"))

            # Short-circuit on purpose: the Run button is only rendered when
            # AutoRun does not already trigger the lookup.
            should_run = (autorun and input_changed) or st.button("▶ Run")

            if should_run:
                st.session_state.old_val = val
                st.session_state.last_query = edited_input
                # perform_ai_lookup returns a pair; only the timings are
                # used here.
                _, timings = perform_ai_lookup(
                    edited_input,
                    vocal_summary=True,
                    extended_refs=False,
                    titles_summary=True,
                    full_audio=full_audio
                )

                # Display performance metrics
                display_performance_metrics(timings)

        # Tab-specific content
        if tab_main == tab_arxiv:
            display_arxiv_tab()
        elif tab_main == tab_voice:
            display_voice_tab()
        elif tab_main == tab_media:
            display_media_tab()
        elif tab_main == tab_editor:
            display_editor_tab()

        # Display file history
        display_file_history_in_sidebar()

        # Apply styling
        apply_custom_styling()

        # Check for rerun; clear the flag first so the rerun does not loop.
        if st.session_state.get("should_rerun", False):
            st.session_state.should_rerun = False
            st.rerun()

    except Exception as e:
        st.error(f"An error occurred in the main application: {str(e)}")
        st.info("Please try refreshing the page or contact support if the issue persists.")


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()