awacke1 commited on
Commit
f8d1729
Β·
verified Β·
1 Parent(s): f02b408

Create version16.audiosidebarforresults.app.py

Browse files
version16.audiosidebarforresults.app.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
3
+ import plotly.graph_objects as go
4
+ import streamlit.components.v1 as components
5
+ from datetime import datetime
6
+ from audio_recorder_streamlit import audio_recorder
7
+ from bs4 import BeautifulSoup
8
+ from collections import deque
9
+ from dotenv import load_dotenv
10
+ from gradio_client import Client
11
+ from huggingface_hub import InferenceClient
12
+ from io import BytesIO
13
+ from PIL import Image
14
+ from PyPDF2 import PdfReader
15
+ from urllib.parse import quote
16
+ from xml.etree import ElementTree as ET
17
+ from openai import OpenAI
18
+ import extra_streamlit_components as stx
19
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
20
+ import asyncio
21
+ import edge_tts # ensure this is installed (pip install edge-tts)
22
+
23
+ # 🔧 Config & Setup
24
# ---------------------------------------------------------------------------
# Application bootstrap: page chrome, API credentials/clients, per-session
# defaults and a small CSS override.
# ---------------------------------------------------------------------------
_page_options = {
    "page_title": "🚲BikeAIπŸ† Claude/GPT Research",
    "page_icon": "πŸš²πŸ†",
    "layout": "wide",
    "initial_sidebar_state": "auto",
    "menu_items": {
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲BikeAIπŸ† Claude/GPT Research AI",
    },
}
st.set_page_config(**_page_options)

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Credentials: environment first, Streamlit secrets as the fallback.
# NOTE(review): the Anthropic env var is named ANTHROPIC_API_KEY_3 while the
# secrets entry is ANTHROPIC_API_KEY - confirm the mismatch is intentional.
openai.api_key = os.getenv('OPENAI_API_KEY') or st.secrets['OPENAI_API_KEY']
anthropic_key = os.getenv("ANTHROPIC_API_KEY_3") or st.secrets["ANTHROPIC_API_KEY"]

claude_client = anthropic.Anthropic(api_key=anthropic_key)
openai_client = OpenAI(
    api_key=openai.api_key,
    organization=os.getenv('OPENAI_ORG_ID'),
)

HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

# Seed session-state keys once; values already present are left untouched.
for _state_key, _state_default in (
    ('transcript_history', []),
    ('chat_history', []),
    ('openai_model', "gpt-4o-2024-05-13"),
    ('messages', []),
    ('last_voice_input', ""),
):
    st.session_state.setdefault(_state_key, _state_default)

# Minimal custom CSS: dark gradient background and a sans-serif markdown font.
_custom_css = """
<style>
.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)
56
+
57
+ # 🔑 Common Utilities
58
def generate_filename(prompt, file_type="md"):
    """Build a timestamped file name derived from *prompt*.

    The result has the form ``<MMDD_HHMM>_<cleaned prompt>.<file_type>``.
    The timestamp is taken in the US/Central time zone and the cleaned
    prompt is limited to 90 characters.
    """
    central = pytz.timezone('US/Central')
    stamp = datetime.now(central).strftime("%m%d_%H%M")

    # Replace characters that are unsafe in file names with spaces, then
    # squeeze every whitespace run down to a single space.
    cleaned = re.sub(r'[<>:"/\\\\|?*\n]', ' ', prompt)
    cleaned = re.sub(r'\s+', ' ', cleaned)
    stem = cleaned.strip()[:90]

    return f"{stamp}_{stem}.{file_type}"
64
+
65
def create_file(filename, prompt, response):
    """Write *prompt*, a blank line, then *response* to *filename* as UTF-8.

    Any existing file with that name is overwritten.
    """
    with open(filename, mode='w', encoding='utf-8') as out:
        document = prompt + "\n\n" + response
        out.write(document)
68
+
69
def get_download_link(file):
    """Return an HTML ``<a>`` tag that downloads *file* from an inline data URI.

    The whole file is read and base64-encoded into the ``href``, so the link
    works without a separate download endpoint.

    Args:
        file: Path of the file to embed.

    Returns:
        The anchor markup as a string; render it with
        ``unsafe_allow_html=True``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import html
    import mimetypes

    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    # "file/txt" is not a real media type and this helper is also used for
    # .mp3 and .zip files, so derive the type from the extension instead.
    mime = mimetypes.guess_type(file)[0] or "application/octet-stream"

    # File names come from user prompts / disk contents; escape them before
    # placing them in an attribute value and in the link text.
    name = html.escape(os.path.basename(file), quote=True)
    return f'<a href="data:{mime};base64,{b64}" download="{name}">πŸ“‚ Download {name}</a>'
73
+
74
@st.cache_resource
def speech_synthesis_html(result):
    """Speak *result* in the browser through the Web Speech API.

    Legacy fallback kept alongside the Edge TTS helpers. It renders a
    zero-height HTML component whose script creates a
    ``SpeechSynthesisUtterance`` for the text and speaks it.

    Args:
        result: The text to speak (expected to be a ``str``).
    """
    # json.dumps produces a valid JavaScript string literal (quotes,
    # backslashes and newlines escaped; non-ASCII emitted as \uXXXX).
    # "<" is additionally escaped so the text can never close the
    # surrounding <script> element.
    js_literal = json.dumps(result).replace("<", "\\u003c")
    html_code = f"""
    <html><body>
    <script>
    var msg = new SpeechSynthesisUtterance({js_literal});
    window.speechSynthesis.speak(msg);
    </script>
    </body></html>
    """
    components.html(html_code, height=0)
86
+
87
+ #------------add EdgeTTS
88
+ # --- NEW FUNCTIONS FOR EDGE TTS ---
89
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
    """Generate an MP3 from *text* with Edge TTS and return its file path.

    Args:
        text: Text to synthesise. ``None``, empty or whitespace-only text
            produces no audio.
        voice: Edge TTS voice name.
        rate: Speaking-rate adjustment in percent (sent as e.g. ``+0%``).
        pitch: Pitch adjustment in Hz (sent as e.g. ``+0Hz``).

    Returns:
        The path of the written MP3 file, or ``None`` when there is nothing
        to speak.
    """
    # Guard against None as well as blank strings: upstream model calls do
    # not guarantee a non-empty answer.
    if not text or not text.strip():
        return None

    # The signed-integer format spec rejects floats, so coerce first.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"

    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
    out_fn = generate_filename(text, "mp3")
    await communicate.save(out_fn)
    return out_fn
101
+
102
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0):
    """Blocking front-end for ``edge_tts_generate_audio``.

    Runs the async generator to completion with ``asyncio.run`` and hands
    back whatever it returns (the MP3 path, or ``None``).
    """
    tts_job = edge_tts_generate_audio(text, voice, rate, pitch)
    mp3_path = asyncio.run(tts_job)
    return mp3_path
107
+
108
def play_and_download_audio(file_path):
    """Show an audio player plus a download link for an existing MP3 file.

    Nothing is rendered when *file_path* is falsy or does not exist on disk.
    """
    if not file_path or not os.path.exists(file_path):
        return

    st.audio(file_path)
    link_markup = get_download_link(file_path)
    st.markdown(link_markup, unsafe_allow_html=True)
115
+ #---------------------------
116
+
117
def process_image(image_path, user_prompt):
    """Send an image file and a text prompt to the OpenAI chat model.

    The image is inlined as a base64 data URL in a multimodal user message;
    the model is the one stored in ``st.session_state["openai_model"]``.

    Args:
        image_path: Path of the image file to analyse.
        user_prompt: Instruction or question about the image.

    Returns:
        The text content of the first completion choice.

    Raises:
        OSError: If the image file cannot be read.
    """
    import mimetypes

    with open(image_path, "rb") as imgf:
        image_data = imgf.read()
    b64img = base64.b64encode(image_data).decode("utf-8")

    # The gallery passes both .png and .jpg files, so label the data URL
    # with the real type instead of always claiming PNG. Fall back to PNG
    # (the previous behaviour) when the extension is not an image type.
    mime = mimetypes.guess_type(image_path)[0]
    if not mime or not mime.startswith("image/"):
        mime = "image/png"

    resp = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64img}"}},
            ]},
        ],
        temperature=0.0,
    )
    return resp.choices[0].message.content
133
+
134
def process_audio(audio_path):
    """Transcribe an audio file with the ``whisper-1`` model.

    The transcript is also appended to ``st.session_state.messages`` as a
    user message before being returned.
    """
    with open(audio_path, "rb") as audio_fh:
        transcription = openai_client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_fh,
        )

    user_turn = {"role": "user", "content": transcription.text}
    st.session_state.messages.append(user_turn)
    return transcription.text
139
+
140
def process_video(video_path, seconds_per_frame=1):
    """Sample frames from a video and return them as base64-encoded JPEGs.

    Args:
        video_path: Path of the video file.
        seconds_per_frame: Approximate spacing, in seconds, between sampled
            frames.

    Returns:
        A list of base64 strings, one per sampled frame (empty when no
        frame could be read).
    """
    vid = cv2.VideoCapture(video_path)
    try:
        total = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = vid.get(cv2.CAP_PROP_FPS)

        # range() rejects a step of 0, which happened when the reported fps
        # was 0 or fps * seconds_per_frame rounded down to 0. Always advance
        # by at least one frame.
        skip = max(1, int(fps * seconds_per_frame))

        frames_b64 = []
        for i in range(0, total, skip):
            vid.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = vid.read()
            if not ret:
                break
            encoded_ok, buf = cv2.imencode(".jpg", frame)
            if not encoded_ok:
                # Skip frames the encoder rejected rather than emitting junk.
                continue
            frames_b64.append(base64.b64encode(buf).decode("utf-8"))
        return frames_b64
    finally:
        # Release the capture even if decoding or encoding raised.
        vid.release()
154
+
155
def process_video_with_gpt(video_path, prompt):
    """Ask the OpenAI chat model about a video using sampled frames.

    Frames come from ``process_video``; each is attached to the user
    message as a JPEG data URL after the text prompt.
    """
    user_parts = [{"type": "text", "text": prompt}]
    for encoded_frame in process_video(video_path):
        user_parts.append(
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_frame}"}}
        )

    conversation = [
        {"role": "system", "content": "Analyze video frames."},
        {"role": "user", "content": user_parts},
    ]
    completion = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=conversation,
    )
    return completion.choices[0].message.content
168
+
169
def search_arxiv(query):
    """Query the Arxiv RAG Space with two Mistral models and show both answers.

    Each answer is rendered under its own heading as soon as it arrives; the
    two answers are returned joined by a blank line.
    """
    st.write("πŸ” Searching ArXiv...")
    space = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    # (model id sent to the Space, heading shown above its answer)
    model_runs = (
        ("mistralai/Mixtral-8x7B-Instruct-v0.1", "### Mistral-8x7B-Instruct-v0.1 Result"),
        ("mistralai/Mistral-7B-Instruct-v0.2", "### Mistral-7B-Instruct-v0.2 Result"),
    )

    answers = []
    for model_id, heading in model_runs:
        answer = space.predict(
            prompt=query,
            llm_model_picked=model_id,
            stream_outputs=True,
            api_name="/ask_llm",
        )
        st.markdown(heading)
        st.markdown(answer)
        answers.append(answer)

    first, second = answers
    return f"{first}\n\n{second}"
179
+
180
def perform_ai_lookup(q):
    """Run a RAG search plus LLM answer for *q*, render it and narrate it.

    Steps: fetch references from the Arxiv Space, ask the Mixtral model for
    an answer, render the combined markdown, produce Edge TTS audio for the
    answer, the reference summaries and (if any) the bracketed paper titles,
    report the elapsed time, and save the result to a ``.md`` file.

    Returns:
        The combined markdown string that was rendered.
    """
    started = time.time()
    space = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    # RAG-based search; the first element of the reply holds the references.
    rag_reply = space.predict(
        q, 20, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md",
    )
    refs = rag_reply[0]

    # Model answer to the same question.
    answer = space.predict(
        q, "mistralai/Mixtral-8x7B-Instruct-v0.1", True,
        api_name="/ask_llm",
    )
    result = f"### πŸ”Ž {q}\n\n{answer}\n\n{refs}"
    st.markdown(result)

    def narrate(spoken_text, heading):
        """Synthesise *spoken_text*, then show *heading* and the player."""
        mp3_path = speak_with_edge_tts(spoken_text, voice="en-US-AriaNeural", rate=0, pitch=0)
        st.write(heading)
        play_and_download_audio(mp3_path)

    # Main answer.
    narrate(answer, "### Audio Output for Main Result")

    # Reference summaries (double quotes stripped before narration).
    narrate(
        "Here are the summaries from the references: " + refs.replace('"', ''),
        "### Audio Output for References Summaries",
    )

    # Paper titles: the first [bracketed] span on each reference line.
    title_hits = (re.search(r"\[([^\]]+)\]", ref_line) for ref_line in refs.split('\n'))
    titles = [hit.group(1) for hit in title_hits if hit]
    if titles:
        narrate(
            "Here are the titles of the papers: " + ", ".join(titles),
            "### Audio Output for Paper Titles",
        )

    elapsed = time.time() - started
    st.write(f"Elapsed: {elapsed:.2f} s")

    create_file(generate_filename(q, "md"), q, result)
    return result
220
+
221
def process_with_gpt(text):
    """Send *text* to the OpenAI chat model with the running message history.

    The user turn and the reply are both rendered as chat messages and
    appended to ``st.session_state.messages``; the exchange is also saved to
    a ``.md`` file.

    Returns:
        The assistant's reply, or ``None`` when *text* is empty.
    """
    if not text:
        return

    user_turn = {"role": "user", "content": text}
    st.session_state.messages.append(user_turn)
    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        completion = openai_client.chat.completions.create(
            model=st.session_state["openai_model"],
            messages=st.session_state.messages,
            stream=False,
        )
        ans = completion.choices[0].message.content
        st.write("GPT-4o: " + ans)

        transcript_name = generate_filename(text, "md")
        create_file(transcript_name, text, ans)

        assistant_turn = {"role": "assistant", "content": ans}
        st.session_state.messages.append(assistant_turn)
    return ans
237
+
238
def process_with_claude(text):
    """Send *text* to Claude as a single-turn request and render the reply.

    The exchange is saved to a ``.md`` file and appended to
    ``st.session_state.chat_history`` as ``{"user": ..., "claude": ...}``.

    Returns:
        Claude's reply, or ``None`` when *text* is empty.
    """
    if not text:
        return

    with st.chat_message("user"):
        st.markdown(text)

    with st.chat_message("assistant"):
        reply = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": text}],
        )
        ans = reply.content[0].text
        st.write("Claude: " + ans)

        transcript_name = generate_filename(text, "md")
        create_file(transcript_name, text, ans)

        exchange = {"user": text, "claude": ans}
        st.session_state.chat_history.append(exchange)
    return ans
253
+
254
def create_zip_of_files():
    """Bundle every ``.md`` and ``.mp3`` file in the working directory.

    The archive is written to ``all_files.zip`` in the working directory,
    replacing any previous one.

    Returns:
        The archive file name (``"all_files.zip"``).
    """
    # Sorted so the archive layout is deterministic across runs.
    all_files = sorted(glob.glob("*.md")) + sorted(glob.glob("*.mp3"))
    zip_name = "all_files.zip"

    # ZipFile defaults to ZIP_STORED (no compression). The archive is later
    # inlined into the page as base64, so deflate it to keep the page small;
    # markdown notes compress well.
    with zipfile.ZipFile(zip_name, 'w', compression=zipfile.ZIP_DEFLATED) as z:
        for f in all_files:
            z.write(f)
    return zip_name
264
+
265
def get_media_html(p, typ="video", w="100%"):
    """Return HTML that plays a media file embedded as a base64 data URI.

    Args:
        p: Path of the media file.
        typ: ``"video"`` for a ``<video>`` element (declared as MP4); any
            other value yields an ``<audio>`` element (declared as MPEG).
        w: CSS width applied to the player.

    Returns:
        The markup as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Use a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(p, 'rb') as media:
        d = base64.b64encode(media.read()).decode()

    if typ == "video":
        return f'<video width="{w}" controls autoplay muted loop><source src="data:video/mp4;base64,{d}" type="video/mp4"></video>'
    return f'<audio controls style="width:{w};"><source src="data:audio/mpeg;base64,{d}" type="audio/mpeg"></audio>'
271
+
272
def display_file_manager():
    """Render the sidebar browser for generated ``.mp3`` and ``.md`` files.

    MP3 section: one expander per file containing a player, a download link
    and a "Transcribe" button.

    MD section: "delete all" and "download all as zip" buttons, followed by
    one row per file with view / download / open-in-editor / delete controls.
    Viewing or opening a file stores its path and content in
    ``st.session_state.current_file`` / ``st.session_state.file_content``.
    """
    st.sidebar.title("🎡 Audio Files & Documents")
    st.sidebar.markdown("Here you can find all recorded `.mp3` files and `.md` notes.")

    # --- MP3 files -------------------------------------------------------
    mp3_files = sorted(glob.glob("*.mp3"), reverse=True)
    if mp3_files:
        st.sidebar.subheader("MP3 Files:")
        for a in mp3_files:
            with st.sidebar.expander(f"{os.path.basename(a)}"):
                # Inside a `with` container plain st.* calls must be used:
                # st.sidebar.* always targets the sidebar root, which left
                # the expanders empty.
                st.markdown(get_media_html(a, "audio"), unsafe_allow_html=True)
                st.markdown(get_download_link(a), unsafe_allow_html=True)
                if st.button(f"Transcribe {os.path.basename(a)}", key="t" + a):
                    t = process_audio(a)
                    st.write("Transcription:")
                    st.write(t)
    else:
        st.sidebar.write("No MP3 files found.")

    # --- Markdown files --------------------------------------------------
    st.sidebar.subheader("MD Files:")
    files = sorted(glob.glob("*.md"), reverse=True)

    if st.sidebar.button("πŸ—‘ Delete All MD"):
        for f in files:
            os.remove(f)
        # Do not leave the editor pointing at a file that no longer exists.
        st.session_state.current_file = None
        # st.experimental_rerun was removed from Streamlit; st.rerun is the
        # supported replacement.
        st.rerun()

    # Download everything (.md and .mp3) as one zip.
    if st.sidebar.button("⬇️ Download All (.md and .mp3)"):
        z = create_zip_of_files()
        st.sidebar.markdown(get_download_link(z), unsafe_allow_html=True)

    viewed_text = None
    for f in files:
        col1, col2, col3, col4 = st.sidebar.columns([1, 3, 1, 1])
        with col1:
            if st.button("🌐", key="v" + f):
                with open(f, 'r', encoding='utf-8') as fh:
                    viewed_text = fh.read()
                # Keep path and content in step so the editor can never pair
                # this file's name with another file's stale content.
                st.session_state.current_file = f
                st.session_state.file_content = viewed_text
        with col2:
            st.markdown(get_download_link(f), unsafe_allow_html=True)
        with col3:
            if st.button("πŸ“‚", key="e" + f):
                with open(f, 'r', encoding='utf-8') as fh:
                    st.session_state.file_content = fh.read()
                st.session_state.current_file = f
        with col4:
            if st.button("πŸ—‘", key="d" + f):
                os.remove(f)
                if st.session_state.get('current_file') == f:
                    st.session_state.current_file = None
                st.rerun()

    # Rendered after the rows, outside the narrow sidebar columns, so the
    # text lands in the caller's current container instead of a 1/6-width
    # column.
    if viewed_text is not None:
        st.write("**Viewing file content:**")
        st.write(viewed_text)
323
+
324
def _run_selected_models(text, model_choice, arxiv_heading):
    """Send *text* to the backend(s) selected in the sidebar and render replies.

    For the single-model choices the matching ``process_*`` /
    ``perform_ai_lookup`` function is called directly. For any other choice
    (the combined mode) all three run side by side in columns, and a failure
    in one column is reported there without aborting the others.
    """
    if model_choice == "GPT-4o":
        process_with_gpt(text)
    elif model_choice == "Claude-3":
        process_with_claude(text)
    elif model_choice == "Arxiv":
        st.subheader("Arxiv Only Results:")
        perform_ai_lookup(text)
    else:
        # (column heading, runner, message prefix on failure)
        runners = (
            ("GPT-4o Omni:", process_with_gpt, "GPT 4o error"),
            ("Claude-3 Sonnet:", process_with_claude, "Claude error"),
            # perform_ai_lookup renders its own result, so the return value
            # is not rendered a second time here.
            (arxiv_heading, perform_ai_lookup, "Arxiv error"),
        )
        for column, (heading, run, failure) in zip(st.columns(3), runners):
            with column:
                st.subheader(heading)
                try:
                    run(text)
                except Exception as exc:
                    # `except Exception` (not a bare except) lets Streamlit's
                    # BaseException-derived rerun/stop signals propagate.
                    st.error(f"{failure}: {exc}")


def main():
    """Build the page: action selector, model selector and the active tab.

    Input from the custom ``mycomponent`` component, when present, is
    dispatched to the selected model(s) first. The chosen action then renders
    one of: the text/voice prompt form with chat history, the image/video
    gallery, the ArXiv search box, or the markdown file editor. The sidebar
    file manager is drawn last.
    """
    # Defined once so the radio options and the comparisons below cannot
    # drift apart.
    tab_voice = "🎀 Voice Input"
    tab_media = "πŸ“Έ Media Gallery"
    tab_search = "πŸ” Search ArXiv"
    tab_editor = "πŸ“ File Editor"

    st.sidebar.markdown("### 🚲BikeAIπŸ† Multi-Agent Research AI")
    tab_main = st.radio("Action:", [tab_voice, tab_media, tab_search, tab_editor], horizontal=True)

    model_choice = st.sidebar.radio("AI Model:", ["Arxiv", "GPT-4o", "Claude-3", "GPT+Claude+Arxiv"], index=0)

    # Declare the custom component and react to whatever value it returns.
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello")
    if val:
        user_input = val.strip()
        if user_input:
            _run_selected_models(user_input, model_choice, "Arxiv + Mistral:")

    if tab_main == tab_voice:
        st.subheader("🎀 Voice Recognition")
        user_text = st.text_area("Message:", height=100)
        user_text = user_text.strip()
        if st.button("Send πŸ“¨"):
            if user_text:
                _run_selected_models(user_text, model_choice, "Arxiv & Mistral:")

        st.subheader("πŸ“œ Chat History")
        t1, t2 = st.tabs(["Claude History", "GPT-4o History"])
        with t1:
            for c in st.session_state.chat_history:
                st.write("**You:**", c["user"])
                st.write("**Claude:**", c["claude"])
        with t2:
            for m in st.session_state.messages:
                with st.chat_message(m["role"]):
                    st.markdown(m["content"])

    elif tab_main == tab_media:
        # Only images and videos are shown here; audio lives in the sidebar.
        st.header("🎬 Media Gallery - Images and Videos")
        tabs = st.tabs(["πŸ–ΌοΈ Images", "πŸŽ₯ Video"])
        with tabs[0]:
            imgs = glob.glob("*.png") + glob.glob("*.jpg")
            if imgs:
                c = st.slider("Cols", 1, 5, 3)
                cols = st.columns(c)
                for i, f in enumerate(imgs):
                    with cols[i % c]:
                        with Image.open(f) as img:
                            st.image(img, use_container_width=True)
                        if st.button(f"πŸ‘€ Analyze {os.path.basename(f)}"):
                            a = process_image(f, "Describe this image.")
                            st.markdown(a)
            else:
                st.write("No images found.")
        with tabs[1]:
            vids = glob.glob("*.mp4")
            if vids:
                for v in vids:
                    with st.expander(f"πŸŽ₯ {os.path.basename(v)}"):
                        st.markdown(get_media_html(v, "video"), unsafe_allow_html=True)
                        if st.button(f"Analyze {os.path.basename(v)}"):
                            a = process_video_with_gpt(v, "Describe video.")
                            st.markdown(a)
            else:
                st.write("No videos found.")

    elif tab_main == tab_search:
        q = st.text_input("Research query:")
        if q:
            q = q.strip()
            if q:
                # search_arxiv renders both answers itself; rendering its
                # return value again would duplicate them.
                search_arxiv(q)

    elif tab_main == tab_editor:
        current_file = st.session_state.get('current_file')
        if current_file:
            st.subheader(f"Editing: {current_file}")
            # file_content is absent when the file was selected via the view
            # button; load it from disk instead of raising AttributeError.
            content = st.session_state.get('file_content')
            if content is None:
                try:
                    with open(current_file, 'r', encoding='utf-8') as f:
                        content = f.read()
                except OSError:
                    content = ""
                st.session_state.file_content = content
            new_text = st.text_area("Content:", content, height=300)
            if st.button("Save"):
                with open(current_file, 'w', encoding='utf-8') as f:
                    f.write(new_text)
                # Keep the cached copy in step with what is now on disk.
                st.session_state.file_content = new_text
                st.success("Updated!")
        else:
            st.write("Select a file from the sidebar to edit.")

    display_file_manager()


if __name__ == "__main__":
    main()