awacke1 committed
Commit 0fa951f · verified · 1 parent: e7ceb79

Create app.py

Files changed (1)
app.py +481 -0
app.py ADDED
@@ -0,0 +1,481 @@
import base64
import cv2
import glob
import json
import math
import os
import pytz
import random
import re
import requests
import streamlit as st
import streamlit.components.v1 as components
import textract
import time
import zipfile
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import concurrent

from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import deque
from datetime import datetime
from dotenv import load_dotenv
from gradio_client import Client, handle_file
from huggingface_hub import InferenceClient
from io import BytesIO
from moviepy import VideoFileClip
from PIL import Image
from PyPDF2 import PdfReader
from templates import bot_template, css, user_template
from urllib.parse import quote
from xml.etree import ElementTree as ET

import openai
from openai import OpenAI
import pandas as pd
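# Dependency note (assumed from the imports, not pinned by this commit): this file
# implies packages such as streamlit, openai>=1.x, opencv-python, moviepy, PyPDF2,
# textract, pytz, tqdm, beautifulsoup4, python-dotenv, audio-recorder-streamlit,
# gradio-client, huggingface-hub, and pandas, plus a local templates.py that
# provides bot_template, css, and user_template.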
# 1. Configuration
load_dotenv()  # pull keys from a local .env when present
Site_Name = 'Scholarly-Article-Document-Search-With-Memory'
title = "🔬🧠ScienceBrain.AI"
helpURL = 'https://huggingface.co/awacke1'
bugURL = 'https://huggingface.co/spaces/awacke1'
icons = Image.open("icons.ico")
st.set_page_config(
    page_title=title,
    page_icon=icons,
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={'Get Help': helpURL, 'Report a bug': bugURL, 'About': title}
)

# API Configuration
API_KEY = os.getenv('API_KEY')
HF_KEY = os.getenv('HF_KEY')
headers = {"Authorization": f"Bearer {HF_KEY}", "Content-Type": "application/json"}
key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key=key, organization=os.getenv('OPENAI_ORG_ID'))
MODEL = "gpt-4o-2024-05-13"
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = MODEL
if "messages" not in st.session_state:
    st.session_state.messages = []
if st.button("Clear Session"):
    st.session_state.messages = []

# Sidebar Options
should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
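# A minimal .env for local runs might look like this (hypothetical values):
#   OPENAI_API_KEY=sk-...
#   OPENAI_ORG_ID=org-...
#   HF_KEY=hf_...
#   API_KEY=...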
# HTML5 Speech Synthesis
def SpeechSynthesis(result):
    # Not cached: the whole point of this function is the render side effect,
    # and a cached call would skip rendering on repeat invocations.
    documentHTML5 = '''
    <!DOCTYPE html>
    <html>
    <head>
        <title>Read It Aloud</title>
        <script type="text/javascript">
            function readAloud() {
                const text = document.getElementById("textArea").value;
                const speech = new SpeechSynthesisUtterance(text);
                window.speechSynthesis.speak(speech);
            }
        </script>
    </head>
    <body>
        <h1>🔊 Read It Aloud</h1>
        <textarea id="textArea" rows="10" cols="80">
    '''
    documentHTML5 += result + '''
        </textarea>
        <br>
        <button onclick="readAloud()">🔊 Read Aloud</button>
    </body>
    </html>
    '''
    components.html(documentHTML5, width=1280, height=300)
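# Example (illustrative): SpeechSynthesis("Hello world") renders a text area with
# a Read Aloud button; speech is produced client-side by the browser's Web Speech
# API, so no server round-trip or API key is involved.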
# File Naming and Saving
def generate_filename(prompt, file_type):
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
    replaced_prompt = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt)
    safe_prompt = re.sub(r'\s+', ' ', replaced_prompt).strip()[:240]
    return f"{safe_date_time}_{safe_prompt}.{file_type}"

def create_and_save_file(content, file_type="md", prompt=None, is_image=False, should_save=True):
    if not should_save:
        return None
    filename = generate_filename(prompt if prompt else content, file_type)
    if is_image:
        # Image content is raw bytes, so it needs binary mode.
        with open(filename, "wb") as f:
            f.write(content)
    else:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(prompt + "\n\n" + content if prompt else content)
    return filename
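# Worked example: generate_filename("What is RAG?", "md") at 3:05 PM on April 12
# returns "0412_1505_What is RAG.md"; forbidden filename characters become spaces
# and the prompt portion is truncated to 240 characters.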
# Text Processing
def process_text(text_input):
    if text_input:
        st.session_state.messages.append({"role": "user", "content": text_input})
        with st.chat_message("user"):
            st.markdown(text_input)
        with st.chat_message("assistant"):
            completion = client.chat.completions.create(
                model=st.session_state["openai_model"],
                messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
                stream=False
            )
            response = completion.choices[0].message.content
            st.markdown(response)
        create_and_save_file(response, "md", text_input, should_save=should_save)
        st.session_state.messages.append({"role": "assistant", "content": response})
# Audio Processing
def process_audio(audio_input, text_input=''):
    if audio_input:
        if isinstance(audio_input, str):
            file_name = os.path.basename(audio_input)
            with open(audio_input, "rb") as f:
                audio_bytes = f.read()
        else:
            file_name = audio_input.name
            audio_bytes = audio_input.read()
        with st.spinner("Transcribing audio..."):
            # The API infers the audio format from a filename, so pass a
            # (name, bytes) tuple rather than a bare BytesIO with no name.
            transcription = client.audio.transcriptions.create(model="whisper-1", file=(file_name, audio_bytes))
        st.session_state.messages.append({"role": "user", "content": transcription.text})
        with st.chat_message("user"):
            st.markdown(transcription.text)
        with st.chat_message("assistant"):
            completion = client.chat.completions.create(
                model=st.session_state["openai_model"],
                messages=[{"role": "user", "content": text_input + "\n\nTranscription: " + transcription.text}]
            )
            response = completion.choices[0].message.content
            st.markdown(response)
        create_and_save_file(response, "md", text_input, should_save=should_save)
        st.session_state.messages.append({"role": "assistant", "content": response})
# Image Processing
def process_image(image_input, user_prompt):
    if isinstance(image_input, str):
        with open(image_input, "rb") as image_file:
            image_bytes = image_file.read()
    else:
        image_bytes = image_input.read()
    base64_image = base64.b64encode(image_bytes).decode("utf-8")
    response = client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}
            ]}
        ],
        temperature=0.0
    )
    image_response = response.choices[0].message.content
    create_and_save_file(image_response, "md", user_prompt, should_save=should_save)
    return image_response
# Video Processing
def save_video(video_file):
    with open(video_file.name, "wb") as f:
        f.write(video_file.getbuffer())
    return video_file.name

def process_video(video_path, seconds_per_frame=2):
    base64Frames = []
    base_video_path, _ = os.path.splitext(video_path)
    video = cv2.VideoCapture(video_path)
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = video.get(cv2.CAP_PROP_FPS)
    frames_to_skip = int(fps * seconds_per_frame)
    curr_frame = 0
    while curr_frame < total_frames - 1:
        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
        success, frame = video.read()
        if not success:
            break
        _, buffer = cv2.imencode(".jpg", frame)
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
        curr_frame += frames_to_skip
    video.release()
    audio_path = f"{base_video_path}.mp3"
    try:
        clip = VideoFileClip(video_path)
        clip.audio.write_audiofile(audio_path, bitrate="32k")
        clip.audio.close()
        clip.close()
    except Exception:
        st.write('No audio track found.')
        audio_path = None
    return base64Frames, audio_path

def process_audio_and_video(video_input):
    if video_input:
        video_path = save_video(video_input)
        with st.spinner("Extracting frames and audio..."):
            base64Frames, audio_path = process_video(video_path)
        with st.spinner("Transcribing video audio..."):
            # whisper-1 accepts mp4 directly, so the video file itself is sent.
            with open(video_path, "rb") as video_file:
                transcript = client.audio.transcriptions.create(model="whisper-1", file=video_file).text
        with st.chat_message("user"):
            st.markdown(f"Video Transcription: {transcript}")
        with st.chat_message("assistant"):
            response = client.chat.completions.create(
                model=st.session_state["openai_model"],
                messages=[
                    {"role": "system", "content": "Summarize the video and its transcript in Markdown."},
                    {"role": "user", "content": [
                        {"type": "text", "text": "Video frames:"},
                        *({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{x}"}} for x in base64Frames),
                        {"type": "text", "text": f"Transcription: {transcript}"}
                    ]}
                ]
            )
            result = response.choices[0].message.content
            st.markdown(result)
        create_and_save_file(result, "md", "Video summary", should_save=should_save)
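# Sampling arithmetic: at 30 fps with seconds_per_frame=2, frames_to_skip is
# int(30 * 2) = 60, so a 2-minute clip (3600 frames) yields about 60 sampled
# frames, each sent to the model as a base64 data URL.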
# RAG PDF Gallery
def extract_text_from_pdf(pdf_path):
    text = ""
    try:
        with open(pdf_path, "rb") as f:
            reader = PdfReader(f)
            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text
    except Exception as e:
        st.error(f"Error reading {pdf_path}: {e}")
    return text

def generate_questions(pdf_path):
    text = extract_text_from_pdf(pdf_path)
    response = client.chat.completions.create(
        model="gpt-4o-2024-05-13",
        messages=[{"role": "user", "content": f"Generate a question that can only be answered from this document:\n{text[:2000]}"}]
    )
    return response.choices[0].message.content
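# Note: only the first 2,000 characters reach the model, so the generated
# question is biased toward each document's opening page or abstract.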
def upload_single_pdf(file_path, vector_store_id):
    file_name = os.path.basename(file_path)
    try:
        # with-block closes the file handle even if the upload fails
        with open(file_path, 'rb') as f:
            file_response = client.files.create(file=f, purpose="assistants")
        client.vector_stores.files.create(
            vector_store_id=vector_store_id,
            file_id=file_response.id
        )
        return {"file": file_name, "status": "success"}
    except Exception as e:
        st.error(f"Error with {file_name}: {str(e)}")
        return {"file": file_name, "status": "failed", "error": str(e)}

def upload_pdf_files_to_vector_store(vector_store_id, pdf_files):
    stats = {"total_files": len(pdf_files), "successful_uploads": 0, "failed_uploads": 0, "errors": []}
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(upload_single_pdf, file_path, vector_store_id): file_path for file_path in pdf_files}
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(pdf_files)):
            result = future.result()
            if result["status"] == "success":
                stats["successful_uploads"] += 1
            else:
                stats["failed_uploads"] += 1
                stats["errors"].append(result)
    return stats

def create_vector_store(store_name):
    try:
        vector_store = client.vector_stores.create(name=store_name)
        return {"id": vector_store.id, "name": vector_store.name, "created_at": vector_store.created_at, "file_count": vector_store.file_counts.completed}
    except Exception as e:
        st.error(f"Error creating vector store: {e}")
        return {}
def process_rag_query(query, vector_store_id):
    # file_search is served by the Responses API rather than Chat Completions;
    # this assumes an openai SDK new enough to expose client.responses (the same
    # vintage that provides client.vector_stores above).
    response = client.responses.create(
        model="gpt-4o-mini",
        input=query,
        tools=[{"type": "file_search", "vector_store_ids": [vector_store_id]}],
        include=["file_search_call.results"]
    )
    search_results = []
    for item in response.output:
        if item.type == "file_search_call" and getattr(item, "results", None):
            search_results.extend(item.results)
    return response.output_text, search_results

def evaluate_rag_performance(questions_dict, vector_store_id, k=5):
    total_queries = len(questions_dict)
    correct_retrievals_at_k = 0
    reciprocal_ranks = []
    average_precisions = []

    for expected_filename, query in questions_dict.items():
        _, search_results = process_rag_query(query, vector_store_id)
        retrieved_files = [result.filename for result in search_results[:k]]
        if expected_filename in retrieved_files:
            rank = retrieved_files.index(expected_filename) + 1
            correct_retrievals_at_k += 1
            reciprocal_ranks.append(1 / rank)
            precisions = [1 if f == expected_filename else 0 for f in retrieved_files[:rank]]
            average_precisions.append(sum(precisions) / len(precisions))
        else:
            reciprocal_ranks.append(0)
            average_precisions.append(0)

    recall_at_k = correct_retrievals_at_k / total_queries
    # With exactly one relevant document per query, precision@k is recall@k / k.
    precision_at_k = recall_at_k / k
    mrr = sum(reciprocal_ranks) / total_queries
    map_score = sum(average_precisions) / total_queries
    return {"recall@k": recall_at_k, "precision@k": precision_at_k, "mrr": mrr, "map": map_score}
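# Worked example: with 4 queries whose correct files rank 1, 3, (not found), 2,
# recall@5 = 3/4 = 0.75 and MRR = (1 + 1/3 + 0 + 1/2) / 4 ≈ 0.458.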
def rag_pdf_gallery():
    st.subheader("📚 RAG PDF Gallery")
    pdf_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
    if pdf_files:
        # Save uploaded PDFs locally
        local_pdf_paths = []
        for pdf in pdf_files:
            pdf_path = f"temp_{pdf.name}"
            pdf.seek(0)  # a rerun may have left the upload buffer at EOF
            with open(pdf_path, "wb") as f:
                f.write(pdf.read())
            local_pdf_paths.append(pdf_path)

        # Generate questions and build the vector store once per session;
        # Streamlit reruns this whole function on every widget interaction.
        if "rag_store" not in st.session_state:
            with st.spinner("Generating evaluation questions..."):
                questions_dict = {os.path.basename(p): generate_questions(p) for p in local_pdf_paths}
            with st.spinner("Creating vector store..."):
                vector_store_details = create_vector_store("rag_pdf_gallery_store")
                if not vector_store_details:
                    return
                upload_stats = upload_pdf_files_to_vector_store(vector_store_details["id"], local_pdf_paths)
                st.write("Upload Stats:", upload_stats)
            st.session_state["rag_store"] = {"details": vector_store_details, "questions": questions_dict}
        vector_store_details = st.session_state["rag_store"]["details"]
        questions_dict = st.session_state["rag_store"]["questions"]
        st.write("Generated Questions:", questions_dict)

        # Query interface
        query = st.text_input("Ask a question about the PDFs:")
        if query:
            with st.spinner("Processing RAG query..."):
                response, search_results = process_rag_query(query, vector_store_details["id"])
            st.markdown("**Response:**")
            st.markdown(response)
            if search_results:
                st.markdown("**Retrieved Chunks:**")
                for result in search_results:
                    st.write(f"- File: {result.filename}, Score: {result.score}")

        # Evaluate performance
        if st.button("Evaluate RAG Performance"):
            with st.spinner("Evaluating performance..."):
                metrics = evaluate_rag_performance(questions_dict, vector_store_details["id"])
            st.write("Evaluation Metrics:", metrics)

        # Cleanup local copies (the vector store keeps its own)
        for pdf_path in local_pdf_paths:
            os.remove(pdf_path)
# File Sidebar
def FileSidebar():
    st.sidebar.title("File Operations")
    file_types = st.sidebar.multiselect("Filter by type", [".md", ".wav", ".png", ".mp4", ".mp3"], default=[".md"])
    all_files = [f for f in glob.glob("*.*") if os.path.splitext(f)[1] in file_types and len(os.path.splitext(f)[0]) >= 10]
    all_files.sort(key=lambda x: os.path.getmtime(x), reverse=True)

    if st.sidebar.button("🗑 Delete All Filtered"):
        for file in all_files:
            os.remove(file)
        st.rerun()

    @st.cache_resource
    def create_zip_of_files(files):
        zip_name = "files.zip"
        with zipfile.ZipFile(zip_name, 'w') as zipf:
            for file in files:
                zipf.write(file)
        return zip_name

    @st.cache_resource
    def get_zip_download_link(zip_file):
        with open(zip_file, 'rb') as f:
            data = f.read()
        b64 = base64.b64encode(data).decode()
        return f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'

    if st.sidebar.button("⬇️ Download All Filtered"):
        zip_file = create_zip_of_files(all_files)
        st.sidebar.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)

    for file in all_files:
        col1, col2, col3 = st.sidebar.columns([1, 6, 1])
        with col1:
            if st.button("🌐", key=f"view_{file}"):
                with open(file, "r", encoding="utf-8") as f:
                    content = f.read()
                st.markdown(content)
                SpeechSynthesis(content)
        with col2:
            st.write(file)
        with col3:
            if st.button("🗑", key=f"delete_{file}"):
                os.remove(file)
                st.rerun()
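# The len(...) >= 10 filter keeps only stems at least as long as the MMDD_HHMM_
# timestamp prefix produced by generate_filename, hiding stray short-named files
# (e.g. temp artifacts) from the sidebar.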
# Main Function
def main():
    st.markdown("##### GPT-4o Omni Model: Text, Audio, Image, Video & RAG")
    model_options = ["gpt-4o-2024-05-13", "gpt-3.5-turbo", "gpt-4o-mini"]
    selected_model = st.selectbox("Select GPT Model", model_options, index=0)
    st.session_state["openai_model"] = selected_model

    option = st.selectbox("Select Input Type", ("Text", "Image", "Audio", "Video", "RAG PDF Gallery"))

    if option == "Text":
        text_input = st.text_input("Enter your text:")
        if text_input:
            with st.spinner("Processing..."):
                process_text(text_input)

    elif option == "Image":
        default_prompt = "Describe this image and list ten facts in a markdown outline with emojis."
        text_input = st.text_input("Image Prompt:", value=default_prompt)
        image_input = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
        if image_input and text_input:
            with st.spinner("Processing..."):
                image_response = process_image(image_input, text_input)
            with st.chat_message("ai", avatar="🦖"):
                st.markdown(image_response)

    elif option == "Audio":
        default_prompt = "Summarize this audio transcription in Markdown."
        text_input = st.text_input("Audio Prompt:", value=default_prompt)
        audio_input = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
        if audio_input and text_input:
            with st.spinner("Processing..."):
                process_audio(audio_input, text_input)

    elif option == "Video":
        default_prompt = "Summarize this video and its transcription in Markdown."
        text_input = st.text_input("Video Prompt:", value=default_prompt)
        video_input = st.file_uploader("Upload a video file", type=["mp4"])
        if video_input and text_input:
            with st.spinner("Processing..."):
                process_audio_and_video(video_input)

    elif option == "RAG PDF Gallery":
        rag_pdf_gallery()

    # Chat History and Display
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
        process_text(prompt)

FileSidebar()
main()