import os
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import whisper
from gtts import gTTS
from tempfile import NamedTemporaryFile
import json
import gdown
# Initialize Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))  # Read the key from the environment; never hard-code secrets
# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# Load Whisper model
whisper_model = whisper.load_model("base")
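# "base" is a small, fast Whisper checkpoint; larger ones ("small", "medium", "large")
# trade speed for transcription accuracy.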
# Initialize FAISS
embedding_dimension = 384 # Dimension of embeddings from the model
index = faiss.IndexFlatL2(embedding_dimension)
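# IndexFlatL2 performs exact (brute-force) L2-distance search over every stored
# vector, which is fine at this scale; large corpora would typically use an
# approximate index instead.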
metadata = []
# List of Google Drive PDF links
google_drive_links = [
    "https://drive.google.com/file/d/1l7uT2KK-g_r853KZtqY3yjYL0JQRGRFg/view?usp=sharing"
]
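# Links are expected in the form https://drive.google.com/file/d/<FILE_ID>/view?...,
# which is the pattern extract_file_id below parses.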
# Streamlit App Configuration
st.set_page_config(page_title="Voice/Text Chatbot with RAG PDF Query", page_icon="🔊", layout="wide")
# Title
st.markdown("
Quranic-Therapy: AI-Driven Mental Health and Wellness
", unsafe_allow_html=True)
st.markdown("---")
# Sidebar for PDF Upload
st.sidebar.header("Upload Your PDF File")
uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type="pdf")
# Function to extract file ID from Google Drive link
def extract_file_id(drive_link):
    return drive_link.split("/d/")[1].split("/view")[0]
# Function to download PDF from Google Drive
def download_pdf_from_google_drive(file_id, output_path):
    download_url = f"https://drive.google.com/uc?id={file_id}"
    gdown.download(download_url, output_path, quiet=False)
# Function for text extraction from PDF
def extract_text_from_pdf(file):
    reader = PdfReader(file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""  # extract_text() can return None for image-only pages
    return text
# Function for text-to-speech
def text_to_speech(response_text):
    tts = gTTS(text=response_text, lang="en")
    audio_file = NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(audio_file.name)
    return audio_file.name
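# Note: NamedTemporaryFile(delete=False) leaves the generated .mp3 files on disk;
# nothing cleans them up, so a long-running deployment may want to remove them
# after st.audio has played them.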
# Save embeddings and metadata
def save_database(faiss_index, metadata, file_path="vector_database.json"):
    all_embeddings = []
    for i in range(faiss_index.ntotal):
        all_embeddings.append(faiss_index.reconstruct(i).tolist())
    data = {
        "embeddings": all_embeddings,
        "metadata": metadata
    }
    with open(file_path, "w") as f:
        json.dump(data, f)
    st.success(f"Vector database saved to {file_path}!")
# Process Google Drive PDFs
st.sidebar.header("Processing Google Drive PDFs")
with st.spinner("Downloading and processing Google Drive PDFs..."):
    for link in google_drive_links:
        file_id = extract_file_id(link)
        output_pdf_path = f"downloaded_{file_id}.pdf"
        # Download PDF
        if not os.path.exists(output_pdf_path):  # Avoid re-downloading
            download_pdf_from_google_drive(file_id, output_pdf_path)
        # Extract text and process
        pdf_text = extract_text_from_pdf(output_pdf_path)
        if pdf_text.strip():
            # Split text into fixed-size chunks and create embeddings
            chunk_size = 500
            chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]
            embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
            index.add(embeddings)
            # Store metadata
            metadata.extend([{"chunk": chunk, "source": f"Google Drive: {output_pdf_path}"} for chunk in chunks])
# PDF Text Processing
if uploaded_file:
    pdf_text = extract_text_from_pdf(uploaded_file)
    if pdf_text.strip():
        st.success("PDF text successfully extracted!")
        with st.expander("View Extracted Text", expanded=False):
            st.write((pdf_text[:3000] + "...") if len(pdf_text) > 3000 else pdf_text)
        # Split text into fixed-size chunks and create embeddings
        chunk_size = 500
        chunks = [pdf_text[i:i + chunk_size] for i in range(0, len(pdf_text), chunk_size)]
        embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
        index.add(embeddings)
        # Store metadata
        metadata.extend([{"chunk": chunk, "source": uploaded_file.name} for chunk in chunks])
        save_database(index, metadata)
        st.success(f"Processed {len(chunks)} chunks and stored embeddings in FAISS!")
# Main Chatbot Interface
st.header("🤖 Chatbot Interface")
# Input Method Selection
input_method = st.radio("Select Input Method:", options=["Text", "Audio"])
if input_method == "Text":
st.subheader("💬 Text Query Input")
text_query = st.text_input("Enter your query:")
if st.button("Submit Text Query"):
if text_query:
try:
# Search FAISS for nearest chunks
query_embedding = embedding_model.encode([text_query], convert_to_numpy=True)
distances, indices = index.search(query_embedding, k=5)
relevant_chunks = [metadata[idx]["chunk"] for idx in indices[0]]
# Generate response using Groq API
prompt = f"Use these references to answer the query:\n\n{relevant_chunks}\n\nQuery: {text_query}"
chat_completion = client.chat.completions.create(
messages=[{"role": "user", "content": prompt}],
model="llama-3.3-70b-versatile",
)
response = chat_completion.choices[0].message.content
# Display text response
st.write(f"**Chatbot Response:** {response}")
# Generate and play audio response
response_audio_path = text_to_speech(response)
st.audio(response_audio_path, format="audio/mp3", start_time=0)
except Exception as e:
st.error(f"Error processing your query: {e}")
elif input_method == "Audio":
st.subheader("🎤 Audio Query Input")
uploaded_audio = st.file_uploader("Upload your audio file", type=["m4a", "mp3", "wav"])
if uploaded_audio:
try:
audio_data = uploaded_audio.read()
audio_file = NamedTemporaryFile(delete=False, suffix=".m4a")
audio_file.write(audio_data)
audio_file_path = audio_file.name
st.success("Audio file uploaded successfully!")
# Transcribe the audio using Whisper model
transcription = whisper_model.transcribe(audio_file_path)["text"]
st.write(f"**You said:** {transcription}")
# Search FAISS for nearest chunks
query_embedding = embedding_model.encode([transcription], convert_to_numpy=True)
distances, indices = index.search(query_embedding, k=5)
relevant_chunks = [metadata[idx]["chunk"] for idx in indices[0]]
# Generate response using Groq API
prompt = f"Use these references to answer the query:\n\n{relevant_chunks}\n\nQuery: {transcription}"
chat_completion = client.chat.completions.create(
messages=[{"role": "user", "content": prompt}],
model="llama-3.3-70b-versatile",
)
response = chat_completion.choices[0].message.content
# Display text response
st.write(f"**Chatbot Response:** {response}")
# Generate and play audio response
response_audio_path = text_to_speech(response)
st.audio(response_audio_path, format="audio/mp3", start_time=0)
except Exception as e:
st.error(f"Error processing your query: {e}")
# Footer
st.markdown(" Quran is the therapy we all need
", unsafe_allow_html=True)