"""Streamlit app that retrieves movies similar to a free-text query.

The query is embedded with a SentenceTransformer model and matched against
precomputed movie embeddings through a FAISS L2 index; the matching rows of
the movie CSV are then rendered as Markdown.
"""

import os
import pickle

import faiss
import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer

CSV_PATH = "/content/Hydra-Movie-Scrape.csv"
EMBEDDINGS_PATH = "/content/embeddings.pkl"
MODEL_NAME = "all-MiniLM-L6-v2"

# Columns shown for every hit, in display order.
DISPLAY_FIELDS = ("Title", "Year", "Director", "Cast", "Summary")


# Streamlit re-executes this script from the top on every widget interaction.
# The loaders below are cached so the CSV, the FAISS index and the model are
# built once per server process instead of once per click.
@st.cache_data
def _load_movies(path: str) -> pd.DataFrame:
    """Read the movie CSV at *path* into a DataFrame."""
    return pd.read_csv(path)


@st.cache_resource
def _load_index(path: str):
    """Build a FAISS L2 index from the pickled embeddings stored at *path*."""
    # NOTE(security): pickle.load can execute arbitrary code. Only point this
    # at an embeddings file you generated yourself.
    with open(path, "rb") as f:
        doc_embeddings = pickle.load(f)

    # FAISS requires float32 input.
    embedding_matrix = np.array(doc_embeddings).astype("float32")

    faiss_index = faiss.IndexFlatL2(embedding_matrix.shape[1])
    faiss_index.add(embedding_matrix)
    return faiss_index


@st.cache_resource
def _load_model(name: str) -> SentenceTransformer:
    """Load the SentenceTransformer model used to encode queries."""
    return SentenceTransformer(name)


df = _load_movies(CSV_PATH)
index = _load_index(EMBEDDINGS_PATH)
model = _load_model(MODEL_NAME)


def retrieve(query, top_k=10):
    """Return the index positions of the top_k nearest embeddings to *query*.

    Args:
        query: Free-text query to encode with the sentence-transformer model.
        top_k: Number of neighbours to request from the FAISS index.

    Returns:
        The first row of the label array returned by ``index.search``: one
        entry per requested neighbour. FAISS pads the result with ``-1`` when
        the index holds fewer than ``top_k`` vectors, so callers must skip
        negative entries.
    """
    query_vector = np.array(model.encode(query)).astype("float32")
    # index.search expects a 2-D batch of queries; wrap the single vector.
    _, indices = index.search(np.array([query_vector]), top_k)
    return indices[0]


def _format_movie(movie) -> str:
    """Render one movie row as Markdown with one field per line."""
    # Two trailing spaces before the newline force a Markdown hard line break;
    # a bare "\n" is a soft break and would collapse all fields onto one line.
    return "  \n".join(f"*{field}*: {movie[field]}" for field in DISPLAY_FIELDS)


# Streamlit app layout
st.title("Movie Dataset RAG Application")

query = st.text_input("Ask a question about movies:")

if st.button("Submit"):
    # Treat a whitespace-only query the same as an empty one.
    cleaned_query = query.strip()
    if not cleaned_query:
        st.write("Please enter a query.")
    else:
        # Skip FAISS "-1" padding and any position past the end of the CSV.
        # Assumes embedding row i describes CSV row i - TODO confirm against
        # the script that produced the embeddings file.
        entries = [
            _format_movie(df.iloc[idx])
            for idx in retrieve(cleaned_query)
            if 0 <= idx < len(df)
        ]

        if entries:
            st.write("Here are some movies that match your query:")
            # A blank line between entries starts a new Markdown paragraph.
            st.markdown("\n\n".join(entries))
        else:
            st.write("No relevant documents found.")