# movies-database / app.py
# Ahmadkhan12's picture
# Create app.py
# c72331c verified
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import streamlit as st
import pickle
import os
# Load CSV file
csv_file = "Hydra-Movie-Scrape.csv"
df = pd.read_csv(csv_file)
# Load the precomputed embeddings
with open("embeddings.pkl", "rb") as f:
doc_embeddings = pickle.load(f)
# Convert the embeddings into a NumPy array (FAISS requires float32)
embedding_matrix = np.array(doc_embeddings).astype("float32")
# Build a FAISS index for efficient similarity search
index = faiss.IndexFlatL2(embedding_matrix.shape[1])
index.add(embedding_matrix)
# Load the SentenceTransformer model for encoding the query
model = SentenceTransformer('all-MiniLM-L6-v2')
# Function to retrieve the most relevant movies based on a query
def retrieve(query, top_k=10):
query_embedding = model.encode(query)
query_vector = np.array(query_embedding).astype("float32")
distances, indices = index.search(np.array([query_vector]), top_k)
return indices[0]
# Streamlit app layout
st.title("Movie Dataset RAG Application")
query = st.text_input("Ask a question about movies:")
if st.button("Submit"):
if query:
indices = retrieve(query)
response = ""
for idx in indices:
if idx != -1:
movie_details = df.iloc[idx]
response += f"*Title*: {movie_details['Title']}\n"
response += f"*Year*: {movie_details['Year']}\n"
response += f"*Director*: {movie_details['Director']}\n"
response += f"*Cast*: {movie_details['Cast']}\n"
response += f"*Summary*: {movie_details['Summary']}\n\n"
if response:
st.write("Here are some movies that match your query:")
st.markdown(response)
else:
st.write("No relevant documents found.")
else:
st.write("Please enter a query.")