Ahmadkhan12 committed on
Commit
c72331c
·
verified ·
1 Parent(s): 48d202f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sentence_transformers import SentenceTransformer
3
+ import faiss
4
+ import numpy as np
5
+ import streamlit as st
6
+ import pickle
7
+ import os
8
+
9
# Load the movie metadata table.
csv_file = "Hydra-Movie-Scrape.csv"


@st.cache_data
def _load_movies(path):
    """Read the movie CSV at *path* into a DataFrame.

    Streamlit re-executes this whole script on every widget interaction.
    The cache decorator keeps the parsed table between reruns, so the file
    is only read once per distinct *path* (restart the app or clear the
    cache to pick up changes to the file on disk).
    """
    return pd.read_csv(path)


df = _load_movies(csv_file)
12
+
13
# Load the precomputed embeddings and build the FAISS index over them.
@st.cache_resource
def _load_index(path):
    """Build an exact L2 FAISS index from the pickled embeddings at *path*.

    Cached as a resource so the pickle is read and the index is built once
    per process instead of on every Streamlit rerun.

    Returns:
        A ``(embedding_matrix, index)`` tuple: the float32 matrix that was
        added to the index, and the populated ``faiss.IndexFlatL2``.

    Raises:
        ValueError: If the pickled object does not convert to a 2-D array.
    """
    # NOTE(security): unpickling executes arbitrary code. This file must be
    # a trusted artifact shipped with the app, never user-supplied data.
    with open(path, "rb") as f:
        raw_embeddings = pickle.load(f)

    # FAISS requires float32 input.
    matrix = np.array(raw_embeddings).astype("float32")
    if matrix.ndim != 2:
        raise ValueError(
            f"Expected a 2-D embedding matrix in {path!r}, "
            f"got shape {matrix.shape}"
        )

    flat_index = faiss.IndexFlatL2(matrix.shape[1])
    flat_index.add(matrix)
    return matrix, flat_index


embedding_matrix, index = _load_index("embeddings.pkl")
23
+
24
# Load the SentenceTransformer model used to encode the query.
@st.cache_resource
def _load_model(name):
    """Instantiate the SentenceTransformer called *name*.

    Cached as a resource so the model is constructed once per process
    rather than on every Streamlit rerun.
    """
    return SentenceTransformer(name)


model = _load_model('all-MiniLM-L6-v2')
26
+
27
# Function to retrieve the most relevant movies based on a query
def retrieve(query, top_k=10):
    """Return the index positions of the movies closest to *query*.

    The query is embedded with the module-level ``model`` and searched
    against the module-level FAISS ``index``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of neighbours to return.

    Returns:
        A 1-D integer array of positions in the index, as returned by
        ``index.search``. It holds at most ``top_k`` entries, fewer when
        the index contains fewer vectors, and is empty when ``top_k`` is
        not positive or the index is empty.
    """
    # FAISS pads the result with -1 when asked for more neighbours than it
    # can supply and rejects a non-positive k, so clamp the request first.
    k = min(int(top_k), index.ntotal)
    if k <= 0:
        return np.empty(0, dtype="int64")

    query_vector = np.array(model.encode(query)).astype("float32")
    # index.search expects a (n_queries, dim) matrix; this is one query.
    _, indices = index.search(query_vector.reshape(1, -1), k)
    return indices[0]
33
+
34
# Streamlit app layout
def _format_movie(movie_details):
    """Render one movie row as a Markdown paragraph, one field per line."""
    # A bare "\n" is only a soft break in Markdown and is rendered as a
    # space; two trailing spaces before the newline force a line break.
    return "  \n".join([
        f"*Title*: {movie_details['Title']}",
        f"*Year*: {movie_details['Year']}",
        f"*Director*: {movie_details['Director']}",
        f"*Cast*: {movie_details['Cast']}",
        f"*Summary*: {movie_details['Summary']}",
    ])


st.title("Movie Dataset RAG Application")
query = st.text_input("Ask a question about movies:")

if st.button("Submit"):
    cleaned_query = query.strip()
    if cleaned_query:
        indices = retrieve(cleaned_query)
        # Skip FAISS's -1 "no result" padding and any position that has no
        # corresponding row in the CSV.
        entries = [
            _format_movie(df.iloc[idx])
            for idx in indices
            if 0 <= idx < len(df)
        ]

        if entries:
            st.write("Here are some movies that match your query:")
            # A blank line between entries starts a new Markdown paragraph.
            st.markdown("\n\n".join(entries))
        else:
            st.write("No relevant documents found.")
    else:
        st.write("Please enter a query.")