ivyblossom committed on
Commit
b06f609
1 Parent(s): 09ef786

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -7
app.py CHANGED
@@ -22,7 +22,7 @@ def semantic_search(query, documents, top_k=5):
22
  query_embedding = model.encode(query, convert_to_tensor=True)
23
 
24
  # Convert the list of documents to embeddings
25
- document_embeddings = model.encode(documents, convert_to_tensor=True)
26
 
27
  # Compute cosine similarity scores of query with documents
28
  cosine_scores = util.pytorch_cos_sim(query_embedding, document_embeddings)
@@ -38,8 +38,8 @@ def semantic_search(query, documents, top_k=5):
38
  def main():
39
  st.title("Semantic Search on PDF Documents")
40
 
41
- query = st.text_input("Enter your query:")
42
  pdf_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
 
43
 
44
  if st.button("Search"):
45
  if pdf_file:
@@ -50,10 +50,7 @@ def main():
50
  # Extract text from the PDF along with page numbers
51
  pdf_text_with_pages = list(extract_text_from_pdf(pdf_path))
52
 
53
- # Extract the text content from the tuple list
54
- pdf_text = [text for _, text in pdf_text_with_pages]
55
-
56
- search_results = semantic_search(query, pdf_text)
57
  os.remove(pdf_path) # Delete the uploaded file after processing
58
 
59
  st.write(f"Search results for query: '{query}'")
@@ -64,4 +61,4 @@ def main():
64
  st.write(result_text)
65
 
66
  if __name__ == "__main__":
67
- main()
 
22
  query_embedding = model.encode(query, convert_to_tensor=True)
23
 
24
  # Convert the list of documents to embeddings
25
+ document_embeddings = model.encode([text for _, text in documents], convert_to_tensor=True)
26
 
27
  # Compute cosine similarity scores of query with documents
28
  cosine_scores = util.pytorch_cos_sim(query_embedding, document_embeddings)
 
38
  def main():
39
  st.title("Semantic Search on PDF Documents")
40
 
 
41
  pdf_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
42
+ query = st.text_input("Enter your query:")
43
 
44
  if st.button("Search"):
45
  if pdf_file:
 
50
  # Extract text from the PDF along with page numbers
51
  pdf_text_with_pages = list(extract_text_from_pdf(pdf_path))
52
 
53
+ search_results = semantic_search(query, pdf_text_with_pages)
 
 
 
54
  os.remove(pdf_path) # Delete the uploaded file after processing
55
 
56
  st.write(f"Search results for query: '{query}'")
 
61
  st.write(result_text)
62
 
63
  if __name__ == "__main__":
64
+ main()