Spaces:

ivyblossom
/

question-answering

Running

ivyblossom committed on Aug 3, 2023

Commit

b06f609

•

1 Parent(s): 09ef786

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ def semantic_search(query, documents, top_k=5):
     query_embedding = model.encode(query, convert_to_tensor=True)
     # Convert the list of documents to embeddings
-    document_embeddings = model.encode(documents, convert_to_tensor=True)
     # Compute cosine similarity scores of query with documents
     cosine_scores = util.pytorch_cos_sim(query_embedding, document_embeddings)
@@ -38,8 +38,8 @@ def semantic_search(query, documents, top_k=5):
 def main():
     st.title("Semantic Search on PDF Documents")
-    query = st.text_input("Enter your query:")
     pdf_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
     if st.button("Search"):
         if pdf_file:
@@ -50,10 +50,7 @@ def main():
             # Extract text from the PDF along with page numbers
             pdf_text_with_pages = list(extract_text_from_pdf(pdf_path))
-            # Extract the text content from the tuple list
-            pdf_text = [text for _, text in pdf_text_with_pages]
-            search_results = semantic_search(query, pdf_text)
             os.remove(pdf_path)  # Delete the uploaded file after processing
             st.write(f"Search results for query: '{query}'")
@@ -64,4 +61,4 @@ def main():
                     st.write(result_text)
 if __name__ == "__main__":
-    main()

     query_embedding = model.encode(query, convert_to_tensor=True)
     # Convert the list of documents to embeddings
+    document_embeddings = model.encode([text for _, text in documents], convert_to_tensor=True)
     # Compute cosine similarity scores of query with documents
     cosine_scores = util.pytorch_cos_sim(query_embedding, document_embeddings)
 def main():
     st.title("Semantic Search on PDF Documents")
     pdf_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
+    query = st.text_input("Enter your query:")
     if st.button("Search"):
         if pdf_file:
             # Extract text from the PDF along with page numbers
             pdf_text_with_pages = list(extract_text_from_pdf(pdf_path))
+            search_results = semantic_search(query, pdf_text_with_pages)
             os.remove(pdf_path)  # Delete the uploaded file after processing
             st.write(f"Search results for query: '{query}'")
                     st.write(result_text)
 if __name__ == "__main__":
+    main()