Spaces:

ugaray96
/

neural-search

Running

App Files Community

ugmSorcero committed on Sep 23, 2022

Commit

6a6afbf

1 Parent(s): 843bc9e

Fixes linting

Browse files

Files changed (5) hide show

core/pipelines.py +1 -1
core/search_index.py +2 -4
interface/components.py +10 -16
interface/config.py +1 -1
interface/pages.py +5 -7

core/pipelines.py CHANGED Viewed

@@ -19,7 +19,7 @@ def keyword_search(index="documents", split_word_length=100):
       - Documents that have more lexical overlap with the query are more likely to be relevant
       - Words that occur in fewer documents are more significant than words that occur in many documents
     :warning: **(HAYSTACK BUG) Keyword Search doesn't work if you reindex:** Please refresh page in order to reindex
     """
     document_store = InMemoryDocumentStore(index=index)

       - Documents that have more lexical overlap with the query are more likely to be relevant
       - Words that occur in fewer documents are more significant than words that occur in many documents
     :warning: **(HAYSTACK BUG) Keyword Search doesn't work if you reindex:** Please refresh page in order to reindex
     """
     document_store = InMemoryDocumentStore(index=index)

core/search_index.py CHANGED Viewed

@@ -21,9 +21,7 @@ def format_docs(documents):
 def index(documents, pipeline, clear_index=True):
     documents, doc_ids = format_docs(documents)
     if clear_index:
-        document_stores = pipeline.get_nodes_by_class(
-            class_type=BaseDocumentStore
-        )
         for docstore in document_stores:
             docstore.delete_index(docstore.index)
     pipeline.run(documents=documents)
@@ -45,7 +43,7 @@ def search(queries, pipeline):
                     "score": res.score,
                     "id": res.meta["id"],
                     "fragment_id": res.id,
-                    "meta": res.meta
                 }
             )
         if not score_is_empty:

 def index(documents, pipeline, clear_index=True):
     documents, doc_ids = format_docs(documents)
     if clear_index:
+        document_stores = pipeline.get_nodes_by_class(class_type=BaseDocumentStore)
         for docstore in document_stores:
             docstore.delete_index(docstore.index)
     pipeline.run(documents=documents)
                     "score": res.score,
                     "id": res.meta["id"],
                     "fragment_id": res.id,
+                    "meta": res.meta,
                 }
             )
         if not score_is_empty:

interface/components.py CHANGED Viewed

@@ -42,7 +42,7 @@ def component_select_pipeline(container):
                 "index_pipeline": index_pipeline,
                 "doc": pipeline_funcs[index_pipe].__doc__,
             }
-            st.session_state['doc_id'] = 0
 def component_show_pipeline(pipeline, pipeline_name):
@@ -63,7 +63,7 @@ def component_show_search_result(container, results):
             st.markdown(f"### Match {idx+1}")
             st.markdown(f"**Text**: {document['text']}")
             st.markdown(f"**Document**: {document['id']}")
-            if '_split_id' in document['meta']:
                 st.markdown(f"**Document Chunk**: {document['meta']['_split_id']}")
             if document["score"] is not None:
                 st.markdown(f"**Score**: {document['score']:.3f}")
@@ -78,14 +78,12 @@ def component_text_input(container, doc_id):
             while True:
                 text = st.text_input(f"Document {doc_id}", key=doc_id)
                 if text != "":
-                    texts.append({"text": text, 'doc_id': doc_id})
                     doc_id += 1
                     st.markdown("---")
                 else:
                     break
-        corpus = [
-            {"text": doc["text"], "id": doc["doc_id"]} for doc in texts
-        ]
         return corpus, doc_id
@@ -97,7 +95,7 @@ def component_article_url(container, doc_id):
             while True:
                 url = st.text_input(f"URL {doc_id}", key=doc_id)
                 if url != "":
-                    urls.append({"text": extract_text_from_url(url), 'doc_id': doc_id})
                     doc_id += 1
                     st.markdown("---")
                 else:
@@ -105,11 +103,9 @@ def component_article_url(container, doc_id):
         for idx, doc in enumerate(urls):
             with st.expander(f"Preview URL {idx}"):
-                st.write(doc['text'])
-        corpus = [
-            {"text": doc["text"], "id": doc["doc_id"]} for doc in urls
-        ]
         return corpus, doc_id
@@ -125,7 +121,7 @@ def component_file_input(container, doc_id):
                 if file != None:
                     extracted_text = extract_text_from_file(file)
                     if extracted_text != None:
-                        files.append({"text": extracted_text, 'doc_id': doc_id})
                         doc_id += 1
                         st.markdown("---")
                     else:
@@ -135,9 +131,7 @@ def component_file_input(container, doc_id):
         for idx, doc in enumerate(files):
             with st.expander(f"Preview File {idx}"):
-                st.write(doc['text'])
-        corpus = [
-            {"text": doc["text"], "id": doc["doc_id"]} for doc in files
-        ]
         return corpus, doc_id

                 "index_pipeline": index_pipeline,
                 "doc": pipeline_funcs[index_pipe].__doc__,
             }
+            st.session_state["doc_id"] = 0
 def component_show_pipeline(pipeline, pipeline_name):
             st.markdown(f"### Match {idx+1}")
             st.markdown(f"**Text**: {document['text']}")
             st.markdown(f"**Document**: {document['id']}")
+            if "_split_id" in document["meta"]:
                 st.markdown(f"**Document Chunk**: {document['meta']['_split_id']}")
             if document["score"] is not None:
                 st.markdown(f"**Score**: {document['score']:.3f}")
             while True:
                 text = st.text_input(f"Document {doc_id}", key=doc_id)
                 if text != "":
+                    texts.append({"text": text, "doc_id": doc_id})
                     doc_id += 1
                     st.markdown("---")
                 else:
                     break
+        corpus = [{"text": doc["text"], "id": doc["doc_id"]} for doc in texts]
         return corpus, doc_id
             while True:
                 url = st.text_input(f"URL {doc_id}", key=doc_id)
                 if url != "":
+                    urls.append({"text": extract_text_from_url(url), "doc_id": doc_id})
                     doc_id += 1
                     st.markdown("---")
                 else:
         for idx, doc in enumerate(urls):
             with st.expander(f"Preview URL {idx}"):
+                st.write(doc["text"])
+        corpus = [{"text": doc["text"], "id": doc["doc_id"]} for doc in urls]
         return corpus, doc_id
                 if file != None:
                     extracted_text = extract_text_from_file(file)
                     if extracted_text != None:
+                        files.append({"text": extracted_text, "doc_id": doc_id})
                         doc_id += 1
                         st.markdown("---")
                     else:
         for idx, doc in enumerate(files):
             with st.expander(f"Preview File {idx}"):
+                st.write(doc["text"])
+        corpus = [{"text": doc["text"], "id": doc["doc_id"]} for doc in files]
         return corpus, doc_id

interface/config.py CHANGED Viewed

@@ -4,7 +4,7 @@ from interface.pages import page_landing_page, page_search, page_index
 session_state_variables = {
     "pipeline": None,
     "pipeline_func_parameters": [],
-    "doc_id": 0
 }
 # Define Pages for the demo

 session_state_variables = {
     "pipeline": None,
     "pipeline_func_parameters": [],
+    "doc_id": 0,
 }
 # Define Pages for the demo

interface/pages.py CHANGED Viewed

@@ -78,20 +78,18 @@ def page_index(container):
             default_index=0,
             orientation="horizontal",
         )
-        clear_index = st.sidebar.checkbox('Clear Index', True)
-        doc_id = st.session_state['doc_id']
         corpus, doc_id = input_funcs[selected_input][0](container, doc_id)
         if len(corpus) > 0:
             index_results = None
             if st.button("Index"):
                 index_results = index(
-                    corpus,
-                    st.session_state["pipeline"]["index_pipeline"],
-                    clear_index
                 )
-                st.session_state['doc_id'] = doc_id
             if index_results:
                 st.write(index_results)

             default_index=0,
             orientation="horizontal",
         )
+        clear_index = st.sidebar.checkbox("Clear Index", True)
+        doc_id = st.session_state["doc_id"]
         corpus, doc_id = input_funcs[selected_input][0](container, doc_id)
         if len(corpus) > 0:
             index_results = None
             if st.button("Index"):
                 index_results = index(
+                    corpus, st.session_state["pipeline"]["index_pipeline"], clear_index
                 )
+                st.session_state["doc_id"] = doc_id
             if index_results:
                 st.write(index_results)