Spaces:

nouamanetazi
/

hf-search

Running

App Files Files Community

nouamanetazi HF staff committed on Feb 11, 2022

Commit

cf61e60

1 Parent(s): da75a62

update routes

Browse files

Files changed (3) hide show

pages/search_engine.py +23 -32
server/api.py +37 -2
streamlit_app.py +11 -0

pages/search_engine.py CHANGED Viewed

@@ -50,25 +50,12 @@ def paginator(label, articles, articles_per_page=10, on_sidebar=True):
     return itertools.islice(enumerate(articles), min_index, max_index)
 def page():
-    st.set_page_config(
-        page_title="HF Search Engine",
-        page_icon="🔎",
-        layout="wide",
-        initial_sidebar_state="auto",
-        # menu_items={
-        #     "Get Help": "https://www.extremelycoolapp.com/help",
-        #     "Report a bug": "https://www.extremelycoolapp.com/bug",
-        #     "About": "# This is a header. This is an *extremely* cool app!",
-        # },
-    )
     ### SIDEBAR
     search_backend = st.sidebar.selectbox(
-        "Search Engine",
-        ["hfapi", "custom"],
-        format_func=lambda x: {"hfapi": "Huggingface API", "custom": "Sentence Bert"}[x],
     )
     limit_results = st.sidebar.number_input("Limit results", min_value=0, value=10)
@@ -112,22 +99,22 @@ def page():
     if search_query != "":
         response = requests.post(search_url, headers=headers, json=search_body).json()
-        record_list = []
         _ = [
-            record_list.append(
                 {
-                    "modelId": record["modelId"],
-                    "tags": record["tags"],
-                    "downloads": record["downloads"],
-                    "likes": record["likes"],
                 }
             )
-            for record in response.get("value")
         ]
-        # filter results
-        if record_list:
             st.write(f'Search results ({response.get("count")}):')
             if response.get("count") > 100:
@@ -135,16 +122,20 @@ def page():
             else:
                 shown_results = response.get("count")
-            for i, record in paginator(
                 f"Select results (showing {shown_results} of {response.get('count')} results)",
-                record_list,
             ):
                 col1, col2, col3 = st.columns([5,1,1])
-                col1.metric("Model", record["modelId"])
-                col2.metric("N° downloads", numerize(record["downloads"]))
-                col3.metric("N° likes", numerize(record["likes"]))
-                st.button(f"View model", on_click=lambda record=record: webbrowser.open(f"https://huggingface.co/{record['modelId']})"), key=record["modelId"])
-                st.markdown(f"**Tags:** {'  •  '.join(record['tags'])}")
                 # TODO: embed huggingface spaces
                 #                 import streamlit.components.v1 as components

     return itertools.islice(enumerate(articles), min_index, max_index)
 def page():
     ### SIDEBAR
     search_backend = st.sidebar.selectbox(
+        "Search method",
+        ["semantic", "bm25", "hfapi"],
+        format_func=lambda x: {"hfapi": "Keyword search", "bm25": "BM25 search", "semantic": "Semantic Search"}[x],
     )
     limit_results = st.sidebar.number_input("Limit results", min_value=0, value=10)
     if search_query != "":
         response = requests.post(search_url, headers=headers, json=search_body).json()
+        hit_list = []
         _ = [
+            hit_list.append(
                 {
+                    "modelId": hit["modelId"],
+                    "tags": hit["tags"],
+                    "downloads": hit["downloads"],
+                    "likes": hit["likes"],
+                    "readme": hit.get("readme", None),
                 }
             )
+            for hit in response.get("value")
         ]
+        if hit_list:
             st.write(f'Search results ({response.get("count")}):')
             if response.get("count") > 100:
             else:
                 shown_results = response.get("count")
+            for i, hit in paginator(
                 f"Select results (showing {shown_results} of {response.get('count')} results)",
+                hit_list,
             ):
                 col1, col2, col3 = st.columns([5,1,1])
+                col1.metric("Model", hit["modelId"])
+                col2.metric("N° downloads", numerize(hit["downloads"]))
+                col3.metric("N° likes", numerize(hit["likes"]))
+                st.button(f"View model on 🤗", on_click=lambda hit=hit: webbrowser.open(f"https://huggingface.co/{hit['modelId']}"), key=hit["modelId"])
+                st.markdown(f"**Tags:** {'  •  '.join(hit['tags'])}")
+                if hit["readme"]:
+                    with st.expander("See README"):
+                        st.write(hit["readme"])
                 # TODO: embed huggingface spaces
                 #                 import streamlit.components.v1 as components

server/api.py CHANGED Viewed

@@ -46,8 +46,8 @@ def hf_api():
     return json.dumps({"value": hits, "count": count})
-@app.route("/custom/search", methods=["POST"])
-def main():
     request_data = request.get_json()
     query = request_data.get("query")
     filters = json.loads(request_data.get("filters"))
@@ -58,6 +58,41 @@ def main():
     # TODO: filters
     hits = hf_search(query=query, method="retrieve & rerank", limit=limit)
     return json.dumps({"value": hits, "count": len(hits)})

     return json.dumps({"value": hits, "count": count})
+@app.route("/semantic/search", methods=["POST"])
+def semantic_search():
     request_data = request.get_json()
     query = request_data.get("query")
     filters = json.loads(request_data.get("filters"))
     # TODO: filters
     hits = hf_search(query=query, method="retrieve & rerank", limit=limit)
+    hits = [
+        {
+            "modelId": hit["modelId"],
+            "tags": hit["tags"],
+            "downloads": hit["downloads"],
+            "likes": hit["likes"],
+            "readme": hit.get("readme", None),
+        }
+        for hit in hits
+    ]
+    return json.dumps({"value": hits, "count": len(hits)})
+@app.route("/bm25/search", methods=["POST"])
+def bm25_search():
+    request_data = request.get_json()
+    query = request_data.get("query")
+    filters = json.loads(request_data.get("filters"))
+    limit = request_data.get("limit", 5)
+    print("query", query)
+    print("filters", filters)
+    print("limit", limit)
+    # TODO: filters
+    hits = hf_search(query=query, method="bm25", limit=limit)
+    hits = [
+        {
+            "modelId": hit["modelId"],
+            "tags": hit["tags"],
+            "downloads": hit["downloads"],
+            "likes": hit["likes"],
+            "readme": hit.get("readme", None),
+        }
+        for hit in hits
+    ]
+    pprint(hits)
     return json.dumps({"value": hits, "count": len(hits)})

streamlit_app.py CHANGED Viewed

@@ -11,6 +11,17 @@ def set_record(record):
 if not st.session_state["selected_record"]:  # search engine page
     search_engine_page()
 else:  # a record has been selected

 if not st.session_state["selected_record"]:  # search engine page
+    st.set_page_config(
+        page_title="HuggingFace Search Engine",
+        page_icon="🔎",
+        layout="wide",
+        initial_sidebar_state="auto",
+        # menu_items={
+        #     "Get Help": "https://www.extremelycoolapp.com/help",
+        #     "Report a bug": "https://www.extremelycoolapp.com/bug",
+        #     "About": "# This is a header. This is an *extremely* cool app!",
+        # },
+    )
     search_engine_page()
 else:  # a record has been selected