Spaces:

Weedoo
/

Research-Paper-Recommendation-System

Sleeping

Weedoo committed on Aug 8

Commit

cf0645c

•

1 Parent(s): 4151f23

Update the options input for the Hugging Face API request and update exception handling in app.py

Files changed (2) hide show

app.py CHANGED Viewed

@@ -43,6 +43,12 @@ def reset_project():
         logging.info(f"{index} index has been deleted from the vectordb. Delete reset_project() if you want to persist recommended papers.")
     return f"{file_path} has been deleted.<br />{index} index has been deleted from the vectordb.<br />"
 with gr.Blocks() as demo:
     zotero_api_key = gr.Textbox(label="Zotero API Key")
@@ -87,13 +93,8 @@ with gr.Blocks() as demo:
         ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)
         df = get_arxiv_papers(ids)
-        try:
-            embeddings, dim = get_hf_embeddings(hf_api_key, df)
-        except KeyError as e:
-            print(e)
-            print('\n Resetting project...')
-            reset_project()
-            exit()
         feedback = upload_to_pinecone(pinecone_api_key, index_name, namespace_name, embeddings, dim, df)

         logging.info(f"{index} index has been deleted from the vectordb. Delete reset_project() if you want to persist recommended papers.")
     return f"{file_path} has been deleted.<br />{index} index has been deleted from the vectordb.<br />"
+def reset_csv():
+    file_path = 'arxiv-scrape.csv'
+    if os.path.exists(file_path):
+        os.remove(file_path)
+        logging.info(f"{file_path} has been deleted. Delete reset_project() if you want to persist recommended papers.")
 with gr.Blocks() as demo:
     zotero_api_key = gr.Textbox(label="Zotero API Key")
         ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)
         df = get_arxiv_papers(ids)
+        embeddings, dim = get_hf_embeddings(hf_api_key, df)
         feedback = upload_to_pinecone(pinecone_api_key, index_name, namespace_name, embeddings, dim, df)

utils.py CHANGED Viewed

@@ -59,10 +59,10 @@ def get_hf_embeddings(api_key, df):
     API_URL = "https://api-inference.huggingface.co/models/malteos/scincl"
     headers = {"Authorization": f"Bearer {api_key}"}
-    response = requests.post(API_URL, headers=headers, json={"inputs": title_abs, "wait_for_model": False})
-    print(str(response.status_code) + 'This part needs an update, causing KeyError 0 ')
     if response.status_code == 503:
-        response = requests.post(API_URL, headers=headers, json={"inputs": title_abs, "wait_for_model": True})
     embeddings = response.json()
@@ -102,9 +102,9 @@ def get_new_papers(df):
     if df.empty:
         return 'No New Papers Found'
     else:
-        df_main = pd.concat([df_main, df], ignore_index= True)
-        df_main.drop_duplicates(inplace= True)
-        df_main.to_csv('arxiv-scrape.csv', index = False)
         return df
 def recommend_papers(api_key, index, namespace, embeddings, df, threshold):

     API_URL = "https://api-inference.huggingface.co/models/malteos/scincl"
     headers = {"Authorization": f"Bearer {api_key}"}
+    response = requests.post(API_URL, headers=headers, json={"inputs": title_abs, "options": {"wait_for_model": False}})
     if response.status_code == 503:
+        response = requests.post(API_URL, headers=headers, json={"inputs": title_abs, "options": {"wait_for_model": True}})
     embeddings = response.json()
     if df.empty:
         return 'No New Papers Found'
     else:
+        # df_main = pd.concat([df_main, df], ignore_index= True) #persistence of recommended paper removed for demo
+        # df_main.drop_duplicates(inplace= True)
+        # df_main.to_csv('arxiv-scrape.csv', index = False)
         return df
 def recommend_papers(api_key, index, namespace, embeddings, df, threshold):