import logging
import os
import gradio as gr
import pandas as pd
from pinecone import Pinecone
from utils import get_zotero_ids, get_arxiv_papers, get_hf_embeddings, upload_to_pinecone, get_new_papers, recommend_papers
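
# Credentials and Pinecone settings are read from environment variables;
# HF_API_KEY, PINECONE_API_KEY, INDEX_NAME, and NAMESPACE_NAME must be set before the app starts.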
HF_API_KEY = os.getenv('HF_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
INDEX_NAME = os.getenv('INDEX_NAME')
NAMESPACE_NAME = os.getenv('NAMESPACE_NAME')
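
# Work relative to this script's own directory so arxiv-scrape.csv and logfile.log
# are created next to the file regardless of the current working directory.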
script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir)
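
# Helpers that translate the UI radio selections into the values the arXiv query expects.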
def category_radio(cat):
    if cat == 'Computer Vision and Pattern Recognition':
        return 'cs.CV'
    elif cat == 'Computation and Language':
        return 'cs.CL'
    elif cat == 'Artificial Intelligence':
        return 'cs.AI'
    elif cat == 'Robotics':
        return 'cs.RO'
def comment_radio(com):
    if com == 'None':
        return None
    else:
        return com
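
# Clears all persisted state: the local arXiv scrape cache and the Pinecone index.
# It is wired to both the Reset button and the periodic timer below.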
def reset_project():
    file_path = 'arxiv-scrape.csv'
    if os.path.exists(file_path):
        os.remove(file_path)
        logging.info(f"{file_path} has been deleted. Delete reset_project() if you want to persist recommended papers.")
    api_key = os.getenv('PINECONE_API_KEY')
    index = os.getenv('INDEX_NAME')
    pc = Pinecone(api_key=api_key)
    if index in pc.list_indexes().names():
        pc.delete_index(index)
        logging.info(f"{index} index has been deleted from the vectordb. Delete reset_project() if you want to persist recommended papers.")
    return f"{file_path} has been deleted.<br />{index} index has been deleted from the vectordb.<br />"
def reset_csv():
    file_path = 'arxiv-scrape.csv'
    if os.path.exists(file_path):
        os.remove(file_path)
        logging.info(f"{file_path} has been deleted. Delete reset_csv() if you want to persist recommended papers.")
with gr.Blocks() as demo:
    zotero_api_key = gr.Textbox(label="Zotero API Key")
    zotero_library_id = gr.Textbox(label="Zotero Library ID")
    zotero_tag = gr.Textbox(label="Zotero Tag")

    arxiv_category_name = gr.State([])
    radio_arxiv_category_name = gr.Radio(['Computer Vision and Pattern Recognition', 'Computation and Language', 'Artificial Intelligence', 'Robotics'], value='Computer Vision and Pattern Recognition', label="ArXiv Category Query")
    radio_arxiv_category_name.change(fn=category_radio, inputs=radio_arxiv_category_name, outputs=arxiv_category_name)

    arxiv_comment_query = gr.State([])
    radio_arxiv_comment_query = gr.Radio(['CVPR', 'ACL', 'TACL', 'JAIR', 'IJRR', 'None'], value='CVPR', label="ArXiv Comment Query")
    radio_arxiv_comment_query.change(fn=comment_radio, inputs=radio_arxiv_comment_query, outputs=arxiv_comment_query)

    threshold = gr.Slider(minimum=0.70, maximum=0.99, value=0.80, label="Similarity Score Threshold")
    init_output = gr.Textbox(label="Project Initialization Result")
    rec_output = gr.Markdown(label="Recommended Papers")
    reset_output = gr.Markdown(label="Reset Declaration")

    init_btn = gr.Button("Initialize")
    rec_btn = gr.Button("Recommend")
    reset_btn = gr.Button("Reset")

    # gr.Timer fires every `value` seconds, so the project state is wiped every 10 minutes.
    timer = gr.Timer(value=600)
    timer.tick(reset_project)

    reset_btn.click(fn=reset_project, inputs=[], outputs=[reset_output])
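
    # Initialization pipeline: fetch tagged item IDs from Zotero, pull their metadata
    # from arXiv, embed the papers via the Hugging Face API, and upsert the vectors
    # into the configured Pinecone index/namespace.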
    @init_btn.click(inputs=[zotero_api_key, zotero_library_id, zotero_tag], outputs=[init_output])
    def init(zotero_api_key, zotero_library_id, zotero_tag, hf_api_key=HF_API_KEY, pinecone_api_key=PINECONE_API_KEY, index_name=INDEX_NAME, namespace_name=NAMESPACE_NAME):
        logging.basicConfig(filename='logfile.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
        logging.info("Project Initialization Script Started (Serverless)")

        ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)
        df = get_arxiv_papers(ids)
        embeddings, dim = get_hf_embeddings(hf_api_key, df)
        feedback = upload_to_pinecone(pinecone_api_key, index_name, namespace_name, embeddings, dim, df)
        logging.info(feedback)

        if isinstance(feedback, dict):
            return f"Retrieved {len(ids)} papers from Zotero. Successfully upserted {feedback['upserted_count']} embeddings in {namespace_name} namespace."
        else:
            return feedback
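
    # Recommendation pipeline: query arXiv for papers in the selected category/venue,
    # keep only papers not seen before, embed them, and query Pinecone for matches.
    # Note that the slider value is scaled by 3 before being passed on; the scoring
    # convention is defined in utils.recommend_papers.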
    @rec_btn.click(inputs=[arxiv_category_name, arxiv_comment_query, threshold], outputs=[rec_output])
    def recs(arxiv_category_name, arxiv_comment_query, threshold, hf_api_key=HF_API_KEY, pinecone_api_key=PINECONE_API_KEY, index_name=INDEX_NAME, namespace_name=NAMESPACE_NAME):
        logging.info("Weekly Script Started (Serverless)")

        df = get_arxiv_papers(category=arxiv_category_name, comment=arxiv_comment_query)
        df = get_new_papers(df)
        if not isinstance(df, pd.DataFrame):
            return df

        embeddings, _ = get_hf_embeddings(hf_api_key, df)
        results = recommend_papers(pinecone_api_key, index_name, namespace_name, embeddings, df, threshold * 3)
        return results
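
# share=True serves the app locally and also exposes it through a temporary public Gradio link.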
demo.launch(share=True)