File size: 3,767 Bytes
4596869 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import logging
import os
import gradio as gr
import pandas as pd
from utils import get_zotero_ids, get_arxiv_papers, get_hf_embeddings, upload_to_pinecone, get_new_papers, recommend_papers
# Credentials and Pinecone targets are read once from the process environment
# at import time; each name is None when its variable is not set.
HF_API_KEY = os.environ.get('HF_API_KEY')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
INDEX_NAME = os.environ.get('INDEX_NAME')
NAMESPACE_NAME = os.environ.get('NAMESPACE_NAME')
def category_radio(cat):
    """Translate a category label from the radio group into its arXiv code.

    Returns 'cs.CV', 'cs.CL', 'cs.AI' or 'cs.RO' for the four labels offered
    in the UI, and None for any other value (including None itself).
    """
    label_to_code = (
        ('Computer Vision and Pattern Recognition', 'cs.CV'),
        ('Computation and Language', 'cs.CL'),
        ('Artificial Intelligence', 'cs.AI'),
        ('Robotics', 'cs.RO'),
    )
    # Compare with == rather than hashing so that any input, even an
    # unhashable one, simply falls through to None.
    for label, code in label_to_code:
        if cat == label:
            return code
    return None
def comment_radio(com):
    """Translate the comment radio selection into a comment filter value.

    Only the 'CVPR' choice yields a filter ('CVPR'); every other value,
    including the literal 'None' choice, yields None.
    """
    return 'CVPR' if com == 'CVPR' else None
def recommend_link(recs):
    """Return the recommendations unchanged (identity pass-through)."""
    return recs
# Log destination used by the original deployment, and the shared record layout.
_LOG_FILE = '/mnt/c/Users/ankit/Desktop/Portfolio/Paper-Recommendation-System/logs/logfile.log'
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'


def _configure_logging():
    """Send root-logger output to the log file, falling back to stderr.

    ``logging.basicConfig`` does nothing once the root logger has handlers,
    so every event handler can call this and only the first call takes effect.
    """
    try:
        logging.basicConfig(filename=_LOG_FILE, level=logging.INFO, format=_LOG_FORMAT)
    except OSError:
        # The absolute path is machine-specific. When its directory is missing
        # or not writable, keep the app usable by logging to stderr instead of
        # failing the whole button click.
        logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)


with gr.Blocks() as demo:
    # Inputs for the one-off initialization step.
    zotero_api_key = gr.Textbox(label="Zotero API Key")
    zotero_library_id = gr.Textbox(label="Zotero Library ID")
    zotero_tag = gr.Textbox(label="Zotero Tag")

    # The radios show human-readable labels; their change handlers store the
    # mapped value in State, and `recs` reads the State rather than the radio.
    arxiv_category_name = gr.State([])
    radio_arxiv_category_name = gr.Radio(
        ['Computer Vision and Pattern Recognition', 'Computation and Language', 'Artificial Intelligence', 'Robotics'],
        label="ArXiv Category Query",
    )
    radio_arxiv_category_name.change(
        fn=category_radio,
        inputs=radio_arxiv_category_name,
        outputs=arxiv_category_name,
    )

    arxiv_comment_query = gr.State([])
    radio_arxiv_comment_query = gr.Radio(['CVPR', 'None'], label="ArXiv Comment Query")
    radio_arxiv_comment_query.change(
        fn=comment_radio,
        inputs=radio_arxiv_comment_query,
        outputs=arxiv_comment_query,
    )

    threshold = gr.Slider(minimum=0.70, maximum=0.99, label="Similarity Score Threshold")

    init_output = gr.Textbox(label="Project Initialization Result")
    rec_output = gr.Markdown(label="Recommended Papers")

    init_btn = gr.Button("Initialize")
    rec_btn = gr.Button("Recommend")

    @init_btn.click(inputs=[zotero_api_key, zotero_library_id, zotero_tag], outputs=[init_output])
    def init(zotero_api_key, zotero_library_id, zotero_tag, hf_api_key=HF_API_KEY, pinecone_api_key=PINECONE_API_KEY, index_name=INDEX_NAME, namespace_name=NAMESPACE_NAME):
        """Handle the "Initialize" click: Zotero -> arXiv -> embeddings -> Pinecone.

        The three Zotero arguments come from the textboxes; the remaining
        parameters default to the environment-backed module constants.

        Returns the text shown in the "Project Initialization Result" box:
        a success summary when the upload helper returns a dict, otherwise
        whatever the helper returned (presumably an error message -- confirm
        against ``utils.upload_to_pinecone``).
        """
        _configure_logging()
        logging.info("Project Initialization Script Started (Serverless)")

        ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)
        df = get_arxiv_papers(ids)
        embeddings, dim = get_hf_embeddings(hf_api_key, df)
        feedback = upload_to_pinecone(pinecone_api_key, index_name, namespace_name, embeddings, dim, df)
        logging.info(feedback)

        # `feedback is dict` compared against the type object itself and was
        # never true, so the success message below could not be reached.
        if isinstance(feedback, dict):
            return f"Retrieved {len(ids)} papers from Zotero. Successfully upserted {feedback['upserted_count']} embeddings in {namespace_name} namespace."
        return feedback

    @rec_btn.click(inputs=[arxiv_category_name, arxiv_comment_query, threshold], outputs=[rec_output])
    def recs(arxiv_category_name, arxiv_comment_query, threshold, hf_api_key=HF_API_KEY, pinecone_api_key=PINECONE_API_KEY, index_name=INDEX_NAME, namespace_name=NAMESPACE_NAME):
        """Handle the "Recommend" click: fetch arXiv papers and rank them.

        The category and comment values come from the State components fed by
        the radios, and ``threshold`` from the slider; the remaining
        parameters default to the environment-backed module constants.

        Returns the value rendered in the "Recommended Papers" Markdown: the
        non-DataFrame result of ``get_new_papers`` when there is one
        (presumably a "nothing new" message -- confirm against ``utils``),
        otherwise the output of ``recommend_papers``.
        """
        # Configure logging here too: if this handler runs before `init`, the
        # first logging call would otherwise install the default stderr
        # handler and file logging would never start.
        _configure_logging()
        logging.info("Weekly Script Started (Serverless)")

        df = get_arxiv_papers(category=arxiv_category_name, comment=arxiv_comment_query)
        df = get_new_papers(df)
        if not isinstance(df, pd.DataFrame):
            return df

        embeddings, _ = get_hf_embeddings(hf_api_key, df)
        results = recommend_papers(pinecone_api_key, index_name, namespace_name, embeddings, df, threshold)
        return results

demo.launch(share=True)