|
import logging |
|
import os |
|
import gradio as gr |
|
import pandas as pd |
|
from utils import get_zotero_ids, get_arxiv_papers, get_hf_embeddings, upload_to_pinecone, get_new_papers, recommend_papers |
|
|
|
# Runtime configuration read once from the process environment at import time.
# Each constant is None when its environment variable is not set.
HF_API_KEY = os.environ.get('HF_API_KEY')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
INDEX_NAME = os.environ.get('INDEX_NAME')
NAMESPACE_NAME = os.environ.get('NAMESPACE_NAME')
|
|
|
def category_radio(cat):
    """Translate a human-readable arXiv category label into its arXiv code.

    Args:
        cat: Category label, e.g. ``'Robotics'``.

    Returns:
        The matching arXiv code (``'cs.CV'``, ``'cs.CL'``, ``'cs.AI'`` or
        ``'cs.RO'``), or ``None`` when ``cat`` is not one of the four
        known labels.
    """
    label_to_code = (
        ('Computer Vision and Pattern Recognition', 'cs.CV'),
        ('Computation and Language', 'cs.CL'),
        ('Artificial Intelligence', 'cs.AI'),
        ('Robotics', 'cs.RO'),
    )
    # Equality scan rather than a dict lookup, so any input (hashable or
    # not) that matches no label simply falls through to None.
    for label, code in label_to_code:
        if cat == label:
            return code
    return None
|
|
|
def comment_radio(com):
    """Convert the comment radio selection into a comment filter value.

    Args:
        com: Selected option.

    Returns:
        ``'CVPR'`` when ``com`` equals ``'CVPR'``; ``None`` for every other
        value, including the literal string ``'None'``.
    """
    return 'CVPR' if com == 'CVPR' else None
|
|
|
def recommend_link(recs):
    """Return ``recs`` unchanged (identity pass-through).

    Args:
        recs: Any value.

    Returns:
        The very same object that was passed in.
    """
    return recs
|
|
|
with gr.Blocks() as demo:
    # --- Inputs used by the "Initialize" action ---------------------------
    zotero_api_key = gr.Textbox(label="Zotero API Key")
    zotero_library_id = gr.Textbox(label="Zotero Library ID")
    zotero_tag = gr.Textbox(label="Zotero Tag")

    # --- Inputs used by the "Recommend" action ----------------------------
    # The radio shows a readable label; its change handler stores the value
    # returned by category_radio in a State that the recs handler reads.
    arxiv_category_name = gr.State([])
    radio_arxiv_category_name = gr.Radio(
        [
            'Computer Vision and Pattern Recognition',
            'Computation and Language',
            'Artificial Intelligence',
            'Robotics',
        ],
        label="ArXiv Category Query",
    )
    radio_arxiv_category_name.change(
        fn=category_radio,
        inputs=radio_arxiv_category_name,
        outputs=arxiv_category_name,
    )

    # Same pattern for the comment filter: comment_radio turns the selection
    # into either 'CVPR' or None.
    arxiv_comment_query = gr.State([])
    radio_arxiv_comment_query = gr.Radio(['CVPR', 'None'], label="ArXiv Comment Query")
    radio_arxiv_comment_query.change(
        fn=comment_radio,
        inputs=radio_arxiv_comment_query,
        outputs=arxiv_comment_query,
    )

    threshold = gr.Slider(minimum=0.70, maximum=0.99, label="Similarity Score Threshold")

    # --- Outputs and action buttons ---------------------------------------
    init_output = gr.Textbox(label="Project Initialization Result")
    rec_output = gr.Markdown(label="Recommended Papers")
    init_btn = gr.Button("Initialize")
    rec_btn = gr.Button("Recommend")

    @init_btn.click(inputs=[zotero_api_key, zotero_library_id, zotero_tag], outputs=[init_output])
    def init(zotero_api_key, zotero_library_id, zotero_tag, hf_api_key=HF_API_KEY, pinecone_api_key=PINECONE_API_KEY, index_name=INDEX_NAME, namespace_name=NAMESPACE_NAME):
        """Handle a click on "Initialize".

        Calls, in order, ``get_zotero_ids``, ``get_arxiv_papers``,
        ``get_hf_embeddings`` and ``upload_to_pinecone``, then reports the
        outcome.

        Args:
            zotero_api_key: Value of the "Zotero API Key" textbox.
            zotero_library_id: Value of the "Zotero Library ID" textbox.
            zotero_tag: Value of the "Zotero Tag" textbox.
            hf_api_key: Passed to ``get_hf_embeddings``; defaults to the
                ``HF_API_KEY`` module constant.
            pinecone_api_key: Passed to ``upload_to_pinecone``; defaults to
                the ``PINECONE_API_KEY`` module constant.
            index_name: Passed to ``upload_to_pinecone``; defaults to
                ``INDEX_NAME``.
            namespace_name: Passed to ``upload_to_pinecone``; defaults to
                ``NAMESPACE_NAME``.

        Returns:
            A summary string when ``upload_to_pinecone`` returns a dict;
            otherwise whatever ``upload_to_pinecone`` returned, unchanged
            (presumably an error message - confirm against utils).
        """
        # NOTE(review): the log path is an absolute, machine-specific
        # location; logging.basicConfig raises if its directory is missing.
        # NOTE(review): basicConfig is a no-op once the root logger has
        # handlers. If "Recommend" is clicked first, its logging.info call
        # installs a default handler and this file configuration is skipped.
        logging.basicConfig(filename='/mnt/c/Users/ankit/Desktop/Portfolio/Paper-Recommendation-System/logs/logfile.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
        logging.info("Project Initialization Script Started (Serverless)")

        ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)
        df = get_arxiv_papers(ids)
        embeddings, dim = get_hf_embeddings(hf_api_key, df)
        feedback = upload_to_pinecone(pinecone_api_key, index_name, namespace_name, embeddings, dim, df)

        logging.info(feedback)

        # Type check, not identity check: `feedback is dict` compared the
        # value to the dict class itself and was never true for a real dict,
        # so the success message below could not be reached.
        if isinstance(feedback, dict):
            return f"Retrieved {len(ids)} papers from Zotero. Successfully upserted {feedback['upserted_count']} embeddings in {namespace_name} namespace."
        return feedback

    @rec_btn.click(inputs=[arxiv_category_name, arxiv_comment_query, threshold], outputs=[rec_output])
    def recs(arxiv_category_name, arxiv_comment_query, threshold, hf_api_key=HF_API_KEY, pinecone_api_key=PINECONE_API_KEY, index_name=INDEX_NAME, namespace_name=NAMESPACE_NAME):
        """Handle a click on "Recommend".

        Calls ``get_arxiv_papers`` with the stored category and comment
        values, passes the result through ``get_new_papers``, then calls
        ``get_hf_embeddings`` and ``recommend_papers``.

        Args:
            arxiv_category_name: Current value of the category State.
            arxiv_comment_query: Current value of the comment State.
            threshold: Value of the "Similarity Score Threshold" slider.
            hf_api_key: Passed to ``get_hf_embeddings``; defaults to the
                ``HF_API_KEY`` module constant.
            pinecone_api_key: Passed to ``recommend_papers``; defaults to
                the ``PINECONE_API_KEY`` module constant.
            index_name: Passed to ``recommend_papers``; defaults to
                ``INDEX_NAME``.
            namespace_name: Passed to ``recommend_papers``; defaults to
                ``NAMESPACE_NAME``.

        Returns:
            The result of ``recommend_papers``, or the value returned by
            ``get_new_papers`` when that value is not a pandas DataFrame.
        """
        logging.info("Weekly Script Started (Serverless)")

        df = get_arxiv_papers(category=arxiv_category_name, comment=arxiv_comment_query)
        df = get_new_papers(df)

        # A non-DataFrame result is shown to the user as-is instead of being
        # embedded (presumably a status message - confirm against utils).
        if not isinstance(df, pd.DataFrame):
            return df

        embeddings, _ = get_hf_embeddings(hf_api_key, df)
        results = recommend_papers(pinecone_api_key, index_name, namespace_name, embeddings, df, threshold)

        return results


demo.launch(share=True)