# Hugging Face Space: Weedoo / Research-Paper-Recommendation-System (status: Running)
# File size: 3,767 Bytes
import logging
import os
import gradio as gr
import pandas as pd
from utils import get_zotero_ids, get_arxiv_papers, get_hf_embeddings, upload_to_pinecone, get_new_papers, recommend_papers

# Credentials and Pinecone targets are read from the process environment once,
# at import time; each name is None when its variable is not set.
HF_API_KEY = os.environ.get('HF_API_KEY')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
INDEX_NAME = os.environ.get('INDEX_NAME')
NAMESPACE_NAME = os.environ.get('NAMESPACE_NAME')

def category_radio(cat):
    """Translate a readable arXiv category label into its arXiv category code.

    Returns the code for one of the four known labels, or None when ``cat``
    matches none of them.
    """
    label_to_code = (
        ('Computer Vision and Pattern Recognition', 'cs.CV'),
        ('Computation and Language', 'cs.CL'),
        ('Artificial Intelligence', 'cs.AI'),
        ('Robotics', 'cs.RO'),
    )
    # Equality scan (rather than a dict lookup) so unhashable input still
    # simply falls through to None.
    return next((code for label, code in label_to_code if cat == label), None)

def comment_radio(com):
    """Map the comment radio choice to a query value.

    Returns 'CVPR' when ``com`` equals 'CVPR'; every other value (including
    the literal string 'None') yields None.
    """
    return 'CVPR' if com == 'CVPR' else None
    
def recommend_link(recs):
    """Return ``recs`` unchanged (identity pass-through)."""
    return recs

# Declare the UI components and wire their change events inside a gr.Blocks
# context bound to `demo`. Statement order is kept as-is on purpose.
with gr.Blocks() as demo:

    # Zotero inputs consumed by the "Initialize" click handler.
    zotero_api_key = gr.Textbox(label="Zotero API Key")

    zotero_library_id = gr.Textbox(label="Zotero Library ID")

    zotero_tag = gr.Textbox(label="Zotero Tag")

    # The radio exposes readable labels; on change, category_radio converts the
    # selection to an arXiv code and stores it in the State below.
    # NOTE(review): the State starts as [] and only changes once the radio is
    # touched — confirm get_arxiv_papers copes with that initial value.
    arxiv_category_name = gr.State([])
    radio_arxiv_category_name = gr.Radio(['Computer Vision and Pattern Recognition', 'Computation and Language', 'Artificial Intelligence', 'Robotics'], label="ArXiv Category Query")
    radio_arxiv_category_name.change(fn = category_radio, inputs= radio_arxiv_category_name, outputs= arxiv_category_name)

    # Same pattern for the comment filter: comment_radio yields 'CVPR' or None.
    # NOTE(review): this State also starts as [] rather than None — confirm.
    arxiv_comment_query = gr.State([])
    radio_arxiv_comment_query = gr.Radio(['CVPR', 'None'], label="ArXiv Comment Query")
    radio_arxiv_comment_query.change(fn = comment_radio, inputs= radio_arxiv_comment_query, outputs= arxiv_comment_query)

    # Similarity cut-off handed to the "Recommend" click handler.
    threshold = gr.Slider(minimum= 0.70, maximum= 0.99, label="Similarity Score Threshold")

    # Output areas: one per button.
    init_output = gr.Textbox(label="Project Initialization Result")

    rec_output = gr.Markdown(label = "Recommended Papers")

    init_btn = gr.Button("Initialize")

    rec_btn = gr.Button("Recommend")

    @init_btn.click(inputs= [zotero_api_key, zotero_library_id, zotero_tag], outputs= [init_output])
    def init(zotero_api_key, zotero_library_id, zotero_tag, hf_api_key = HF_API_KEY,  pinecone_api_key = PINECONE_API_KEY, index_name = INDEX_NAME,  namespace_name = NAMESPACE_NAME):
        """Run the initialization pipeline for the "Initialize" button.

        Fetches paper ids from Zotero, looks them up with ``get_arxiv_papers``,
        embeds them with ``get_hf_embeddings`` and uploads the result with
        ``upload_to_pinecone``.

        Args:
            zotero_api_key: Value of the "Zotero API Key" textbox.
            zotero_library_id: Value of the "Zotero Library ID" textbox.
            zotero_tag: Value of the "Zotero Tag" textbox.
            hf_api_key: Defaults to the ``HF_API_KEY`` environment value.
            pinecone_api_key: Defaults to the ``PINECONE_API_KEY`` environment value.
            index_name: Defaults to the ``INDEX_NAME`` environment value.
            namespace_name: Defaults to the ``NAMESPACE_NAME`` environment value.

        Returns:
            A summary string when the upload feedback is a dict; otherwise the
            feedback itself, unchanged (presumably an error message — confirm
            against ``upload_to_pinecone``).
        """
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        try:
            logging.basicConfig(filename= '/mnt/c/Users/ankit/Desktop/Portfolio/Paper-Recommendation-System/logs/logfile.log', level=logging.INFO, format=log_format)
        except OSError:
            # The log path is machine-specific; where it does not exist, keep
            # the handler usable by logging to the default stream instead of
            # failing the whole click.
            logging.basicConfig(level=logging.INFO, format=log_format)
        logging.info("Project Initialization Script Started (Serverless)")

        ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)

        df = get_arxiv_papers(ids)

        embeddings, dim = get_hf_embeddings(hf_api_key, df)

        feedback = upload_to_pinecone(pinecone_api_key, index_name, namespace_name, embeddings, dim, df)

        logging.info(feedback)
        # `feedback is dict` compared against the dict *type* and was never
        # true for an actual dict; an instance check is what is meant.
        if isinstance(feedback, dict):
            return f"Retrieved {len(ids)} papers from Zotero. Successfully upserted {feedback['upserted_count']} embeddings in {namespace_name} namespace."
        return feedback
    
    @rec_btn.click(
        inputs=[arxiv_category_name, arxiv_comment_query, threshold],
        outputs=[rec_output],
    )
    def recs(arxiv_category_name, arxiv_comment_query, threshold, hf_api_key = HF_API_KEY,  pinecone_api_key = PINECONE_API_KEY, index_name = INDEX_NAME,  namespace_name = NAMESPACE_NAME):
        """Run the recommendation pipeline for the "Recommend" button.

        Queries arXiv for the chosen category/comment, filters the result
        through ``get_new_papers``, and — when that yields a DataFrame — embeds
        the papers and returns whatever ``recommend_papers`` produces for the
        given threshold. A non-DataFrame result from ``get_new_papers`` is
        returned to the UI as-is.
        """
        logging.info("Weekly Script Started (Serverless)")

        fetched = get_arxiv_papers(category=arxiv_category_name, comment=arxiv_comment_query)
        candidates = get_new_papers(fetched)

        if isinstance(candidates, pd.DataFrame):
            # Only the embeddings are needed here; the second item is ignored.
            vectors, _unused = get_hf_embeddings(hf_api_key, candidates)
            return recommend_papers(
                pinecone_api_key,
                index_name,
                namespace_name,
                vectors,
                candidates,
                threshold,
            )

        # Anything other than a DataFrame is passed straight through.
        return candidates

# Start serving the app at import/run time; share=True requests a public
# share link per Gradio's launch API.
demo.launch(share = True)