Spaces:

huggingface
/

paper-central

Running

App Files Files Community

IAMJB committed on Dec 4, 2024

Commit

dbdfc66

1 Parent(s): 64632c4

chat paper

Browse files

Files changed (3) hide show

app.py +37 -2
df/PaperCentral.py +21 -3
paper_chat_tab.py +104 -48

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ from pr_paper_central_tab import pr_paper_central_tab
 from huggingface_hub import whoami
 import json
 import requests
 from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
 from paper_chat_tab import paper_chat_tab
@@ -189,7 +190,41 @@ with gr.Blocks(css_paths="style.css") as demo:
         with gr.Tab("Chat With Paper", id="tab-chat-with-paper", visible=False) as tab_chat_paper:
             gr.Markdown("## Chat with Paper")
             arxiv_id = gr.State(value=None)
-            paper_chat_tab(arxiv_id)
     # Define function to move to the next day
@@ -546,7 +581,7 @@ def main():
     """
     Launches the Gradio app.
     """
-    demo.launch(ssr_mode=False)
 # Run the main function when the script is executed

 from huggingface_hub import whoami
 import json
 import requests
+from bs4 import BeautifulSoup
 from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
 from paper_chat_tab import paper_chat_tab
         with gr.Tab("Chat With Paper", id="tab-chat-with-paper", visible=False) as tab_chat_paper:
             gr.Markdown("## Chat with Paper")
             arxiv_id = gr.State(value=None)
+            paper_from = gr.State(value=None)
+            paper_chat_tab(arxiv_id, paper_from)
+    # chat with paper
+    def get_selected(evt: gr.SelectData, dataframe_origin):
+        paper_id = gr.update(value=None)
+        paper_from = gr.update(value=None)
+        tab_chat_paper = gr.update(visible=False)
+        selected_tab = gr.Tabs()
+        try:
+            # Parse the HTML content
+            soup = BeautifulSoup(evt.value, "html.parser")
+            # Find all <a> tags
+            a_tags = soup.find_all('a')
+            for a_tag in a_tags:
+                # Check if 'action_id' attribute exists and equals 'chat-with-paper'
+                if a_tag.get('action_id') == 'chat-with-paper':
+                    paper_id = a_tag.get("paper_id")
+                    paper_from = a_tag.get("paper_from")
+                    tab_chat_paper = gr.update(visible=True)
+                    selected_tab = gr.Tabs(selected="tab-chat-with-paper")
+        except Exception as e:
+            print("The content is not valid HTML or another error occurred:", str(e))
+            pass
+        return paper_id, paper_from, tab_chat_paper, selected_tab
+    paper_central_component.select(get_selected, inputs=[paper_central_component],
+                                   outputs=[arxiv_id, paper_from, tab_chat_paper, tabs])
     # Define function to move to the next day
     """
     Launches the Gradio app.
     """
+    demo.launch(ssr_mode=False, share=True)
 # Run the main function when the script is executed

df/PaperCentral.py CHANGED Viewed

@@ -17,6 +17,7 @@ import numpy as np
 from datetime import datetime, timedelta
 import re
 class PaperCentral:
     """
     A class to manage and process paper data for display in a Gradio Dataframe component.
@@ -450,6 +451,20 @@ class PaperCentral:
                     columns_to_show.append('project_page')
                 filtered_df = filtered_df[(filtered_df['project_page'] != "") & (filtered_df['project_page'].notnull())]
         # Apply conference filtering
         if conference_options:
             columns_to_show = [col for col in columns_to_show if col not in ["date", "arxiv_id"]]
@@ -478,17 +493,20 @@ class PaperCentral:
                     )
                 filtered_df = filtered_df[conference_filter]
             if any(conf in ["NeurIPS2024 D&B", "NeurIPS2024"] for conf in conference_options):
-                def create_chat_link(row):
                     neurips_id = re.search(r'id=([^&]+)', row["proceedings"])
                     if neurips_id:
                         neurips_id = neurips_id.group(1)
-                        return f'<a href="/?tab=tab-chat-with-paper&paper_id={neurips_id}" id="custom_button" target="_blank">✨ Chat with paper</a>'
                     else:
                         return ""
                 # Add the "chat_with_paper" column
-                filtered_df['chat_with_paper'] = filtered_df.apply(create_chat_link, axis=1)
                 if 'chat_with_paper' not in columns_to_show:
                     columns_to_show.append('chat_with_paper')

 from datetime import datetime, timedelta
 import re
 class PaperCentral:
     """
     A class to manage and process paper data for display in a Gradio Dataframe component.
                     columns_to_show.append('project_page')
                 filtered_df = filtered_df[(filtered_df['project_page'] != "") & (filtered_df['project_page'].notnull())]
+        # create chat link
+        def create_chat_link(row):
+            if pd.notna(row["paper_page"]) and row["paper_page"] != "":
+                paper_id = row["paper_page"]
+                return f'<a' \
+                       f' action_id="chat-with-paper" paper_id="{paper_id}" paper_from="paper_page"' \
+                       f' id="custom_button">✨ Chat with paper</a>'
+            return ""
+        filtered_df['chat_with_paper'] = filtered_df.apply(create_chat_link, axis=1)
+        if 'chat_with_paper' not in columns_to_show:
+            columns_to_show.append('chat_with_paper')
         # Apply conference filtering
         if conference_options:
             columns_to_show = [col for col in columns_to_show if col not in ["date", "arxiv_id"]]
                     )
                 filtered_df = filtered_df[conference_filter]
+            # conference chat with paper
             if any(conf in ["NeurIPS2024 D&B", "NeurIPS2024"] for conf in conference_options):
+                def create_chat_neurips_link(row):
                     neurips_id = re.search(r'id=([^&]+)', row["proceedings"])
                     if neurips_id:
                         neurips_id = neurips_id.group(1)
+                        return f'<a' \
+                               f' action_id="chat-with-paper" paper_id={neurips_id} paper_from="neurips"' \
+                               f' id="custom_button">✨ Chat with paper</a>'
                     else:
                         return ""
                 # Add the "chat_with_paper" column
+                filtered_df['chat_with_paper'] = filtered_df.apply(create_chat_neurips_link, axis=1)
                 if 'chat_with_paper' not in columns_to_show:
                     columns_to_show.append('chat_with_paper')

paper_chat_tab.py CHANGED Viewed

@@ -78,6 +78,30 @@ def fetch_paper_info_neurips(paper_id):
     return preamble
 def fetch_paper_content(paper_id):
     try:
         # Construct the URL
@@ -230,25 +254,26 @@ def create_chat_interface(provider_dropdown, model_dropdown, paper_content, hf_t
             print(f"An unexpected error occurred: {ex}")
             yield f"{ex}"
     # Create the ChatInterface
     chat_interface = gr.ChatInterface(
         fn=get_fn,
-        chatbot=gr.Chatbot(
-            label="Chatbot",
-            scale=1,
-            height=400,
-            autoscroll=True,
-        ),
         additional_inputs=[paper_content, hf_token_input, provider_dropdown, model_dropdown, provider_max_total_tokens],
         type="tuples",
     )
-    return chat_interface
-def paper_chat_tab(paper_id):
     with gr.Column():
-        # Textbox to display the paper title and authors
-        content = gr.Markdown(value="")
         # Preamble message to hint the user
         gr.Markdown("**Note:** Providing your own API token can help you avoid rate limits.")
@@ -290,6 +315,14 @@ def paper_chat_tab(paper_id):
         # State to store the paper content
         paper_content = gr.State()
         # Function to update models and logo when provider changes
         def update_provider(selected_provider):
             provider_info = PROVIDERS[selected_provider]
@@ -314,63 +347,86 @@ def paper_chat_tab(paper_id):
                 placeholder=f"Enter your {selected_provider} API token to avoid rate limits"
             )
-            return model_dropdown_choices, logo_html_update, note_markdown_update, hf_token_input_update, chatbot_message_type, max_total_tokens
         provider_dropdown.change(
             fn=update_provider,
             inputs=provider_dropdown,
-            outputs=[model_dropdown, logo_html, note_markdown, hf_token_input, default_type, default_max_total_tokens],
             queue=False
         )
         # Function to update the paper info
-        def update_paper_info(paper_id_value, selected_model):
-            preamble = fetch_paper_info_neurips(paper_id_value)
-            text = fetch_paper_content(paper_id_value)
-            if preamble is None:
-                preamble = "Paper not found or could not retrieve paper information."
-            if text is None:
-                return preamble, None
-            return preamble, text
-        # Update paper content when paper ID or model changes
-        paper_id.change(
-            fn=update_paper_info,
-            inputs=[paper_id, model_dropdown],
-            outputs=[content, paper_content]
-        )
-        model_dropdown.change(
             fn=update_paper_info,
-            inputs=[paper_id, model_dropdown],
-            outputs=[content, paper_content],
-            queue=False,
         )
-        # Create the chat interface
-        chat_interface = create_chat_interface(provider_dropdown, model_dropdown, paper_content, hf_token_input,
-                                               default_type, default_max_total_tokens)
 def main():
     """
     Launches the Gradio app.
     """
     with gr.Blocks(css_paths="style.css") as demo:
-        x = gr.State(value="")  # Initialize with an empty state
-        def update_state():
-            """
-            Function to update the state.
-            """
-            return "5G7ve8E1Lu"
-        with gr.Row():
-            update_button = gr.Button("Update State")  # Button to update the state
-        # Update the state and reflect the change in the display
-        update_button.click(update_state, inputs=[], outputs=[x])
-        paper_chat_tab(x)
     demo.launch(ssr_mode=False)

     return preamble
+def fetch_paper_content_arxiv(paper_id):
+    try:
+        # Construct the URL for the arXiv PDF
+        url = f"https://arxiv.org/pdf/{paper_id}.pdf"
+        # Fetch the PDF
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception for HTTP errors
+        # Read the PDF content
+        pdf_content = BytesIO(response.content)
+        reader = PdfReader(pdf_content)
+        # Extract text from the PDF
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text()
+        return text  # Return full text; truncation will be handled later
+    except Exception as e:
+        print(f"Error fetching paper content: {e}")
+        return None
 def fetch_paper_content(paper_id):
     try:
         # Construct the URL
             print(f"An unexpected error occurred: {ex}")
             yield f"{ex}"
+    # Create the Chatbot separately to access it later
+    chatbot = gr.Chatbot(
+        label="Chatbot",
+        scale=1,
+        height=400,
+        autoscroll=True,
+    )
     # Create the ChatInterface
     chat_interface = gr.ChatInterface(
         fn=get_fn,
+        chatbot=chatbot,
         additional_inputs=[paper_content, hf_token_input, provider_dropdown, model_dropdown, provider_max_total_tokens],
         type="tuples",
     )
+    return chat_interface, chatbot
+def paper_chat_tab(paper_id, paper_from):
     with gr.Column():
         # Preamble message to hint the user
         gr.Markdown("**Note:** Providing your own API token can help you avoid rate limits.")
         # State to store the paper content
         paper_content = gr.State()
+        # Textbox to display the paper title and authors
+        content = gr.Markdown(value="")
+        # Create the chat interface and get the chatbot component
+        chat_interface, chatbot = create_chat_interface(provider_dropdown, model_dropdown, paper_content,
+                                                        hf_token_input,
+                                                        default_type, default_max_total_tokens)
         # Function to update models and logo when provider changes
         def update_provider(selected_provider):
             provider_info = PROVIDERS[selected_provider]
                 placeholder=f"Enter your {selected_provider} API token to avoid rate limits"
             )
+            # Reset the chatbot history
+            chatbot_reset = []  # This resets the chatbot conversation
+            return model_dropdown_choices, logo_html_update, note_markdown_update, hf_token_input_update, chatbot_message_type, max_total_tokens, chatbot_reset
         provider_dropdown.change(
             fn=update_provider,
             inputs=provider_dropdown,
+            outputs=[model_dropdown, logo_html, note_markdown, hf_token_input, default_type, default_max_total_tokens,
+                     chatbot],
             queue=False
         )
         # Function to update the paper info
+        def update_paper_info(paper_id_value, paper_from_value, selected_model):
+            if paper_from_value == "neurips":
+                preamble = fetch_paper_info_neurips(paper_id_value)
+                text = fetch_paper_content(paper_id_value)
+                if preamble is None:
+                    preamble = "Paper not found or could not retrieve paper information."
+                if text is None:
+                    return preamble, None, []
+                return preamble, text, []
+            elif paper_from_value == "paper_page":
+                # Fetch the paper information from Hugging Face API
+                url = f"https://huggingface.co/api/papers/{paper_id_value}?field=comments"
+                response = requests.get(url)
+                if response.status_code != 200:
+                    return "Paper not found or could not retrieve paper information.", None, []
+                paper_info = response.json()
+                # Extract required information
+                title = paper_info.get('title', 'No Title')
+                link = f"https://huggingface.co/papers/{paper_id_value}"
+                authors_list = [author.get('name', 'Unknown') for author in paper_info.get('authors', [])]
+                authors = ', '.join(authors_list)
+                summary = paper_info.get('summary', 'No Summary')
+                num_comments = len(paper_info.get('comments', []))
+                num_upvotes = paper_info.get('upvotes', 0)
+                # Format the preamble
+                preamble = f"🤗 [paper-page]({link})<br/>"
+                preamble += f"**Title:** {title}<br/>"
+                preamble += f"**Authors:** {authors}<br/>"
+                preamble += f"**Summary:**<br/>>\n{summary}<br/>"
+                preamble += f"👍{num_comments} 💬{num_upvotes} <br/>"
+                # Fetch the paper content
+                text = fetch_paper_content_arxiv(paper_id_value)
+                if text is None:
+                    text = "Paper content could not be retrieved."
+                return preamble, text, []
+            else:
+                return "", "", []
+        # Update paper content when paper ID changes
+        paper_id.change(
             fn=update_paper_info,
+            inputs=[paper_id, paper_from, model_dropdown],
+            outputs=[content, paper_content, chatbot]
         )
 def main():
     """
     Launches the Gradio app.
     """
     with gr.Blocks(css_paths="style.css") as demo:
+        # Standalone harness: visible inputs stand in for the paper id / source
+        # state that app.py passes to paper_chat_tab.
+        # Create an input for paper_id
+        paper_id = gr.Textbox(label="Paper ID", value="")
+        # Create an input for paper_from (e.g., 'neurips' or 'paper_page')
+        paper_from = gr.Radio(
+            label="Paper Source",
+            choices=["neurips", "paper_page"],
+            value="neurips"
+        )
+        # Build the paper chat tab
+        paper_chat_tab(paper_id, paper_from)
     demo.launch(ssr_mode=False)