jbdel committed on
Commit
dbdfc66
1 Parent(s): 64632c4

chat paper

Browse files
Files changed (3) hide show
  1. app.py +37 -2
  2. df/PaperCentral.py +21 -3
  3. paper_chat_tab.py +104 -48
app.py CHANGED
@@ -8,6 +8,7 @@ from pr_paper_central_tab import pr_paper_central_tab
8
  from huggingface_hub import whoami
9
  import json
10
  import requests
 
11
 
12
  from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
13
  from paper_chat_tab import paper_chat_tab
@@ -189,7 +190,41 @@ with gr.Blocks(css_paths="style.css") as demo:
189
  with gr.Tab("Chat With Paper", id="tab-chat-with-paper", visible=False) as tab_chat_paper:
190
  gr.Markdown("## Chat with Paper")
191
  arxiv_id = gr.State(value=None)
192
- paper_chat_tab(arxiv_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
 
195
  # Define function to move to the next day
@@ -546,7 +581,7 @@ def main():
546
  """
547
  Launches the Gradio app.
548
  """
549
- demo.launch(ssr_mode=False)
550
 
551
 
552
  # Run the main function when the script is executed
 
8
  from huggingface_hub import whoami
9
  import json
10
  import requests
11
+ from bs4 import BeautifulSoup
12
 
13
  from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
14
  from paper_chat_tab import paper_chat_tab
 
190
  with gr.Tab("Chat With Paper", id="tab-chat-with-paper", visible=False) as tab_chat_paper:
191
  gr.Markdown("## Chat with Paper")
192
  arxiv_id = gr.State(value=None)
193
+ paper_from = gr.State(value=None)
194
+ paper_chat_tab(arxiv_id, paper_from)
195
+
196
+
197
+ # chat with paper
198
def get_selected(evt: gr.SelectData, dataframe_origin):
    """
    Open the "Chat With Paper" tab when a chat-link cell is selected.

    The selected cell's value is parsed as HTML. If it contains an ``<a>``
    tag whose ``action_id`` attribute equals ``chat-with-paper``, that tag's
    ``paper_id`` and ``paper_from`` attributes are forwarded, and the chat
    tab is made visible and selected. For any other cell the two states are
    reset to ``None`` and the chat tab is hidden.

    Args:
        evt: Gradio selection event; only ``evt.value`` is read.
        dataframe_origin: Value of the component wired as input (unused).

    Returns:
        tuple: ``(paper_id, paper_from, tab_chat_paper, selected_tab)``, in
        the order of the ``outputs`` list of the ``select`` listener.
    """
    # Defaults: clear both states, hide the chat tab, leave tab selection as is.
    paper_id = gr.update(value=None)
    paper_from = gr.update(value=None)
    tab_chat_paper = gr.update(visible=False)
    selected_tab = gr.Tabs()

    cell_value = evt.value
    if not isinstance(cell_value, str):
        # Numeric / empty cells cannot hold a chat link; skip HTML parsing
        # instead of triggering (and logging) a parser error on every click.
        return paper_id, paper_from, tab_chat_paper, selected_tab

    try:
        # Parse the HTML content of the clicked cell
        soup = BeautifulSoup(cell_value, "html.parser")

        for a_tag in soup.find_all('a'):
            # Only anchors flagged with action_id="chat-with-paper" are chat links
            if a_tag.get('action_id') == 'chat-with-paper':
                paper_id = a_tag.get("paper_id")
                paper_from = a_tag.get("paper_from")
                tab_chat_paper = gr.update(visible=True)
                selected_tab = gr.Tabs(selected="tab-chat-with-paper")
    except Exception as e:
        # Best effort: a malformed cell must never break table selection.
        print("The content is not valid HTML or another error occurred:", str(e))

    return paper_id, paper_from, tab_chat_paper, selected_tab
224
+
225
+
226
+ paper_central_component.select(get_selected, inputs=[paper_central_component],
227
+ outputs=[arxiv_id, paper_from, tab_chat_paper, tabs])
228
 
229
 
230
  # Define function to move to the next day
 
581
  """
582
  Launches the Gradio app.
583
  """
584
+ demo.launch(ssr_mode=False, share=True)
585
 
586
 
587
  # Run the main function when the script is executed
df/PaperCentral.py CHANGED
@@ -17,6 +17,7 @@ import numpy as np
17
  from datetime import datetime, timedelta
18
  import re
19
 
 
20
  class PaperCentral:
21
  """
22
  A class to manage and process paper data for display in a Gradio Dataframe component.
@@ -450,6 +451,20 @@ class PaperCentral:
450
  columns_to_show.append('project_page')
451
  filtered_df = filtered_df[(filtered_df['project_page'] != "") & (filtered_df['project_page'].notnull())]
452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  # Apply conference filtering
454
  if conference_options:
455
  columns_to_show = [col for col in columns_to_show if col not in ["date", "arxiv_id"]]
@@ -478,17 +493,20 @@ class PaperCentral:
478
  )
479
  filtered_df = filtered_df[conference_filter]
480
 
 
481
  if any(conf in ["NeurIPS2024 D&B", "NeurIPS2024"] for conf in conference_options):
482
- def create_chat_link(row):
483
  neurips_id = re.search(r'id=([^&]+)', row["proceedings"])
484
  if neurips_id:
485
  neurips_id = neurips_id.group(1)
486
- return f'<a href="/?tab=tab-chat-with-paper&paper_id={neurips_id}" id="custom_button" target="_blank">✨ Chat with paper</a>'
 
 
487
  else:
488
  return ""
489
 
490
  # Add the "chat_with_paper" column
491
- filtered_df['chat_with_paper'] = filtered_df.apply(create_chat_link, axis=1)
492
  if 'chat_with_paper' not in columns_to_show:
493
  columns_to_show.append('chat_with_paper')
494
 
 
17
  from datetime import datetime, timedelta
18
  import re
19
 
20
+
21
  class PaperCentral:
22
  """
23
  A class to manage and process paper data for display in a Gradio Dataframe component.
 
451
  columns_to_show.append('project_page')
452
  filtered_df = filtered_df[(filtered_df['project_page'] != "") & (filtered_df['project_page'].notnull())]
453
 
454
+ # create chat link
455
def create_chat_link(row):
    """
    Build the "Chat with paper" anchor for one dataframe row.

    Args:
        row: Mapping-like row exposing a ``"paper_page"`` entry.

    Returns:
        str: An ``<a>`` tag carrying ``action_id``, ``paper_id`` and
        ``paper_from="paper_page"`` attributes, or ``""`` when the row has
        no paper page (missing/NaN or empty string).
    """
    import html  # local import: only needed to escape the attribute value

    paper_page = row["paper_page"]
    if pd.isna(paper_page) or paper_page == "":
        return ""

    # Escape the id so quotes or angle brackets in the data cannot break
    # out of the attribute.
    paper_id = html.escape(str(paper_page), quote=True)
    return (
        f'<a'
        f' action_id="chat-with-paper" paper_id="{paper_id}" paper_from="paper_page"'
        f' id="custom_button">✨ Chat with paper</a>'
    )
462
+
463
+ filtered_df['chat_with_paper'] = filtered_df.apply(create_chat_link, axis=1)
464
+
465
+ if 'chat_with_paper' not in columns_to_show:
466
+ columns_to_show.append('chat_with_paper')
467
+
468
  # Apply conference filtering
469
  if conference_options:
470
  columns_to_show = [col for col in columns_to_show if col not in ["date", "arxiv_id"]]
 
493
  )
494
  filtered_df = filtered_df[conference_filter]
495
 
496
+ # conference chat with paper
497
  if any(conf in ["NeurIPS2024 D&B", "NeurIPS2024"] for conf in conference_options):
498
def create_chat_neurips_link(row):
    """
    Build the "Chat with paper" anchor for a NeurIPS row.

    The paper id is taken from the ``id=`` query parameter of the row's
    ``"proceedings"`` value.

    Args:
        row: Mapping-like row exposing a ``"proceedings"`` entry.

    Returns:
        str: An ``<a>`` tag carrying ``action_id``, ``paper_id`` and
        ``paper_from="neurips"`` attributes, or ``""`` when no id can be
        extracted (including a non-string ``proceedings`` value).
    """
    import html  # local import: only needed to escape the attribute value

    proceedings = row["proceedings"]
    if not isinstance(proceedings, str):
        # Missing values (e.g. NaN) would make re.search raise TypeError.
        return ""

    match = re.search(r'id=([^&]+)', proceedings)
    if not match:
        return ""

    # Quote and escape the id, matching the paper_page link format, so the
    # attribute survives values containing spaces or markup characters.
    neurips_id = html.escape(match.group(1), quote=True)
    return (
        f'<a'
        f' action_id="chat-with-paper" paper_id="{neurips_id}" paper_from="neurips"'
        f' id="custom_button">✨ Chat with paper</a>'
    )
507
 
508
  # Add the "chat_with_paper" column
509
+ filtered_df['chat_with_paper'] = filtered_df.apply(create_chat_neurips_link, axis=1)
510
  if 'chat_with_paper' not in columns_to_show:
511
  columns_to_show.append('chat_with_paper')
512
 
paper_chat_tab.py CHANGED
@@ -78,6 +78,30 @@ def fetch_paper_info_neurips(paper_id):
78
  return preamble
79
 
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def fetch_paper_content(paper_id):
82
  try:
83
  # Construct the URL
@@ -230,25 +254,26 @@ def create_chat_interface(provider_dropdown, model_dropdown, paper_content, hf_t
230
  print(f"An unexpected error occurred: {ex}")
231
  yield f"{ex}"
232
 
 
 
 
 
 
 
 
 
233
  # Create the ChatInterface
234
  chat_interface = gr.ChatInterface(
235
  fn=get_fn,
236
- chatbot=gr.Chatbot(
237
- label="Chatbot",
238
- scale=1,
239
- height=400,
240
- autoscroll=True,
241
- ),
242
  additional_inputs=[paper_content, hf_token_input, provider_dropdown, model_dropdown, provider_max_total_tokens],
243
  type="tuples",
244
  )
245
- return chat_interface
246
 
247
 
248
- def paper_chat_tab(paper_id):
249
  with gr.Column():
250
- # Textbox to display the paper title and authors
251
- content = gr.Markdown(value="")
252
 
253
  # Preamble message to hint the user
254
  gr.Markdown("**Note:** Providing your own API token can help you avoid rate limits.")
@@ -290,6 +315,14 @@ def paper_chat_tab(paper_id):
290
  # State to store the paper content
291
  paper_content = gr.State()
292
 
 
 
 
 
 
 
 
 
293
  # Function to update models and logo when provider changes
294
  def update_provider(selected_provider):
295
  provider_info = PROVIDERS[selected_provider]
@@ -314,63 +347,86 @@ def paper_chat_tab(paper_id):
314
  placeholder=f"Enter your {selected_provider} API token to avoid rate limits"
315
  )
316
 
317
- return model_dropdown_choices, logo_html_update, note_markdown_update, hf_token_input_update, chatbot_message_type, max_total_tokens
 
 
 
318
 
319
  provider_dropdown.change(
320
  fn=update_provider,
321
  inputs=provider_dropdown,
322
- outputs=[model_dropdown, logo_html, note_markdown, hf_token_input, default_type, default_max_total_tokens],
 
323
  queue=False
324
  )
325
 
326
  # Function to update the paper info
327
- def update_paper_info(paper_id_value, selected_model):
328
- preamble = fetch_paper_info_neurips(paper_id_value)
329
- text = fetch_paper_content(paper_id_value)
330
- if preamble is None:
331
- preamble = "Paper not found or could not retrieve paper information."
332
- if text is None:
333
- return preamble, None
334
- return preamble, text
335
-
336
- # Update paper content when paper ID or model changes
337
- paper_id.change(
338
- fn=update_paper_info,
339
- inputs=[paper_id, model_dropdown],
340
- outputs=[content, paper_content]
341
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
 
343
- model_dropdown.change(
 
344
  fn=update_paper_info,
345
- inputs=[paper_id, model_dropdown],
346
- outputs=[content, paper_content],
347
- queue=False,
348
  )
349
 
350
- # Create the chat interface
351
- chat_interface = create_chat_interface(provider_dropdown, model_dropdown, paper_content, hf_token_input,
352
- default_type, default_max_total_tokens)
353
-
354
 
355
  def main():
356
  """
357
  Launches the Gradio app.
358
  """
359
  with gr.Blocks(css_paths="style.css") as demo:
360
- x = gr.State(value="") # Initialize with an empty state
361
-
362
- def update_state():
363
- """
364
- Function to update the state.
365
- """
366
- return "5G7ve8E1Lu"
367
-
368
- with gr.Row():
369
- update_button = gr.Button("Update State") # Button to update the state
370
 
371
- # Update the state and reflect the change in the display
372
- update_button.click(update_state, inputs=[], outputs=[x])
373
- paper_chat_tab(x)
374
 
375
  demo.launch(ssr_mode=False)
376
 
 
78
  return preamble
79
 
80
 
81
def fetch_paper_content_arxiv(paper_id):
    """
    Download an arXiv PDF and return its extracted text.

    Args:
        paper_id: arXiv identifier inserted into
            ``https://arxiv.org/pdf/{paper_id}.pdf``.

    Returns:
        str | None: Concatenated text of all pages (not truncated here), or
        ``None`` if the download or the PDF parsing fails.
    """
    try:
        # Construct the URL for the arXiv PDF
        url = f"https://arxiv.org/pdf/{paper_id}.pdf"

        # Fetch the PDF; a timeout keeps a stalled connection from
        # blocking the UI callback indefinitely.
        response = requests.get(url, timeout=60)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Read the PDF content
        reader = PdfReader(BytesIO(response.content))

        # Extract text page by page; pages without extractable text
        # contribute an empty string instead of breaking the concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # Best effort: callers treat None as "content unavailable".
        print(f"Error fetching paper content: {e}")
        return None
103
+
104
+
105
  def fetch_paper_content(paper_id):
106
  try:
107
  # Construct the URL
 
254
  print(f"An unexpected error occurred: {ex}")
255
  yield f"{ex}"
256
 
257
+ # Create the Chatbot separately to access it later
258
+ chatbot = gr.Chatbot(
259
+ label="Chatbot",
260
+ scale=1,
261
+ height=400,
262
+ autoscroll=True,
263
+ )
264
+
265
  # Create the ChatInterface
266
  chat_interface = gr.ChatInterface(
267
  fn=get_fn,
268
+ chatbot=chatbot,
 
 
 
 
 
269
  additional_inputs=[paper_content, hf_token_input, provider_dropdown, model_dropdown, provider_max_total_tokens],
270
  type="tuples",
271
  )
272
+ return chat_interface, chatbot
273
 
274
 
275
+ def paper_chat_tab(paper_id, paper_from):
276
  with gr.Column():
 
 
277
 
278
  # Preamble message to hint the user
279
  gr.Markdown("**Note:** Providing your own API token can help you avoid rate limits.")
 
315
  # State to store the paper content
316
  paper_content = gr.State()
317
 
318
+ # Textbox to display the paper title and authors
319
+ content = gr.Markdown(value="")
320
+
321
+ # Create the chat interface and get the chatbot component
322
+ chat_interface, chatbot = create_chat_interface(provider_dropdown, model_dropdown, paper_content,
323
+ hf_token_input,
324
+ default_type, default_max_total_tokens)
325
+
326
  # Function to update models and logo when provider changes
327
  def update_provider(selected_provider):
328
  provider_info = PROVIDERS[selected_provider]
 
347
  placeholder=f"Enter your {selected_provider} API token to avoid rate limits"
348
  )
349
 
350
+ # Reset the chatbot history
351
+ chatbot_reset = [] # This resets the chatbot conversation
352
+
353
+ return model_dropdown_choices, logo_html_update, note_markdown_update, hf_token_input_update, chatbot_message_type, max_total_tokens, chatbot_reset
354
 
355
  provider_dropdown.change(
356
  fn=update_provider,
357
  inputs=provider_dropdown,
358
+ outputs=[model_dropdown, logo_html, note_markdown, hf_token_input, default_type, default_max_total_tokens,
359
+ chatbot],
360
  queue=False
361
  )
362
 
363
  # Function to update the paper info
364
def update_paper_info(paper_id_value, paper_from_value, selected_model):
    """
    Load the preamble and full text for the selected paper.

    Args:
        paper_id_value: Identifier of the paper to load.
        paper_from_value: Source of the paper: ``"neurips"`` or
            ``"paper_page"``; any other value clears the display.
        selected_model: Currently selected model (unused; kept because it
            is wired as an input of the change listener).

    Returns:
        tuple: ``(preamble_markdown, paper_text, chatbot_history)``. The
        history is always ``[]`` so the conversation is reset whenever the
        paper changes; ``paper_text`` is ``None`` when nothing was retrieved
        for a NeurIPS paper or when the paper-page lookup fails.
    """
    not_found = "Paper not found or could not retrieve paper information."

    if paper_from_value == "neurips":
        preamble = fetch_paper_info_neurips(paper_id_value)
        text = fetch_paper_content(paper_id_value)
        if preamble is None:
            preamble = not_found
        return preamble, text, []

    if paper_from_value == "paper_page":
        # Fetch the paper information from Hugging Face API
        url = f"https://huggingface.co/api/papers/{paper_id_value}?field=comments"
        try:
            # Timeout so a stalled request cannot hang the UI callback.
            response = requests.get(url, timeout=30)
        except requests.RequestException as e:
            print(f"Error fetching paper information: {e}")
            return not_found, None, []
        if response.status_code != 200:
            return not_found, None, []
        try:
            paper_info = response.json()
        except ValueError as e:
            # Body was not valid JSON
            print(f"Error decoding paper information: {e}")
            return not_found, None, []

        # Extract required information
        title = paper_info.get('title', 'No Title')
        link = f"https://huggingface.co/papers/{paper_id_value}"
        authors = ', '.join(
            author.get('name', 'Unknown') for author in paper_info.get('authors', [])
        )
        summary = paper_info.get('summary', 'No Summary')
        num_comments = len(paper_info.get('comments', []))
        num_upvotes = paper_info.get('upvotes', 0)

        # Format the preamble (👍 = upvotes, 💬 = comments)
        preamble = f"🤗 [paper-page]({link})<br/>"
        preamble += f"**Title:** {title}<br/>"
        preamble += f"**Authors:** {authors}<br/>"
        preamble += f"**Summary:**<br/>>\n{summary}<br/>"
        preamble += f"👍{num_upvotes} 💬{num_comments} <br/>"

        # Fetch the paper content
        text = fetch_paper_content_arxiv(paper_id_value)
        if text is None:
            text = "Paper content could not be retrieved."
        return preamble, text, []

    # Unknown or unset source: clear the display and the stored content.
    return "", "", []
404
 
405
+ # Update paper content when paper ID changes
406
+ paper_id.change(
407
  fn=update_paper_info,
408
+ inputs=[paper_id, paper_from, model_dropdown],
409
+ outputs=[content, paper_content, chatbot]
 
410
  )
411
 
 
 
 
 
412
 
413
def main():
    """
    Build a standalone demo around the paper chat tab and launch it.

    A textbox supplies the paper ID and a radio button the paper source;
    both are passed to ``paper_chat_tab``.
    """
    with gr.Blocks(css_paths="style.css") as demo:
        # Free-text paper identifier
        paper_id_input = gr.Textbox(label="Paper ID", value="")

        # Source selector ('neurips' or 'paper_page'), defaulting to NeurIPS
        paper_source_input = gr.Radio(
            label="Paper Source",
            choices=["neurips", "paper_page"],
            value="neurips",
        )

        # Build the paper chat tab on top of the two inputs
        paper_chat_tab(paper_id_input, paper_source_input)

    demo.launch(ssr_mode=False)
432