# arxivsummary / app.py
# schroneko's picture
# fix: app.py
# b72813c
from langchain.chat_models import ChatAnthropic
import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain.schema import AIMessage, HumanMessage, SystemMessage
import os
def summarize_with_anthropic(article, api_key, summary_prompt):
    """Ask the Anthropic chat model to summarize *article*.

    The request is a single human message made of ``summary_prompt``
    followed by the article text wrapped in ``<article>`` tags.

    Args:
        article: Text to be summarized.
        api_key: Anthropic API key passed to the chat model.
        summary_prompt: Instruction placed ahead of the article text.

    Returns:
        The ``content`` attribute of the model's response.
    """
    llm = ChatAnthropic(
        temperature=0,
        max_tokens_to_sample=1024,
        model="claude-2",
        anthropic_api_key=api_key,
    )
    request = HumanMessage(
        content=f"{summary_prompt}\n\n<article>{article}</article>\n\n"
    )
    return llm([request]).content
def modify_url_to_target(url):
    """Rewrite an arXiv URL to the page that should be fetched.

    * ``arxiv.org/pdf`` URLs are pointed at the ``arxiv.org/abs`` page.
    * ``arxiv.org/abs`` URLs are pointed at ``ar5iv.labs.arxiv.org/html``.
    * Any other URL is returned unchanged.

    The PDF check comes first, so a URL matching it is only rewritten to the
    ``abs`` form and is not rewritten a second time.

    Args:
        url: The URL supplied by the user.

    Returns:
        The rewritten URL, or ``url`` itself when neither pattern matches.
    """
    pdf_marker = "arxiv.org/pdf"
    abs_marker = "arxiv.org/abs"

    if pdf_marker in url:
        return url.replace(pdf_marker, abs_marker)
    if abs_marker in url:
        return url.replace(abs_marker, "ar5iv.labs.arxiv.org/html")
    return url
# Upper bound, in seconds, on how long a single HTTP fetch may block.
_REQUEST_TIMEOUT_SECONDS = 30


def scrape_article_from_url(url):
    """Fetch the text of an article for summarization.

    The URL is first rewritten with ``modify_url_to_target`` and then
    downloaded, following redirects. If the final URL (after redirects) is an
    ``arxiv.org/abs`` page, only the abstract block is returned; otherwise the
    full text of the parsed HTML document is returned.

    Args:
        url: The article URL supplied by the user.

    Returns:
        A ``(text, modified_url)`` tuple, where ``text`` is the abstract, the
        literal ``"Abstract not found."``, or the whole page text, and
        ``modified_url`` is the rewritten URL that was requested.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when it exceeds the timeout.
    """
    modified_url = modify_url_to_target(url)
    # Without an explicit timeout, requests waits forever on a stalled server,
    # which would hang the calling UI handler.
    response = requests.get(
        modified_url,
        allow_redirects=True,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    soup = BeautifulSoup(response.content, 'html.parser')

    # Inspect the post-redirect URL: the request may have ended on an
    # abstract page even though a different URL was asked for.
    if "arxiv.org/abs" in response.url:
        abstract_section = soup.find('blockquote', {'class': 'abstract'})
        if abstract_section:
            abstract = abstract_section.text.strip().replace("Abstract: ", "")
            return abstract, modified_url
        return "Abstract not found.", modified_url

    return soup.get_text(), modified_url
def summarize_from_url(
    article_url,
    summary_prompt,
    anthropic_key=None,
):
    """Scrape an article and summarize it with the Anthropic model.

    Args:
        article_url: URL of the article to summarize.
        summary_prompt: Instruction passed to the model ahead of the article.
        anthropic_key: Anthropic API key. When empty or ``None``, the
            ``ANTHROPIC_API_KEY`` environment variable is used instead.

    Returns:
        A ``(summary, summarized_url)`` tuple: the model's summary and the
        URL that was actually fetched.
    """
    # Fall back to the environment only when no key was supplied.
    api_key = anthropic_key or os.environ.get("ANTHROPIC_API_KEY")

    text, fetched_url = scrape_article_from_url(article_url)
    return summarize_with_anthropic(text, api_key, summary_prompt), fetched_url
def main_interface():
    """Build the Gradio UI for the summarizer and launch it.

    The interface wires three text inputs (article URL, summary prompt,
    Anthropic API key) to ``summarize_from_url`` and shows its two results
    (summary and fetched URL) in text boxes. The app is launched with
    ``debug=True``.
    """
    gr_interface = gr.Interface(
        fn=summarize_from_url,
        inputs=[
            gr.Textbox(placeholder="Enter Article URL", label="arXiv URL"),
            gr.Textbox(
                value="与えた文章に対する要約を自然な日本語で箇条書きで出力せよ。Do not say anything else.",
                label="Summary Prompt",
            ),
            gr.Textbox(
                placeholder="Enter Anthropic API Key",
                label="Anthropic API Key",
                # Mask the secret so it is not shown in plain text on screen.
                type="password",
            ),
        ],
        outputs=[
            gr.Textbox(label="Summary"),
            gr.Textbox(label="Summarized URL"),
        ],
        title="arXiv Summarizer",
    )
    gr_interface.launch(debug=True)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    main_interface()