File size: 2,585 Bytes
cf71fc7
 
 
 
 
fb277c7
cf71fc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b72813c
cf71fc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from langchain.chat_models import ChatAnthropic
import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain.schema import AIMessage, HumanMessage, SystemMessage
import os

def summarize_with_anthropic(article, api_key, summary_prompt):
    """Ask the Anthropic chat model to summarize an article.

    The model receives a single human message: ``summary_prompt``, a blank
    line, then the article wrapped in ``<article>`` tags.

    Args:
        article: Text of the article to summarize.
        api_key: Anthropic API key handed to the chat client.
        summary_prompt: Instruction placed ahead of the article text.

    Returns:
        The ``content`` attribute of the model's response.
    """
    client_options = {
        "anthropic_api_key": api_key,
        "model": "claude-2",
        "max_tokens_to_sample": 1024,
        "temperature": 0,
    }
    llm = ChatAnthropic(**client_options)

    user_message = HumanMessage(
        content=f"{summary_prompt}\n\n<article>{article}</article>\n\n"
    )
    reply = llm([user_message])
    return reply.content


def modify_url_to_target(url):
    """Rewrite an arXiv URL to the page that should actually be fetched.

    Rules are tried in order and only the first match is applied:

    * a URL containing ``arxiv.org/pdf`` has that part replaced with
      ``arxiv.org/abs``;
    * otherwise a URL containing ``arxiv.org/abs`` has that part replaced
      with ``ar5iv.labs.arxiv.org/html``;
    * any other URL is returned unchanged.

    Args:
        url: The URL supplied by the user.

    Returns:
        The rewritten URL, or ``url`` itself when no rule matches.
    """
    # Order matters: a PDF link is mapped to the abstract page and is NOT
    # subsequently mapped on to ar5iv.
    rewrite_rules = (
        ("arxiv.org/pdf", "arxiv.org/abs"),
        ("arxiv.org/abs", "ar5iv.labs.arxiv.org/html"),
    )
    for needle, replacement in rewrite_rules:
        if needle in url:
            return url.replace(needle, replacement)
    return url


def scrape_article_from_url(url):
    """Download the page for ``url`` and extract the text to summarize.

    The URL is first rewritten by ``modify_url_to_target``. If the final
    URL after redirects contains ``arxiv.org/abs``, only the abstract
    (the ``<blockquote class="abstract">`` element) is returned; for any
    other page the full visible text of the document is returned.

    Args:
        url: The URL supplied by the user.

    Returns:
        A ``(text, modified_url)`` tuple, where ``text`` is the abstract,
        the literal ``"Abstract not found."``, or the whole page text, and
        ``modified_url`` is the rewritten URL that was requested (not the
        post-redirect URL).

    Raises:
        requests.exceptions.Timeout: The server did not respond in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
    """
    modified_url = modify_url_to_target(url)

    # requests applies no timeout by default, so a stalled server would
    # block this call (and the UI request behind it) forever.
    response = requests.get(modified_url, allow_redirects=True, timeout=30)
    # Fail loudly on 4xx/5xx rather than passing an error page on to be
    # summarized as if it were the article.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    # Check the post-redirect URL: the request may have ended up on an
    # arXiv abstract page even if a different URL was requested.
    if "arxiv.org/abs" in response.url:
        abstract_section = soup.find('blockquote', {'class': 'abstract'})
        if abstract_section is None:
            return "Abstract not found.", modified_url
        # Drop the "Abstract:" label that precedes the abstract text.
        abstract = abstract_section.text.strip().replace("Abstract:  ", "")
        return abstract, modified_url

    return soup.get_text(), modified_url


def summarize_from_url(
    article_url,
    summary_prompt,
    anthropic_key=None,
):
    """Fetch an article by URL and return its summary.

    Args:
        article_url: URL of the article to summarize.
        summary_prompt: Instruction given to the model ahead of the article.
        anthropic_key: Anthropic API key. When empty or ``None``, the
            ``ANTHROPIC_API_KEY`` environment variable is used instead.

    Returns:
        A ``(summary, summarized_url)`` tuple: the model's summary and the
        URL reported by ``scrape_article_from_url``.
    """
    # Any falsy key (None or an empty textbox) falls back to the environment.
    api_key = anthropic_key or os.environ.get("ANTHROPIC_API_KEY")

    text, fetched_url = scrape_article_from_url(article_url)
    result = summarize_with_anthropic(text, api_key, summary_prompt)
    return result, fetched_url

def main_interface():
    """Build the Gradio interface for the summarizer and launch it.

    The interface wires three text inputs (article URL, summary prompt,
    Anthropic API key) to ``summarize_from_url`` and shows its two results
    (summary and summarized URL) in text boxes. The app is launched with
    ``debug=True``.
    """
    # Inputs, in the positional order expected by summarize_from_url.
    url_box = gr.Textbox(placeholder="Enter Article URL", label="arXiv URL")
    prompt_box = gr.Textbox(
        value="与えた文章に対する要約を自然な日本語で箇条書きで出力せよ。Do not say anything else.",
        label="Summary Prompt",
    )
    key_box = gr.Textbox(
        placeholder="Enter Anthropic API Key",
        label="Anthropic API Key",
    )

    # Outputs, matching the (summary, url) tuple returned by the handler.
    summary_box = gr.Textbox(label="Summary")
    source_box = gr.Textbox(label="Summarized URL")

    app = gr.Interface(
        fn=summarize_from_url,
        inputs=[url_box, prompt_box, key_box],
        outputs=[summary_box, source_box],
        title="arXiv Summarizer",
    )
    app.launch(debug=True)

# Start the Gradio app only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main_interface()