Spaces:
Paused
Paused
yuta_hayashi
committed on
Commit
•
cf71fc7
1
Parent(s):
86b56ff
Add application file
Browse files- app.py +76 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain.chat_models import ChatAnthropic
from langchain.schema import AIMessage, HumanMessage, SystemMessage
|
6 |
+
|
7 |
+
def summarize_with_anthropic(article, api_key, summary_prompt):
    """Summarize *article* text using Anthropic's claude-2 model.

    Parameters
    ----------
    article : str
        The article text to summarize.
    api_key : str
        Anthropic API key passed straight to the LangChain client.
    summary_prompt : str
        Instruction placed before the article in the prompt.

    Returns
    -------
    str
        The model's summary text.
    """
    # Wrap the article in an <article> tag so the model can tell the
    # instruction apart from the content it should summarize.
    prompt = f"{summary_prompt}\n\n<article>{article}</article>\n\n"
    client = ChatAnthropic(
        anthropic_api_key=api_key,
        model="claude-2",
        max_tokens_to_sample=1024,
        temperature=0,  # deterministic output for reproducible summaries
    )
    reply = client([HumanMessage(content=prompt)])
    return reply.content
|
18 |
+
|
19 |
+
|
20 |
+
def modify_url_to_target(url):
    """Rewrite an arXiv URL one step toward a scrapeable target.

    A ``arxiv.org/pdf`` link becomes its ``arxiv.org/abs`` page, and an
    ``arxiv.org/abs`` link becomes the ar5iv HTML rendering. Any other
    URL is returned unchanged.
    """
    # Ordered rewrite table; the first matching needle wins, mirroring
    # the original if/elif chain.
    rewrites = (
        ("arxiv.org/pdf", "arxiv.org/abs"),
        ("arxiv.org/abs", "ar5iv.labs.arxiv.org/html"),
    )
    for needle, replacement in rewrites:
        if needle in url:
            return url.replace(needle, replacement)
    return url
|
28 |
+
|
29 |
+
|
30 |
+
def scrape_article_from_url(url):
    """Fetch the (rewritten) URL and extract article text for summarizing.

    Returns
    -------
    tuple[str, str]
        ``(text, target_url)`` where *target_url* is the URL after
        :func:`modify_url_to_target` rewriting. If the request ends up
        on an arXiv abstract page, only the abstract (or a not-found
        message) is returned; otherwise the full page text is used.
    """
    target = modify_url_to_target(url)
    page = requests.get(target, allow_redirects=True)
    soup = BeautifulSoup(page.content, 'html.parser')

    # ar5iv redirects back to arxiv.org/abs when no HTML rendering
    # exists; in that case fall back to scraping just the abstract.
    if "arxiv.org/abs" in page.url:
        blockquote = soup.find('blockquote', {'class': 'abstract'})
        if blockquote:
            return blockquote.text.strip().replace("Abstract: ", ""), target
        return "Abstract not found.", target

    return soup.get_text(), target
|
43 |
+
|
44 |
+
|
45 |
+
def summarize_from_url(
    article_url,
    summary_prompt,
    anthropic_key=None,
):
    """Scrape an article from a URL and summarize it with Claude.

    Parameters
    ----------
    article_url : str
        URL of the article (typically an arXiv link).
    summary_prompt : str
        Instruction prepended to the scraped article text.
    anthropic_key : str | None
        Anthropic API key. When empty or ``None`` (e.g. the Gradio
        textbox was left blank), falls back to the ``ANTHROPIC_API_KEY``
        environment variable.

    Returns
    -------
    tuple[str, str]
        ``(summary, url_actually_scraped)``.
    """
    # `not anthropic_key` covers both None and the empty string that a
    # blank Gradio textbox submits. Requires `import os` at module top
    # (it was missing in the original file, causing a NameError here).
    if not anthropic_key:
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
    article_content, summarized_url = scrape_article_from_url(article_url)
    summary = summarize_with_anthropic(article_content, anthropic_key, summary_prompt)
    return summary, summarized_url
|
55 |
+
|
56 |
+
def main_interface():
    """Build and launch the Gradio UI for the arXiv summarizer."""
    url_box = gr.Textbox(placeholder="Enter Article URL", label="arXiv URL")
    prompt_box = gr.Textbox(
        value="自然な日本語で箇条書きで解説してください。", label="Summary Prompt"
    )
    key_box = gr.Textbox(
        placeholder="Enter Anthropic API Key", label="Anthropic API Key"
    )

    # Input order must match summarize_from_url's positional parameters:
    # (article_url, summary_prompt, anthropic_key).
    demo = gr.Interface(
        fn=summarize_from_url,
        inputs=[url_box, prompt_box, key_box],
        outputs=[
            gr.Textbox(label="Summary"),
            gr.Textbox(label="Summarized URL"),
        ],
        title="arXiv Summarizer",
    )
    demo.launch(debug=True)
|
74 |
+
|
75 |
+
# Script entry point: launch the Gradio app only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main_interface()
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
anthropic
|
3 |
+
gradio
|
4 |
+
pdfminer.six
|