"""Gradio app that summarizes an arXiv paper (or any web page) with Claude."""

import os
from typing import Optional, Tuple

import gradio as gr
import requests
from bs4 import BeautifulSoup
from langchain.chat_models import ChatAnthropic
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Seconds to wait for the article server before giving up; without a timeout
# ``requests.get`` can block forever on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 30


def summarize_with_anthropic(article: str, api_key: Optional[str], summary_prompt: str) -> str:
    """Ask the Anthropic chat model to summarize *article*.

    Args:
        article: Text to summarize.
        api_key: Anthropic API key handed to ``ChatAnthropic``.
        summary_prompt: Instruction placed in front of the article text.

    Returns:
        The ``content`` of the model's reply.
    """
    chat = ChatAnthropic(
        anthropic_api_key=api_key,
        model="claude-2",
        max_tokens_to_sample=1024,
        temperature=0,
    )
    # The instruction comes first, then the article set apart by blank lines.
    # NOTE(review): the separators are written as explicit "\n" escapes so the
    # literal stays on one physical line -- confirm the exact spacing wanted.
    prompt_content = f"{summary_prompt}\n\n\n{article}\n\n\n"
    messages = [HumanMessage(content=prompt_content)]
    response = chat(messages)
    return response.content


def modify_url_to_target(url: str) -> str:
    """Rewrite an arXiv URL to the page that should be scraped.

    * ``arxiv.org/pdf/...`` becomes ``arxiv.org/abs/...`` (a trailing ``.pdf``
      file extension is dropped so the abstract URL carries only the paper id).
    * ``arxiv.org/abs/...`` becomes ``ar5iv.labs.arxiv.org/html/...``.
    * Any other URL is returned unchanged.

    Args:
        url: URL entered by the user.

    Returns:
        The rewritten URL.
    """
    if "arxiv.org/pdf" in url:
        modified_url = url.replace("arxiv.org/pdf", "arxiv.org/abs")
        # PDF links may end in ".pdf"; that suffix is not part of the paper id.
        if modified_url.endswith(".pdf"):
            modified_url = modified_url[: -len(".pdf")]
        return modified_url
    if "arxiv.org/abs" in url:
        return url.replace("arxiv.org/abs", "ar5iv.labs.arxiv.org/html")
    return url


def scrape_article_from_url(url: str) -> Tuple[str, str]:
    """Download the target page for *url* and extract the text to summarize.

    Args:
        url: URL entered by the user; it is first passed through
            ``modify_url_to_target``.

    Returns:
        A ``(text, modified_url)`` tuple. When the final response URL is an
        arXiv abstract page, ``text`` is the abstract (or the string
        ``"Abstract not found."``); otherwise it is all text of the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    modified_url = modify_url_to_target(url)
    response = requests.get(
        modified_url,
        allow_redirects=True,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly instead of feeding an HTTP error page to the summarizer.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # ``response.url`` is the URL after redirects, so this also covers requests
    # that were redirected to an abstract page.
    if "arxiv.org/abs" in response.url:
        abstract_section = soup.find("blockquote", {"class": "abstract"})
        if abstract_section is None:
            return "Abstract not found.", modified_url
        abstract = abstract_section.text.strip().replace("Abstract: ", "")
        return abstract, modified_url

    return soup.get_text(), modified_url


def summarize_from_url(
    article_url: str,
    summary_prompt: str,
    anthropic_key: Optional[str] = None,
) -> Tuple[str, str]:
    """Scrape *article_url* and summarize its content.

    Args:
        article_url: URL of the article; surrounding whitespace is ignored.
        summary_prompt: Instruction given to the model.
        anthropic_key: Anthropic API key. When empty, the
            ``ANTHROPIC_API_KEY`` environment variable is used instead.

    Returns:
        A ``(summary, summarized_url)`` tuple, where ``summarized_url`` is the
        rewritten URL that was requested.

    Raises:
        ValueError: If no API key is supplied and the environment variable is
            not set.
    """
    if not anthropic_key:
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
    if not anthropic_key:
        # Check before scraping so a missing key does not cost a download.
        raise ValueError(
            "No Anthropic API key provided; enter one or set ANTHROPIC_API_KEY."
        )

    # Pasted URLs often carry stray whitespace or a trailing newline.
    article_content, summarized_url = scrape_article_from_url(article_url.strip())
    summary = summarize_with_anthropic(article_content, anthropic_key, summary_prompt)
    return summary, summarized_url


def main_interface() -> None:
    """Build the Gradio interface around ``summarize_from_url`` and launch it."""
    gr_interface = gr.Interface(
        fn=summarize_from_url,
        inputs=[
            gr.Textbox(placeholder="Enter Article URL", label="arXiv URL"),
            gr.Textbox(
                value="与えた文章に対する要約を自然な日本語で箇条書きで出力せよ。Do not say anything else.",
                label="Summary Prompt",
            ),
            gr.Textbox(
                placeholder="Enter Anthropic API Key",
                label="Anthropic API Key",
                # Mask the secret instead of showing it in clear text.
                type="password",
            ),
        ],
        outputs=[
            gr.Textbox(label="Summary"),
            gr.Textbox(label="Summarized URL"),
        ],
        title="arXiv Summarizer",
    )
    gr_interface.launch(debug=True)


if __name__ == "__main__":
    main_interface()