import io import os import re import tarfile import anthropic import gradio as gr import requests import arxiv def replace_texttt(text): return re.sub(r"\\texttt\{(.*?)\}", r"*\1*", text) def get_paper_info(paper_id): # Create a search query with the arXiv ID search = arxiv.Search(id_list=[paper_id]) # Fetch the paper using its arXiv ID paper = next(search.results(), None) if paper is not None: # Return the paper's title and abstract return paper.title, paper.summary else: return None, None def download_arxiv_source(paper_id): url = f"https://arxiv.org/e-print/{paper_id}" # Get the tar file response = requests.get(url) response.raise_for_status() # Open the tar file tar = tarfile.open(fileobj=io.BytesIO(response.content), mode="r") # Load all .tex files into memory, including their subdirectories tex_files = { member.name: tar.extractfile(member).read().decode("utf-8") for member in tar.getmembers() if member.name.endswith(".tex") } # Load all .tex files into memory, including their subdirectories tex_files = { member.name: tar.extractfile(member).read().decode("utf-8") for member in tar.getmembers() if member.isfile() and member.name.endswith(".tex") } # Pattern to match \input{filename} and \include{filename} pattern = re.compile(r"\\(input|include){(.*?)}") # Function to replace \input{filename} and \include{filename} with file contents def replace_includes(text): output = [] for line in text.split("\n"): match = re.search(pattern, line) if match: command, filename = match.groups() # LaTeX automatically adds .tex extension for \input and \include commands if not filename.endswith(".tex"): filename += ".tex" if filename in tex_files: output.append(replace_includes(tex_files[filename])) else: output.append(f"% {line} % FILE NOT FOUND") else: output.append(line) return "\n".join(output) if "main.tex" in tex_files: # Start with the contents of main.tex main_tex = replace_includes(tex_files["main.tex"]) else: # No main.tex, concatenate all .tex files main_tex = "\n".join(replace_includes(text) for text in tex_files.values()) return main_tex class ContextualQA: def __init__(self, client, model="claude-v1.3-100k"): self.client = client self.model = model self.context = "" self.questions = [] self.responses = [] def load_text(self, text): self.context = text def ask_question(self, question): leading_prompt = "Here is the content of a paper:" trailing_prompt = "Now, answer the following question below. You can optionally use Markdown to format your answer." prompt = f"{anthropic.HUMAN_PROMPT} {leading_prompt}\n\n{self.context}\n\n{trailing_prompt}\n\n{anthropic.HUMAN_PROMPT} {question}\n\n{anthropic.AI_PROMPT}" response = self.client.completion_stream( prompt=prompt, stop_sequences=[anthropic.HUMAN_PROMPT], max_tokens_to_sample=6000, model=self.model, stream=False, ) responses = [data for data in response] self.questions.append(question) self.responses.append(responses) return responses def clear_context(self): self.context = "" self.questions = [] self.responses = [] def __getstate__(self): state = self.__dict__.copy() del state["client"] return state def __setstate__(self, state): self.__dict__.update(state) self.client = None def load_context(paper_id): try: latex_source = download_arxiv_source(paper_id) except Exception as e: return None, [(f"Error loading paper with id {paper_id}.", str(e))] client = anthropic.Client(api_key=os.environ["ANTHROPIC_API_KEY"]) model = ContextualQA(client, model="claude-v1.3-100k") model.load_text(latex_source) # Usage title, abstract = get_paper_info(paper_id) # remove special symbols from title and abstract title = replace_texttt(title) abstract = replace_texttt(abstract) return ( model, [ ( f"Load the paper with id {paper_id}.", f"\n**Title**: {title}\n\n**Abstract**: {abstract}\n\nPaper loaded, You can now ask questions.", ) ], ) def answer_fn(model, question, chat_history): # if question is empty, tell user that they need to ask a question if question == "": chat_history.append(("No Question Asked", "Please ask a question.")) return model, chat_history, "" client = anthropic.Client(api_key=os.environ["ANTHROPIC_API_KEY"]) model.client = client try: response = model.ask_question(question) except Exception as e: chat_history.append(("Error Asking Question", str(e))) return model, chat_history, "" chat_history.append((question, response[0]["completion"])) return model, chat_history, "" def clear_context(): return [] with gr.Blocks(theme=gr.themes.Soft()) as demo: gr.Markdown( "# Explore ArXiv Papers in Depth with `claude-v1.3-100k` - Ask Questions and Receive Detailed Answers Instantly" ) gr.Markdown( "Dive into the world of academic papers with our dynamic app, powered by the cutting-edge `claude-v1.3-100k` model. This app allows you to ask detailed questions about any ArXiv paper and receive direct answers from the paper's content. Utilizing a context length of 100k tokens, it provides an efficient and comprehensive exploration of complex research studies, making knowledge acquisition simpler and more interactive. (This text is generated by GPT-4 )" ) gr.HTML( """