taesiri committed on
Commit affe617 • 1 Parent(s): 658ce61

Initial Commit

Files changed (3)
  1. README.md +1 -1
  2. app.py +177 -0
  3. requirements.txt +0 -0
README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- title: ClaudeReadsArxiv
+ title: Claude Reads Arxiv
  emoji: 🌖
  colorFrom: purple
  colorTo: yellow
app.py ADDED
@@ -0,0 +1,177 @@
+ import io
+ import os
+ import re
+ import tarfile
+
+ import anthropic
+ import gradio as gr
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import openai
+ import pandas as pd
+ import requests
+ import seaborn as sns
+ from tqdm import tqdm
+
+
+ def download_arxiv_source(paper_id):
+     url = f"https://arxiv.org/e-print/{paper_id}"
+
+     # Get the tar file
+     response = requests.get(url)
+     response.raise_for_status()
+
+     # Open the tar file
+     tar = tarfile.open(fileobj=io.BytesIO(response.content), mode="r")
+
+     # Load all .tex files into memory, including their subdirectories
+     tex_files = {
+         member.name: tar.extractfile(member).read().decode("utf-8")
+         for member in tar.getmembers()
+         if member.name.endswith(".tex")
+     }
+
+     # Pattern to match \input{filename} and \include{filename}
+     pattern = re.compile(r"\\(input|include){(.*?)}")
+
+     # Function to replace \input{filename} and \include{filename} with file contents
+     def replace_includes(text):
+         output = []
+         for line in text.split("\n"):
+             match = re.search(pattern, line)
+             if match:
+                 command, filename = match.groups()
+                 # LaTeX automatically adds .tex extension for \include command
+                 if command == "include":
+                     filename += ".tex"
+                 if filename in tex_files:
+                     output.append(replace_includes(tex_files[filename]))
+                 else:
+                     output.append(f"% {line} % FILE NOT FOUND")
+             else:
+                 output.append(line)
+         return "\n".join(output)
+
+     if "main.tex" in tex_files:
+         # Start with the contents of main.tex
+         main_tex = replace_includes(tex_files["main.tex"])
+     else:
+         # No main.tex, concatenate all .tex files
+         main_tex = "\n".join(replace_includes(text) for text in tex_files.values())
+
+     return main_tex
+
+
+ class ContextualQA:
+     def __init__(self, client, model="claude-v1.3-100k"):
+         self.client = client
+         self.model = model
+         self.context = ""
+         self.questions = []
+         self.responses = []
+
+     def load_text(self, text):
+         self.context = text
+
+     def ask_question(self, question):
+         leading_prompt = "Consider the text document below:"
+         trailing_prompt = (
+             "Now answer the following question, use Markdown to format your answer."
+         )
+         prompt = f"{anthropic.HUMAN_PROMPT} {leading_prompt}\n\n{self.context}\n\n{trailing_prompt}\n\n{anthropic.HUMAN_PROMPT} {question} {anthropic.AI_PROMPT}"
+         response = self.client.completion_stream(
+             prompt=prompt,
+             stop_sequences=[anthropic.HUMAN_PROMPT],
+             max_tokens_to_sample=6000,
+             model=self.model,
+             stream=False,
+         )
+         responses = [data for data in response]
+         self.questions.append(question)
+         self.responses.append(responses)
+         return responses
+
+     def clear_context(self):
+         self.context = ""
+         self.questions = []
+         self.responses = []
+
+     def __getstate__(self):
+         state = self.__dict__.copy()
+         del state["client"]
+         return state
+
+     def __setstate__(self, state):
+         self.__dict__.update(state)
+         self.client = None
+
+
+ client = anthropic.Client(api_key=os.environ["ANTHROPIC_API_KEY"])
+
+
+ def load_context(paper_id):
+     latex_source = download_arxiv_source(paper_id)
+     model = ContextualQA(client, model="claude-v1.3-100k")
+     model.load_text(latex_source)
+     return (
+         model,
+         [(f"Load the paper with id {paper_id}.", "Paper loaded. Now ask a question.")],
+     )
+
+
+ def answer_fn(model, question, chat_history):
+     # If the question is empty, ask the user to provide one
+     if question == "":
+         chat_history.append(("No Question Asked", "Please ask a question."))
+         return model, chat_history, ""
+
+     response = model.ask_question(question)
+
+     chat_history.append((question, response[0]["completion"]))
+     return model, chat_history, ""
+
+
+ def clear_context():
+     return []
+
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         "# Explore ArXiv Papers in Depth with `claude-v1.3-100k` - Ask Questions and Receive Detailed Answers Instantly"
+     )
+     gr.Markdown(
+         "Dive into the world of academic papers with our dynamic app, powered by the cutting-edge `claude-v1.3-100k` model. This app allows you to ask detailed questions about any ArXiv paper and receive direct answers from the paper's content. Utilizing a context length of 100k tokens, it provides an efficient and comprehensive exploration of complex research studies, making knowledge acquisition simpler and more interactive. (This text was generated by GPT-4.)"
+     )
+
+     with gr.Column():
+         with gr.Row():
+             paper_id_input = gr.Textbox(label="Enter Paper ID", value="2303.10130")
+             btn_load = gr.Button("Load Paper")
+             qa_model = gr.State()
+
+     with gr.Column():
+         chatbot = gr.Chatbot().style(color_map=("blue", "yellow"))
+         question_txt = gr.Textbox(
+             label="Question", lines=1, placeholder="Type your question here..."
+         )
+         btn_answer = gr.Button("Answer Question")
+
+         btn_clear = gr.Button("Clear Chat")
+
+     btn_load.click(load_context, inputs=[paper_id_input], outputs=[qa_model, chatbot])
+
+     btn_answer.click(
+         answer_fn,
+         inputs=[qa_model, question_txt, chatbot],
+         outputs=[qa_model, chatbot, question_txt],
+     )
+
+     question_txt.submit(
+         answer_fn,
+         inputs=[qa_model, question_txt, chatbot],
+         outputs=[qa_model, chatbot, question_txt],
+     )
+
+     btn_clear.click(clear_context, outputs=[chatbot])
+
+ demo.launch()
requirements.txt ADDED
File without changes
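
Note: requirements.txt is added empty in this commit. Judging from the imports at the top of app.py, a plausible dependency list for this Space might look like the following (an assumption based on those imports, not something recorded in the commit):

anthropic
gradio
requests
matplotlib
numpy
openai
pandas
seaborn
tqdm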