Spaces:

firefighter
/

PdfSumGPT

Running

Qifan Zhang committed on Mar 5, 2023

Commit

806d7c6

1 Parent(s): 17719d2

complete version 1.0

Files changed (5) hide show

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .idea/

app.py ADDED Viewed

+import gradio as gr
+from utils.chatgpt import ChatGPTAPI
+from utils.read_pdf import read_pdf
+def process(api_key: str = '', prompt: str = '', file=None) -> str:
+    chatgpt = ChatGPTAPI(api_key, max_input_length=1024)
+    pdf_contents = read_pdf(file.name)
+    pdf_str = '\n'.join(pdf_contents)
+    content = prompt + '\n' + pdf_str
+    response = chatgpt(content)
+    return response
+gr.Interface(fn=process, inputs=["text", "text", "file"], outputs="text").launch()

requirements.txt ADDED Viewed

+openai
+gradio
+pypdf
+tiktoken

utils/chatgpt.py ADDED Viewed

+import openai
+import tiktoken
+class ChatGPTAPI:
+    def __init__(self, api_key='', max_input_length=1024):
+        if not api_key:
+            try:
+                api_key = open('data/api_key.txt', 'r').read()
+            except Exception as e:
+                raise Exception(f'ChatGPT Error: No API key provided {e}')
+        openai.api_key = api_key
+        self.max_input_length = max_input_length
+        self.encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
+    def truncate_string(self, s):
+        e = self.encoding.encode(s)[:self.max_input_length]
+        s = self.encoding.decode(e)
+        return s
+    def __call__(self, content: str):
+        assert isinstance(content, str), 'ChatGPT Error: content must be a string'
+        content = content.strip()
+        content = self.truncate_string(content)
+        messages = [{'role': 'user', 'content': content}]
+        try:
+            resp = openai.ChatCompletion.create(
+                model="gpt-3.5-turbo",
+                messages=messages
+            )
+            output: str = resp['choices'][0]['message']['content']
+            output = output.strip()
+        except Exception as e:
+            raise Exception(f'ChatGPT Error: {e}')
+        return output
+if __name__ == '__main__':
+    chatgpt = ChatGPTAPI()
+    r = chatgpt.truncate_string('how are you ' * 10000)
+    r_list = r.split(' ')
+    # response = chatgpt('Hello, how are you?')
+    # print(response)

utils/read_pdf.py ADDED Viewed

+import pypdf
+def read_pdf(filepath) -> list[str]:
+    outputs = []
+    with open(filepath, 'rb') as f:
+        pdf_reader = pypdf.PdfReader(f)
+        for page in pdf_reader.pages:
+            outputs.append(page.extract_text())
+    return outputs
+if __name__ == '__main__':
+    r = read_pdf('data/109-411-2-PB.pdf')
+    print(r)