KingNish committed on
Commit
c87c622
·
verified ·
1 Parent(s): 8dc1546

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py CHANGED
@@ -73,6 +73,12 @@ def read_document(file):
73
  except Exception as e:
74
  return f"Error reading file: {e}"
75
 
 
 
 
 
 
 
76
  def chat_document(file, question):
77
  content = str(read_document(file))
78
  if len(content) > 32000:
@@ -103,6 +109,58 @@ def chat_document(file, question):
103
  yield output
104
 
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  with gr.Blocks() as demo:
107
  with gr.Tabs():
108
  with gr.TabItem("Document Reader"):
@@ -121,5 +179,13 @@ with gr.Blocks() as demo:
121
  title="Document Chat",
122
  description="Upload a document and ask questions about its content."
123
  )
 
 
 
 
 
 
 
 
124
 
125
  demo.launch()
 
73
  except Exception as e:
74
  return f"Error reading file: {e}"
75
 
76
def split_content(content, chunk_size=32000):
    """Split *content* into consecutive pieces of at most *chunk_size* characters.

    Args:
        content: The text to split.
        chunk_size: Maximum length of each piece; must be a positive integer.

    Returns:
        A list of substrings that, concatenated in order, reproduce
        *content*. Empty input yields an empty list.

    Raises:
        ValueError: If *chunk_size* is less than 1.
    """
    if chunk_size < 1:
        # A zero step makes range() fail with an unrelated-looking message,
        # and a negative step silently produces no chunks at all, which
        # would drop the whole document without any error.
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size}"
        )
    return [
        content[start:start + chunk_size]
        for start in range(0, len(content), chunk_size)
    ]
81
+
82
  def chat_document(file, question):
83
  content = str(read_document(file))
84
  if len(content) > 32000:
 
109
  yield output
110
 
111
 
112
def chat_document_v2(file, question):
    """Answer *question* about an uploaded document using a chunk-based approach.

    The document text is flattened onto a single line, split into chunks
    with ``split_content``, and the model is asked the question once per
    chunk. The per-chunk answers are then merged by a final summarization
    request whose output is streamed back to the caller.

    Args:
        file: The uploaded document; passed unchanged to ``read_document``.
        question: The user's question about the document.

    Yields:
        The summary text accumulated so far, growing as tokens arrive.
        For an empty document a single explanatory message is yielded.
    """
    # Collapse newlines, carriage returns and tabs to spaces in one pass.
    whitespace_to_space = str.maketrans("\n\r\t", "   ")
    content = str(read_document(file)).translate(whitespace_to_space).strip()

    chunks = split_content(content)
    if not chunks:
        # Without this guard the summarizer would be called with no answers
        # at all and could only invent a response.
        yield "The document appears to be empty, so there is nothing to answer from."
        return

    # NOTE(review): indentation inside the triple-quoted prompts below was
    # not recoverable from the diff view this was reconstructed from; it
    # follows the surrounding code. Confirm against app.py.
    # Define system prompt for the chat API
    system_prompt = """
    You are a helpful and informative assistant that can answer questions based on the content of documents.
    You will receive the content of a document and a question about it.
    Your task is to provide a concise and accurate answer to the question based solely on the provided document content.
    If the document does not contain enough information to answer the question, simply state that you cannot answer the question based on the provided information.
    """

    all_answers = []
    for chunk in chunks:
        # split_content already caps each chunk at 32000 characters, so no
        # further slicing is needed here.
        # NOTE(review): the prompt opens "[INST]" but never closes it; left
        # unchanged - confirm against the model's expected chat template.
        message = f"""[INST] [SYSTEM] {system_prompt}
        Document Content: {chunk}
        Question: {question}
        Answer:"""

        stream = client.text_generation(message, max_new_tokens=4096, stream=True, details=True, return_full_text=False)
        # Drop the end-of-sequence marker so it never reaches the summary.
        answer = "".join(
            response.token.text
            for response in stream
            if response.token.text != "</s>"
        )
        all_answers.append(answer)

    # Merge the per-chunk answers with a second model call.
    summary_prompt = """
    You are a helpful and informative assistant that can summarize multiple answers related to the same question.
    You will receive a list of answers to a question, and your task is to generate a concise and comprehensive summary that incorporates the key information from all the answers.
    Avoid repeating information unnecessarily and focus on providing the most relevant and accurate summary based on the provided answers.

    Answers:
    """

    all_answers_str = "\n".join(all_answers)
    # The joined answers are capped at 30000 characters before being sent.
    summary_message = f"""[INST] {summary_prompt}
    {all_answers_str[:30000]}
    Summary:"""

    stream = client.text_generation(summary_message, max_new_tokens=4096, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        if response.token.text != "</s>":
            output += response.token.text
        # NOTE(review): the original indentation of this yield is not
        # visible in the diff view; it is placed inside the loop so partial
        # output is streamed as it arrives. Confirm against app.py.
        yield output
163
+
164
  with gr.Blocks() as demo:
165
  with gr.Tabs():
166
  with gr.TabItem("Document Reader"):
 
179
  title="Document Chat",
180
  description="Upload a document and ask questions about its content."
181
  )
182
+ with gr.TabItem("Document Chat V2"):
183
+ iface3 = gr.Interface(
184
+ fn=chat_document_v2,
185
+ inputs=[gr.File(label="Upload a Document"), gr.Textbox(label="Question")],
186
+ outputs=gr.Textbox(label="Answer"),
187
+ title="Document Chat V2",
188
+ description="Upload a document and ask questions about its content (using chunk-based approach)."
189
+ )
190
 
191
  demo.launch()