DrishtiSharma committed on
Commit
60297bb
·
verified ·
1 Parent(s): 03e92fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -97,7 +97,7 @@ def load_docs(document_path):
97
 
98
  # Combine all pages into one text
99
  full_text = "\n".join(extracted_text)
100
- st.write(f"\ud83d\udd8d Total Cleaned Text Length: {len(full_text)} characters")
101
 
102
  # Step 2: Chunk the cleaned text
103
  text_splitter = RecursiveCharacterTextSplitter(
@@ -107,7 +107,7 @@ def load_docs(document_path):
107
  )
108
  split_docs = text_splitter.create_documents([full_text])
109
 
110
- st.write(f"\ud83d\udd0d Total Chunks After Splitting: {len(split_docs)}")
111
  for i, doc in enumerate(split_docs[:3]): # Show first 3 chunks only
112
  st.write(f"Chunk {i + 1}: {doc.page_content[:300]}...")
113
 
@@ -175,7 +175,7 @@ if __name__ == "__main__":
175
  layout="wide",
176
  initial_sidebar_state="expanded",
177
  )
178
- st.header("\ud83d\udd8a\ufe0f Patent Chat: Google Patents Chat Demo")
179
 
180
  # Input for Google Patent Link
181
  patent_link = st.text_area(
@@ -206,7 +206,7 @@ if __name__ == "__main__":
206
  # File handling
207
  pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
208
  if not os.path.isfile(pdf_path):
209
- with st.spinner("\ud83d\udd10 Downloading patent file..."):
210
  try:
211
  pdf_path = download_pdf(patent_number)
212
  st.write(f"\u2705 File downloaded: {pdf_path}")
@@ -218,7 +218,7 @@ if __name__ == "__main__":
218
 
219
  # Generate PDF preview only if not already displayed
220
  if not st.session_state.get("pdf_preview_displayed", False):
221
- with st.spinner("\ud83d\uddbc\ufe0f Generating PDF preview..."):
222
  preview_image_path = preview_pdf(pdf_path, scale_factor=0.5)
223
  if preview_image_path:
224
  st.session_state.pdf_preview = preview_image_path
@@ -230,7 +230,7 @@ if __name__ == "__main__":
230
 
231
  # Load the document into the system
232
  st.session_state["loading_complete"] = False
233
- with st.spinner("\ud83d\udd04 Loading document into the system..."):
234
  try:
235
  st.session_state.chain = setup_retrieval_pipeline(
236
  pdf_path, PERSISTED_DIRECTORY, OPENAI_API_KEY
@@ -245,7 +245,7 @@ if __name__ == "__main__":
245
  st.stop()
246
 
247
  if st.session_state["loading_complete"]:
248
- st.success("\ud83d\ude80 Document successfully loaded! You can now start asking questions.")
249
 
250
  # Display previous chat messages
251
  if st.session_state.messages:
 
97
 
98
  # Combine all pages into one text
99
  full_text = "\n".join(extracted_text)
100
+ st.write(f"Total Cleaned Text Length: {len(full_text)} characters")
101
 
102
  # Step 2: Chunk the cleaned text
103
  text_splitter = RecursiveCharacterTextSplitter(
 
107
  )
108
  split_docs = text_splitter.create_documents([full_text])
109
 
110
+ st.write(f"Total Chunks After Splitting: {len(split_docs)}")
111
  for i, doc in enumerate(split_docs[:3]): # Show first 3 chunks only
112
  st.write(f"Chunk {i + 1}: {doc.page_content[:300]}...")
113
 
 
175
  layout="wide",
176
  initial_sidebar_state="expanded",
177
  )
178
+ st.header("Patent Chat: Google Patents Chat Demo")
179
 
180
  # Input for Google Patent Link
181
  patent_link = st.text_area(
 
206
  # File handling
207
  pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
208
  if not os.path.isfile(pdf_path):
209
+ with st.spinner("Downloading patent file..."):
210
  try:
211
  pdf_path = download_pdf(patent_number)
212
  st.write(f"\u2705 File downloaded: {pdf_path}")
 
218
 
219
  # Generate PDF preview only if not already displayed
220
  if not st.session_state.get("pdf_preview_displayed", False):
221
+ with st.spinner("Generating PDF preview..."):
222
  preview_image_path = preview_pdf(pdf_path, scale_factor=0.5)
223
  if preview_image_path:
224
  st.session_state.pdf_preview = preview_image_path
 
230
 
231
  # Load the document into the system
232
  st.session_state["loading_complete"] = False
233
+ with st.spinner("Loading document into the system..."):
234
  try:
235
  st.session_state.chain = setup_retrieval_pipeline(
236
  pdf_path, PERSISTED_DIRECTORY, OPENAI_API_KEY
 
245
  st.stop()
246
 
247
  if st.session_state["loading_complete"]:
248
+ st.success("Document successfully loaded! You can now start asking questions.")
249
 
250
  # Display previous chat messages
251
  if st.session_state.messages: