Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -97,7 +97,7 @@ def load_docs(document_path):
|
|
97 |
|
98 |
# Combine all pages into one text
|
99 |
full_text = "\n".join(extracted_text)
|
100 |
-
st.write(f"
|
101 |
|
102 |
# Step 2: Chunk the cleaned text
|
103 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -107,7 +107,7 @@ def load_docs(document_path):
|
|
107 |
)
|
108 |
split_docs = text_splitter.create_documents([full_text])
|
109 |
|
110 |
-
st.write(f"
|
111 |
for i, doc in enumerate(split_docs[:3]): # Show first 3 chunks only
|
112 |
st.write(f"Chunk {i + 1}: {doc.page_content[:300]}...")
|
113 |
|
@@ -175,7 +175,7 @@ if __name__ == "__main__":
|
|
175 |
layout="wide",
|
176 |
initial_sidebar_state="expanded",
|
177 |
)
|
178 |
-
st.header("
|
179 |
|
180 |
# Input for Google Patent Link
|
181 |
patent_link = st.text_area(
|
@@ -206,7 +206,7 @@ if __name__ == "__main__":
|
|
206 |
# File handling
|
207 |
pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
|
208 |
if not os.path.isfile(pdf_path):
|
209 |
-
with st.spinner("
|
210 |
try:
|
211 |
pdf_path = download_pdf(patent_number)
|
212 |
st.write(f"\u2705 File downloaded: {pdf_path}")
|
@@ -218,7 +218,7 @@ if __name__ == "__main__":
|
|
218 |
|
219 |
# Generate PDF preview only if not already displayed
|
220 |
if not st.session_state.get("pdf_preview_displayed", False):
|
221 |
-
with st.spinner("
|
222 |
preview_image_path = preview_pdf(pdf_path, scale_factor=0.5)
|
223 |
if preview_image_path:
|
224 |
st.session_state.pdf_preview = preview_image_path
|
@@ -230,7 +230,7 @@ if __name__ == "__main__":
|
|
230 |
|
231 |
# Load the document into the system
|
232 |
st.session_state["loading_complete"] = False
|
233 |
-
with st.spinner("
|
234 |
try:
|
235 |
st.session_state.chain = setup_retrieval_pipeline(
|
236 |
pdf_path, PERSISTED_DIRECTORY, OPENAI_API_KEY
|
@@ -245,7 +245,7 @@ if __name__ == "__main__":
|
|
245 |
st.stop()
|
246 |
|
247 |
if st.session_state["loading_complete"]:
|
248 |
-
st.success("
|
249 |
|
250 |
# Display previous chat messages
|
251 |
if st.session_state.messages:
|
|
|
97 |
|
98 |
# Combine all pages into one text
|
99 |
full_text = "\n".join(extracted_text)
|
100 |
+
st.write(f"Total Cleaned Text Length: {len(full_text)} characters")
|
101 |
|
102 |
# Step 2: Chunk the cleaned text
|
103 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
107 |
)
|
108 |
split_docs = text_splitter.create_documents([full_text])
|
109 |
|
110 |
+
st.write(f"Total Chunks After Splitting: {len(split_docs)}")
|
111 |
for i, doc in enumerate(split_docs[:3]): # Show first 3 chunks only
|
112 |
st.write(f"Chunk {i + 1}: {doc.page_content[:300]}...")
|
113 |
|
|
|
175 |
layout="wide",
|
176 |
initial_sidebar_state="expanded",
|
177 |
)
|
178 |
+
st.header("Patent Chat: Google Patents Chat Demo")
|
179 |
|
180 |
# Input for Google Patent Link
|
181 |
patent_link = st.text_area(
|
|
|
206 |
# File handling
|
207 |
pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
|
208 |
if not os.path.isfile(pdf_path):
|
209 |
+
with st.spinner("Downloading patent file..."):
|
210 |
try:
|
211 |
pdf_path = download_pdf(patent_number)
|
212 |
st.write(f"\u2705 File downloaded: {pdf_path}")
|
|
|
218 |
|
219 |
# Generate PDF preview only if not already displayed
|
220 |
if not st.session_state.get("pdf_preview_displayed", False):
|
221 |
+
with st.spinner("Generating PDF preview..."):
|
222 |
preview_image_path = preview_pdf(pdf_path, scale_factor=0.5)
|
223 |
if preview_image_path:
|
224 |
st.session_state.pdf_preview = preview_image_path
|
|
|
230 |
|
231 |
# Load the document into the system
|
232 |
st.session_state["loading_complete"] = False
|
233 |
+
with st.spinner("Loading document into the system..."):
|
234 |
try:
|
235 |
st.session_state.chain = setup_retrieval_pipeline(
|
236 |
pdf_path, PERSISTED_DIRECTORY, OPENAI_API_KEY
|
|
|
245 |
st.stop()
|
246 |
|
247 |
if st.session_state["loading_complete"]:
|
248 |
+
st.success("Document successfully loaded! You can now start asking questions.")
|
249 |
|
250 |
# Display previous chat messages
|
251 |
if st.session_state.messages:
|