Spaces:

Arxived
/

chat-w-g-patents

Sleeping

App Files Community

DrishtiSharma committed on Dec 21, 2024

Commit

60297bb

verified ·

1 Parent(s): 03e92fa

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -7

app.py CHANGED Viewed

@@ -97,7 +97,7 @@ def load_docs(document_path):
         # Combine all pages into one text
         full_text = "\n".join(extracted_text)
-        st.write(f"\ud83d\udd8d Total Cleaned Text Length: {len(full_text)} characters")
         # Step 2: Chunk the cleaned text
         text_splitter = RecursiveCharacterTextSplitter(
@@ -107,7 +107,7 @@ def load_docs(document_path):
         )
         split_docs = text_splitter.create_documents([full_text])
-        st.write(f"\ud83d\udd0d Total Chunks After Splitting: {len(split_docs)}")
         for i, doc in enumerate(split_docs[:3]):  # Show first 3 chunks only
             st.write(f"Chunk {i + 1}: {doc.page_content[:300]}...")
@@ -175,7 +175,7 @@ if __name__ == "__main__":
         layout="wide",
         initial_sidebar_state="expanded",
     )
-    st.header("\ud83d\udd8a\ufe0f Patent Chat: Google Patents Chat Demo")
     # Input for Google Patent Link
     patent_link = st.text_area(
@@ -206,7 +206,7 @@ if __name__ == "__main__":
         # File handling
         pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
         if not os.path.isfile(pdf_path):
-            with st.spinner("\ud83d\udd10 Downloading patent file..."):
                 try:
                     pdf_path = download_pdf(patent_number)
                     st.write(f"\u2705 File downloaded: {pdf_path}")
@@ -218,7 +218,7 @@ if __name__ == "__main__":
         # Generate PDF preview only if not already displayed
         if not st.session_state.get("pdf_preview_displayed", False):
-            with st.spinner("\ud83d\uddbc\ufe0f Generating PDF preview..."):
                 preview_image_path = preview_pdf(pdf_path, scale_factor=0.5)
                 if preview_image_path:
                     st.session_state.pdf_preview = preview_image_path
@@ -230,7 +230,7 @@ if __name__ == "__main__":
         # Load the document into the system
         st.session_state["loading_complete"] = False
-        with st.spinner("\ud83d\udd04 Loading document into the system..."):
             try:
                 st.session_state.chain = setup_retrieval_pipeline(
                     pdf_path, PERSISTED_DIRECTORY, OPENAI_API_KEY
@@ -245,7 +245,7 @@ if __name__ == "__main__":
                 st.stop()
         if st.session_state["loading_complete"]:
-            st.success("\ud83d\ude80 Document successfully loaded! You can now start asking questions.")
     # Display previous chat messages
     if st.session_state.messages:

         # Combine all pages into one text
         full_text = "\n".join(extracted_text)
+        st.write(f"Total Cleaned Text Length: {len(full_text)} characters")
         # Step 2: Chunk the cleaned text
         text_splitter = RecursiveCharacterTextSplitter(
         )
         split_docs = text_splitter.create_documents([full_text])
+        st.write(f"Total Chunks After Splitting: {len(split_docs)}")
         for i, doc in enumerate(split_docs[:3]):  # Show first 3 chunks only
             st.write(f"Chunk {i + 1}: {doc.page_content[:300]}...")
         layout="wide",
         initial_sidebar_state="expanded",
     )
+    st.header("Patent Chat: Google Patents Chat Demo")
     # Input for Google Patent Link
     patent_link = st.text_area(
         # File handling
         pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
         if not os.path.isfile(pdf_path):
+            with st.spinner("Downloading patent file..."):
                 try:
                     pdf_path = download_pdf(patent_number)
                     st.write(f"\u2705 File downloaded: {pdf_path}")
         # Generate PDF preview only if not already displayed
         if not st.session_state.get("pdf_preview_displayed", False):
+            with st.spinner("Generating PDF preview..."):
                 preview_image_path = preview_pdf(pdf_path, scale_factor=0.5)
                 if preview_image_path:
                     st.session_state.pdf_preview = preview_image_path
         # Load the document into the system
         st.session_state["loading_complete"] = False
+        with st.spinner("Loading document into the system..."):
             try:
                 st.session_state.chain = setup_retrieval_pipeline(
                     pdf_path, PERSISTED_DIRECTORY, OPENAI_API_KEY
                 st.stop()
         if st.session_state["loading_complete"]:
+            st.success("Document successfully loaded! You can now start asking questions.")
     # Display previous chat messages
     if st.session_state.messages: