Update app.py
app.py
CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
 from googlesearch import search
 import requests
 from bs4 import BeautifulSoup
-import chunk #
+import chunk  # Importing the chunk module

 # Function to perform Google search and return the first two links
 def google_search(query):
@@ -29,11 +29,11 @@ def scrape_text(webpage_content):
     try:
         soup = BeautifulSoup(webpage_content, 'html.parser')
         for script in soup(["script", "style"]):
-            script.decompose()
-        text = soup.get_text()
-        lines = (line.strip() for line in text.splitlines())
-        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
-        text = '\n'.join(chunk for chunk in chunks if chunk)
+            script.decompose()  # Remove unnecessary elements
+        text = soup.get_text()  # Get raw text
+        lines = (line.strip() for line in text.splitlines())  # Strip lines
+        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))  # Split and clean
+        text = '\n'.join(chunk for chunk in chunks if chunk)  # Join cleaned text
         return text
     except Exception as e:
         st.error(f"Failed to scrape text from webpage content: {e}")
@@ -48,23 +48,39 @@ query = st.text_input("Enter search query", "")
 # Button to trigger search
 if st.button("Search"):
     if query:
-        first_two_links = google_search(query)
+        first_two_links = google_search(query)  # Get first two links
         if first_two_links:
             for i, link in enumerate(first_two_links, 1):
-                st.success(f"Link {i}: [Click here]({link})")
-
+                st.success(f"Link {i}: [Click here]({link})")  # Display links
+
                 # Fetch webpage content
                 webpage_content = fetch_webpage_content(link)
                 if webpage_content:
                     # Scrape text from webpage content
                     scraped_text = scrape_text(webpage_content)
-
+
+                    if scraped_text:  # Ensure scraped_text is not empty
+                        st.write(f"Scraped Content for Link {i}:")
+                        st.text(scraped_text[:500])  # Display first 500 characters of the content
+
                         # Chunk the scraped text using chunk.py
                         chunked_text = chunk.chunk_text(scraped_text)
+
+                        if chunked_text:  # Ensure chunked_text is not empty
+                            st.write(f"Chunked Data for Link {i}:")
+                            for chunk_part in chunked_text:
+                                st.write(chunk_part)  # Display each chunk

-
-
-
+                            # Save and download chunked data using chunk.py
+                            chunk.save_and_download_chunked_data(chunked_text, file_name=f"chunked_data_link_{i}.txt")
+                        else:
+                            st.warning("No chunked data available")
+                    else:
+                        st.warning("No content scraped from this link")
+        else:
+            st.warning("No results found")
+    else:
+        st.error("Please enter a query")

-
-
+
+
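The change relies on a local chunk.py module that exposes chunk_text() and save_and_download_chunked_data(), but that module is not part of this diff, so its API is not visible here. The following is only a minimal sketch of what such a module could look like, assuming fixed-size character chunks and Streamlit's st.download_button for delivery; the chunk size, the joining format, and the download mechanism are all assumptions, not the repository's actual implementation.

# chunk.py -- hypothetical sketch; the real module in this repo may differ.
import streamlit as st

def chunk_text(text, chunk_size=500):
    """Split text into fixed-size character chunks (assumed behaviour)."""
    if not text:
        return []
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

def save_and_download_chunked_data(chunks, file_name="chunked_data.txt"):
    """Join the chunks and offer them as a downloadable text file via Streamlit."""
    joined = "\n\n".join(chunks)
    st.download_button(
        label=f"Download {file_name}",
        data=joined,
        file_name=file_name,
        mime="text/plain",
    )

With a module like this, chunk.chunk_text(scraped_text) returns a list of strings that the app iterates over with st.write, and chunk.save_and_download_chunked_data(...) renders a download button for the joined chunks under each link's results.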