bsenst committed (verified)
Commit 9b6b215 · 1 Parent(s): 78ad8e5

remove variable handling bug (#1)


- remove variable handling bug (90127a481dc38e08133155d2f20c48b87c18751f)

Files changed (1):
  1. app.py (+18 -17)
app.py CHANGED
@@ -34,45 +34,46 @@ def run_scraping(url, depth_limit, pagecount_limit):
     return output_filename
 
 # Streamlit interface
-st.title("Scraping")
+st.title("Scraping Tool with URL-based Output File")
 col1, col2 = st.columns(2)
 
 with col1:
     depth_limit = st.slider("Depth Limit", min_value=1, value=2, max_value=5, step=1)
 with col2:
-    pagecount_limit = st.slider(
-        "Page Count", min_value=10, value=10, max_value=50, step=10
-    )
+    pagecount_limit = st.slider("Page Count Limit", min_value=10, value=10, max_value=50, step=10)
+
 url = st.text_input("Enter URL", value="https://bsenst.github.io/toscrape/app-website/")
 
 if st.button("Run Scraping"):
-
     if check_scraping_status() == "Scraping running":
         st.warning("Scraping in progress...")
     else:
-        run_scraping(url)
+        output_filename = run_scraping(url, depth_limit, pagecount_limit)
 
 if st.button("Status Scraping"):
-
-    # Check if output.json exists and load data
-    output_file = 'output.json'
+    identifier = clean_string_for_filename(url)
+    output_file = f"output_{identifier}.json"
 
     if check_scraping_status() == "Scraping running":
-        st.warning("Scraping is running")
+        st.warning("Scraping is running.")
     elif os.path.exists(output_file):
         try:
             with open(output_file, "r") as f:
                 scraped_data = json.load(f)
                 page_count = len(scraped_data)
 
-                # Show download button if output.json exists
-                st.download_button("Download Scraping Output", data=json.dumps(scraped_data), file_name=output_file)
-                # Display no of pages scraped results
+                # Show download button if output file exists
+                st.download_button(
+                    "Download Scraping Output",
+                    data=json.dumps(scraped_data),
+                    file_name=output_file,
+                )
+                # Display number of pages scraped
                 st.write(f"{page_count} pages scraped:")
                 # Display scraping results
-                st.write([(el["url"],el["title"]) for el in scraped_data])
-
+                st.write([(el["url"], el["title"]) for el in scraped_data])
+
         except Exception as e:
-            st.warning(f"Error with opening the output.json {e}")
+            st.warning(f"Error with opening {output_file}: {e}")
     else:
-        st.warning("No output file found. Please run the scraping command.")
+        st.warning("No output file found. Please run the scraping command.")