Spaces:
Running
Running
remove variable handling bug (#1)
Browse files- remove variable handling bug (90127a481dc38e08133155d2f20c48b87c18751f)
app.py
CHANGED
@@ -34,45 +34,46 @@ def run_scraping(url, depth_limit, pagecount_limit):
|
|
34 |
return output_filename
|
35 |
|
36 |
# Streamlit interface
|
37 |
-
st.title("Scraping")
|
38 |
col1, col2 = st.columns(2)
|
39 |
|
40 |
with col1:
|
41 |
depth_limit = st.slider("Depth Limit", min_value=1, value=2, max_value=5, step=1)
|
42 |
with col2:
|
43 |
-
pagecount_limit = st.slider(
|
44 |
-
|
45 |
-
)
|
46 |
url = st.text_input("Enter URL", value="https://bsenst.github.io/toscrape/app-website/")
|
47 |
|
48 |
if st.button("Run Scraping"):
|
49 |
-
|
50 |
if check_scraping_status() == "Scraping running":
|
51 |
st.warning("Scraping in progress...")
|
52 |
else:
|
53 |
-
run_scraping(url)
|
54 |
|
55 |
if st.button("Status Scraping"):
|
56 |
-
|
57 |
-
|
58 |
-
output_file = 'output.json'
|
59 |
|
60 |
if check_scraping_status() == "Scraping running":
|
61 |
-
st.warning("Scraping is running")
|
62 |
elif os.path.exists(output_file):
|
63 |
try:
|
64 |
with open(output_file, "r") as f:
|
65 |
scraped_data = json.load(f)
|
66 |
page_count = len(scraped_data)
|
67 |
|
68 |
-
# Show download button if output
|
69 |
-
st.download_button(
|
70 |
-
|
|
|
|
|
|
|
|
|
71 |
st.write(f"{page_count} pages scraped:")
|
72 |
# Display scraping results
|
73 |
-
st.write([(el["url"],el["title"]) for el in scraped_data])
|
74 |
-
|
75 |
except Exception as e:
|
76 |
-
st.warning(f"Error with opening
|
77 |
else:
|
78 |
-
st.warning("No output file found. Please run the scraping command.")
|
|
|
34 |
return output_filename
|
35 |
|
36 |
# Streamlit interface
|
37 |
+
st.title("Scraping Tool with URL-based Output File")
|
38 |
col1, col2 = st.columns(2)
|
39 |
|
40 |
with col1:
|
41 |
depth_limit = st.slider("Depth Limit", min_value=1, value=2, max_value=5, step=1)
|
42 |
with col2:
|
43 |
+
pagecount_limit = st.slider("Page Count Limit", min_value=10, value=10, max_value=50, step=10)
|
44 |
+
|
|
|
45 |
url = st.text_input("Enter URL", value="https://bsenst.github.io/toscrape/app-website/")
|
46 |
|
47 |
if st.button("Run Scraping"):
|
|
|
48 |
if check_scraping_status() == "Scraping running":
|
49 |
st.warning("Scraping in progress...")
|
50 |
else:
|
51 |
+
output_filename = run_scraping(url, depth_limit, pagecount_limit)
|
52 |
|
53 |
if st.button("Status Scraping"):
|
54 |
+
identifier = clean_string_for_filename(url)
|
55 |
+
output_file = f"output_{identifier}.json"
|
|
|
56 |
|
57 |
if check_scraping_status() == "Scraping running":
|
58 |
+
st.warning("Scraping is running.")
|
59 |
elif os.path.exists(output_file):
|
60 |
try:
|
61 |
with open(output_file, "r") as f:
|
62 |
scraped_data = json.load(f)
|
63 |
page_count = len(scraped_data)
|
64 |
|
65 |
+
# Show download button if output file exists
|
66 |
+
st.download_button(
|
67 |
+
"Download Scraping Output",
|
68 |
+
data=json.dumps(scraped_data),
|
69 |
+
file_name=output_file,
|
70 |
+
)
|
71 |
+
# Display number of pages scraped
|
72 |
st.write(f"{page_count} pages scraped:")
|
73 |
# Display scraping results
|
74 |
+
st.write([(el["url"], el["title"]) for el in scraped_data])
|
75 |
+
|
76 |
except Exception as e:
|
77 |
+
st.warning(f"Error with opening {output_file}: {e}")
|
78 |
else:
|
79 |
+
st.warning("No output file found. Please run the scraping command.")
|