bsenst committed (verified)
Commit 9b6b215 · 1 Parent(s): 78ad8e5

remove variable handling bug (#1)


- remove variable handling bug (90127a481dc38e08133155d2f20c48b87c18751f)

Files changed (1):
  1. app.py (+18 -17)
app.py CHANGED
@@ -34,45 +34,46 @@ def run_scraping(url, depth_limit, pagecount_limit):
     return output_filename
 
 # Streamlit interface
-st.title("Scraping")
+st.title("Scraping Tool with URL-based Output File")
 col1, col2 = st.columns(2)
 
 with col1:
     depth_limit = st.slider("Depth Limit", min_value=1, value=2, max_value=5, step=1)
 with col2:
-    pagecount_limit = st.slider(
-        "Page Count", min_value=10, value=10, max_value=50, step=10
-    )
+    pagecount_limit = st.slider("Page Count Limit", min_value=10, value=10, max_value=50, step=10)
+
 url = st.text_input("Enter URL", value="https://bsenst.github.io/toscrape/app-website/")
 
 if st.button("Run Scraping"):
-
     if check_scraping_status() == "Scraping running":
         st.warning("Scraping in progress...")
     else:
-        run_scraping(url)
+        output_filename = run_scraping(url, depth_limit, pagecount_limit)
 
 if st.button("Status Scraping"):
-
-    # Check if output.json exists and load data
-    output_file = 'output.json'
+    identifier = clean_string_for_filename(url)
+    output_file = f"output_{identifier}.json"
 
     if check_scraping_status() == "Scraping running":
-        st.warning("Scraping is running")
+        st.warning("Scraping is running.")
     elif os.path.exists(output_file):
         try:
             with open(output_file, "r") as f:
                 scraped_data = json.load(f)
                 page_count = len(scraped_data)
 
-                # Show download button if output.json exists
-                st.download_button("Download Scraping Output", data=json.dumps(scraped_data), file_name=output_file)
-                # Display no of pages scraped results
+                # Show download button if output file exists
+                st.download_button(
+                    "Download Scraping Output",
+                    data=json.dumps(scraped_data),
+                    file_name=output_file,
+                )
+                # Display number of pages scraped
                 st.write(f"{page_count} pages scraped:")
                 # Display scraping results
-                st.write([(el["url"],el["title"]) for el in scraped_data])
-
+                st.write([(el["url"], el["title"]) for el in scraped_data])
+
         except Exception as e:
-            st.warning(f"Error with opening the output.json {e}")
+            st.warning(f"Error with opening {output_file}: {e}")
     else:
-        st.warning("No output file found. Please run the scraping command.")
+        st.warning("No output file found. Please run the scraping command.")