Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -199,9 +199,10 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, pr
|
|
199 |
|
200 |
# Prepare output data
|
201 |
if action_radio in ['Scrape data', 'Both']:
|
|
|
202 |
scraped_data.append({
|
203 |
'url': url,
|
204 |
-
'content':
|
205 |
'timestamp': datetime.datetime.now().isoformat(),
|
206 |
'changes_detected': changes_log
|
207 |
})
|
@@ -231,19 +232,20 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, pr
|
|
231 |
}
|
232 |
zipf.writestr('data.json', json.dumps(data_to_save, indent=2))
|
233 |
|
234 |
-
# Get the path to the temporary file
|
235 |
zip_file_path = tmp_file.name
|
236 |
|
237 |
# Prepare display data
|
238 |
display_data = {
|
239 |
'scraped_urls': len(scraped_data),
|
240 |
'screenshots_taken': len(screenshots),
|
241 |
-
'changes_detected': changes_log
|
|
|
242 |
}
|
243 |
|
244 |
-
# Return the path to the temporary ZIP file
|
245 |
-
return zip_file_path, json.dumps(display_data, indent=2)
|
246 |
-
|
247 |
def create_interface():
|
248 |
"""Create the Gradio interface."""
|
249 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
@@ -289,8 +291,9 @@ def create_interface():
|
|
289 |
process_button = gr.Button("Process URLs", variant="primary")
|
290 |
|
291 |
with gr.Column():
|
292 |
-
screenshot_zip = gr.File(label="Download Results")
|
293 |
scraped_data_output = gr.JSON(label="Results Summary")
|
|
|
294 |
|
295 |
process_button.click(
|
296 |
fn=process_urls,
|
@@ -303,7 +306,8 @@ def create_interface():
|
|
303 |
],
|
304 |
outputs=[
|
305 |
screenshot_zip,
|
306 |
-
scraped_data_output
|
|
|
307 |
],
|
308 |
show_progress=True
|
309 |
)
|
|
|
199 |
|
200 |
# Prepare output data
|
201 |
if action_radio in ['Scrape data', 'Both']:
|
202 |
+
cleaned_content = BeautifulSoup(latest_html, 'html.parser').get_text(separator="\n").strip()
|
203 |
scraped_data.append({
|
204 |
'url': url,
|
205 |
+
'content': cleaned_content,
|
206 |
'timestamp': datetime.datetime.now().isoformat(),
|
207 |
'changes_detected': changes_log
|
208 |
})
|
|
|
232 |
}
|
233 |
zipf.writestr('data.json', json.dumps(data_to_save, indent=2))
|
234 |
|
235 |
+
# # Get the path to the temporary file
|
236 |
zip_file_path = tmp_file.name
|
237 |
|
238 |
# Prepare display data
|
239 |
display_data = {
|
240 |
'scraped_urls': len(scraped_data),
|
241 |
'screenshots_taken': len(screenshots),
|
242 |
+
'changes_detected': changes_log,
|
243 |
+
'screenshots': [(screenshot_url, screenshot_data) for screenshot_url, screenshot_data in screenshots]
|
244 |
}
|
245 |
|
246 |
+
# Return the path to the temporary ZIP file, display data, and screenshots
|
247 |
+
return zip_file_path, json.dumps(display_data, indent=2), [screenshot for _, screenshot in screenshots]
|
248 |
+
|
249 |
def create_interface():
|
250 |
"""Create the Gradio interface."""
|
251 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
291 |
process_button = gr.Button("Process URLs", variant="primary")
|
292 |
|
293 |
with gr.Column():
|
294 |
+
screenshot_zip = gr.File(label="Download Results")
|
295 |
scraped_data_output = gr.JSON(label="Results Summary")
|
296 |
+
screenshot_gallery = gr.Gallery(label="Screenshots", show_label=True).style(grid=2)
|
297 |
|
298 |
process_button.click(
|
299 |
fn=process_urls,
|
|
|
306 |
],
|
307 |
outputs=[
|
308 |
screenshot_zip,
|
309 |
+
scraped_data_output,
|
310 |
+
screenshot_gallery
|
311 |
],
|
312 |
show_progress=True
|
313 |
)
|