acecalisto3 committed on
Commit
61a1b66
·
verified ·
1 Parent(s): 70be69d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -199,9 +199,10 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, pr
199
 
200
  # Prepare output data
201
  if action_radio in ['Scrape data', 'Both']:
 
202
  scraped_data.append({
203
  'url': url,
204
- 'content': latest_html,
205
  'timestamp': datetime.datetime.now().isoformat(),
206
  'changes_detected': changes_log
207
  })
@@ -231,19 +232,20 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, pr
231
  }
232
  zipf.writestr('data.json', json.dumps(data_to_save, indent=2))
233
 
234
- # Get the path to the temporary file
235
  zip_file_path = tmp_file.name
236
 
237
  # Prepare display data
238
  display_data = {
239
  'scraped_urls': len(scraped_data),
240
  'screenshots_taken': len(screenshots),
241
- 'changes_detected': changes_log
 
242
  }
243
 
244
- # Return the path to the temporary ZIP file and display data
245
- return zip_file_path, json.dumps(display_data, indent=2)
246
-
247
  def create_interface():
248
  """Create the Gradio interface."""
249
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -289,8 +291,9 @@ def create_interface():
289
  process_button = gr.Button("Process URLs", variant="primary")
290
 
291
  with gr.Column():
292
- screenshot_zip = gr.File(label="Download Results") # Removed file_name
293
  scraped_data_output = gr.JSON(label="Results Summary")
 
294
 
295
  process_button.click(
296
  fn=process_urls,
@@ -303,7 +306,8 @@ def create_interface():
303
  ],
304
  outputs=[
305
  screenshot_zip,
306
- scraped_data_output
 
307
  ],
308
  show_progress=True
309
  )
 
199
 
200
  # Prepare output data
201
  if action_radio in ['Scrape data', 'Both']:
202
+ cleaned_content = BeautifulSoup(latest_html, 'html.parser').get_text(separator="\n").strip()
203
  scraped_data.append({
204
  'url': url,
205
+ 'content': cleaned_content,
206
  'timestamp': datetime.datetime.now().isoformat(),
207
  'changes_detected': changes_log
208
  })
 
232
  }
233
  zipf.writestr('data.json', json.dumps(data_to_save, indent=2))
234
 
235
+ # Get the path to the temporary file
236
  zip_file_path = tmp_file.name
237
 
238
  # Prepare display data
239
  display_data = {
240
  'scraped_urls': len(scraped_data),
241
  'screenshots_taken': len(screenshots),
242
+ 'changes_detected': changes_log,
243
+ 'screenshots': [(screenshot_url, screenshot_data) for screenshot_url, screenshot_data in screenshots]
244
  }
245
 
246
+ # Return the path to the temporary ZIP file, display data, and screenshots
247
+ return zip_file_path, json.dumps(display_data, indent=2), [screenshot for _, screenshot in screenshots]
248
+
249
  def create_interface():
250
  """Create the Gradio interface."""
251
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
291
  process_button = gr.Button("Process URLs", variant="primary")
292
 
293
  with gr.Column():
294
+ screenshot_zip = gr.File(label="Download Results")
295
  scraped_data_output = gr.JSON(label="Results Summary")
296
+ screenshot_gallery = gr.Gallery(label="Screenshots", show_label=True).style(grid=2)
297
 
298
  process_button.click(
299
  fn=process_urls,
 
306
  ],
307
  outputs=[
308
  screenshot_zip,
309
+ scraped_data_output,
310
+ screenshot_gallery
311
  ],
312
  show_progress=True
313
  )