Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -136,7 +136,7 @@ def crawl_url(url, depth, max_depth, visited=None):
 
     return screenshots
 
-def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, progress=gr.Progress()):
+def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, mode='standard', progress=gr.Progress()):
     """Process URLs with crawl depth and change detection."""
     # Validate URLs first
     urls = re.split(r'[,\n]+', url_input.strip()) if bulk_toggle else [url_input]
@@ -146,7 +146,10 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, pr
     # Validate all URLs
     invalid_urls = [url for url in urls if not validate_url(url)]
     if invalid_urls:
-        return None, json.dumps({"error": f"Invalid URLs detected: {', '.join(invalid_urls)}"}, indent=2)
+        if mode == 'chat':
+            return f"Invalid URLs detected: {', '.join(invalid_urls)}"
+        else:
+            return None, json.dumps({"error": f"Invalid URLs detected: {', '.join(invalid_urls)}"}, indent=2)
 
     scraped_data = []
     screenshots = []
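
Both branches of the validation check above rely on a validate_url helper that is defined earlier in app.py and does not appear in this diff. For reference, a minimal sketch of such a check using only the standard library (the Space's actual implementation may differ):

from urllib.parse import urlparse

def validate_url(url: str) -> bool:
    """Accept only absolute http(s) URLs that have a host component."""
    try:
        parsed = urlparse(url.strip())
    except ValueError:
        return False
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)

print(validate_url("https://example.com"))  # True  -> URL gets processed
print(validate_url("not a url"))            # False -> reported via the error branch
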
@@ -213,72 +216,53 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, pr
         # Update progress
         progress((idx + 1) / total_urls)
 
-    # Create a temporary file to store the ZIP
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
-        with zipfile.ZipFile(tmp_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            # Add screenshots to ZIP
-            for screenshot_url, screenshot_data in screenshots:
-                sanitized_screenshot_url = sanitize_filename(screenshot_url)
-                filename = f"{sanitized_screenshot_url}.png"
-                zipf.writestr(filename, screenshot_data)
-
-            # Add scraped data and changes log to ZIP
-            if scraped_data:
-                data_to_save = {
-                    'scraped_data': scraped_data,
-                    'changes_log': changes_log,
-                    'timestamp': datetime.datetime.now().isoformat()
-                }
-                zipf.writestr('data.json', json.dumps(data_to_save, indent=2))
-
-        # Get the path to the temporary file
-        zip_file_path = tmp_file.name
-
-    # Prepare display data
-    display_data = {
-        'total_scraped_urls': len(scraped_data),
-        'total_screenshots_taken': len(screenshots),
-        'changes_detected': changes_log,
-        'scraped_data': scraped_data  # Include full scraped data
-    }
-
-    # Return the path to the temporary ZIP file and display data
-    return zip_file_path, json.dumps(display_data, indent=2)
-
-from smolagents import tool
+    if mode == 'chat':
+        return "\n".join(changes_log)
+    else:
+        # Create a temporary file to store the ZIP
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
+            with zipfile.ZipFile(tmp_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                # Add screenshots to ZIP
+                for screenshot_url, screenshot_data in screenshots:
+                    sanitized_screenshot_url = sanitize_filename(screenshot_url)
+                    filename = f"{sanitized_screenshot_url}.png"
+                    zipf.writestr(filename, screenshot_data)
+
+                # Add scraped data and changes log to ZIP
+                if scraped_data:
+                    data_to_save = {
+                        'scraped_data': scraped_data,
+                        'changes_log': changes_log,
+                        'timestamp': datetime.datetime.now().isoformat()
+                    }
+                    zipf.writestr('data.json', json.dumps(data_to_save, indent=2))
+
+            # Get the path to the temporary file
+            zip_file_path = tmp_file.name
+
+        # Prepare display data
+        display_data = {
+            'total_scraped_urls': len(scraped_data),
+            'total_screenshots_taken': len(screenshots),
+            'changes_detected': changes_log,
+            'scraped_data': scraped_data  # Include full scraped data
+        }
+
+        # Return the path to the temporary ZIP file and display data
+        return zip_file_path, json.dumps(display_data, indent=2)
 
-@tool
 def recognize_intent(instruction: str) -> str:
-    """
-    Recognizes the intent from the user's instruction.
-    Args:
-        instruction: The input instruction from the user.
-
-    Returns:
-        The recognized intent as a string.
-    """
     instruction = instruction.lower()
     if "scrape all links" in instruction:
         return "scrape_links"
     elif "extract all images" in instruction:
         return "extract_images"
+    elif "monitor changes" in instruction:
+        return "monitor_changes"
     else:
         return "unknown"
 
-
-def generate_command(intent: str, url_input: str, bulk_toggle: bool, max_urls: int) -> str:
-    """
-    Generates a command based on the recognized intent.
-
-    Args:
-        intent: The recognized intent from the user input.
-        url_input: The input URL(s) from the user.
-        bulk_toggle: Indicates if multiple URLs are being processed.
-        max_urls: The maximum number of URLs to process.
-
-    Returns:
-        The result of the command execution.
-    """
+def generate_command(intent: str, url_input: str, bulk_toggle: bool, max_urls: int, crawl_depth: int, session_id: str) -> str:
     urls = re.split(r'[,\n]+', url_input.strip()) if bulk_toggle else [url_input]
     urls = [url.strip() for url in urls if url.strip()]
     urls = urls[:max_urls]
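
The new standard-mode branch assembles its download by writing screenshots and a JSON summary into a ZIP held in a tempfile.NamedTemporaryFile. A self-contained sketch of the same pattern, with toy data standing in for the Space's scraped results (bundle_results and its sample inputs are illustrative, not part of the commit):

import datetime
import json
import tempfile
import zipfile

def bundle_results(screenshots, scraped_data, changes_log):
    """Write PNG bytes and a JSON summary into a temporary ZIP; return its path."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
        with zipfile.ZipFile(tmp_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for name, png_bytes in screenshots:
                zipf.writestr(f"{name}.png", png_bytes)
            zipf.writestr('data.json', json.dumps({
                'scraped_data': scraped_data,
                'changes_log': changes_log,
                'timestamp': datetime.datetime.now().isoformat()
            }, indent=2))
        return tmp_file.name

zip_path = bundle_results([("example_com", b"fake-png-bytes")],
                          [{"url": "https://example.com"}], [])
print(zip_path)  # e.g. /tmp/tmpab12cd34.zip

Note that delete=False is load-bearing here: the ZIP must outlive the handler so Gradio can stream it to the browser, which also means something else (or the OS) has to clean the file up later.
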
@@ -299,15 +283,18 @@ def generate_command(intent: str, url_input: str, bulk_toggle: bool, max_urls: i
             all_images.extend(images)
         return f"Extracted images: {', '.join(all_images)}"
 
+    elif intent == "monitor_changes":
+        changes_log = process_urls(url_input, bulk_toggle, "Scrape data", max_urls, crawl_depth, mode='chat')
+        return changes_log
+
     return "Instruction not recognized. Please try again."
 
-def chat_based_scrape(instruction, url_input, bulk_toggle, max_urls, crawl_depth):
-    """Handle chat-based instructions for scraping."""
+def chat_based_scrape(instruction, url_input, bulk_toggle, max_urls, crawl_depth, session_id):
     # Recognize intent
     intent = recognize_intent(instruction)
 
     # Generate command based on the recognized intent
-    command_output = generate_command(intent, url_input, bulk_toggle, max_urls)
+    command_output = generate_command(intent, url_input, bulk_toggle, max_urls, crawl_depth, session_id)
 
     return command_output
 
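
recognize_intent matches hard-coded phrases with an if/elif chain, so each new intent (like monitor_changes here) means another branch. A table-driven lookup keeps phrase and intent side by side as the list grows; an illustrative refactor, not what this commit does:

INTENT_KEYWORDS = {
    "scrape all links": "scrape_links",
    "extract all images": "extract_images",
    "monitor changes": "monitor_changes",
}

def recognize_intent(instruction: str) -> str:
    """Return the first intent whose trigger phrase appears in the instruction."""
    instruction = instruction.lower()
    for phrase, intent in INTENT_KEYWORDS.items():
        if phrase in instruction:
            return intent
    return "unknown"

assert recognize_intent("Please monitor changes on my blog") == "monitor_changes"
assert recognize_intent("hello") == "unknown"
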
@@ -393,6 +380,13 @@ def create_interface():
             step=1,
             label="Max URLs to process"
         )
+        chat_crawl_depth = gr.Slider(
+            minimum=1,
+            maximum=3,
+            value=1,
+            step=1,
+            label="Crawl Depth"
+        )
         chat_output = gr.Textbox(label="Chat Output")
 
         chat_button = gr.Button("Submit Instruction", variant="primary")
@@ -403,7 +397,9 @@ def create_interface():
             chat_instruction,
             chat_url_input,
             chat_bulk_toggle,
-            chat_max_urls
+            chat_max_urls,
+            chat_crawl_depth,
+            gr.Session
         ],
         outputs=chat_output
     )
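
One caveat with this hunk: gr.Session is not part of Gradio's public API, so evaluating this inputs list will raise an AttributeError when the app starts, which may be what the Space's runtime-error status reflects. The conventional way to thread a per-session value such as session_id through a handler is a hidden gr.State initialized on page load. A minimal sketch under that assumption:

import uuid
import gradio as gr

def new_session_id():
    return str(uuid.uuid4())

with gr.Blocks() as demo:
    session_id = gr.State("")                      # hidden per-session value
    demo.load(new_session_id, outputs=session_id)  # fresh id on each page load

    chat_instruction = gr.Textbox(label="Instruction")
    chat_output = gr.Textbox(label="Chat Output")
    chat_button = gr.Button("Submit Instruction", variant="primary")

    def handle(instruction, sid):
        # A real handler would forward sid to chat_based_scrape
        return f"[session {sid[:8]}] received: {instruction}"

    chat_button.click(handle, inputs=[chat_instruction, session_id], outputs=chat_output)

if __name__ == "__main__":
    demo.launch()
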
@@ -420,6 +416,7 @@ def create_interface():
     )
 
     return demo
+
 if __name__ == "__main__":
     demo = create_interface()  # Call the function to create the interface
     demo.launch()  # Launch the Gradio app
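
Taken together, the mode parameter gives process_urls two calling conventions: chat mode returns a plain string for the chat textbox, while standard mode returns a (zip_path, display_json) pair for the file and JSON outputs. Illustrative invocations (argument values made up):

# Chat mode: a newline-joined changes log, suitable for a single Textbox
log_text = process_urls("https://example.com", False, "Scrape data",
                        max_urls=1, crawl_depth=1, mode='chat')

# Standard mode: a ZIP path plus a JSON string for the download/display pair
zip_path, display_json = process_urls("https://example.com", False, "Scrape data",
                                      max_urls=1, crawl_depth=1)

Callers therefore have to agree on the mode up front; wiring a chat-mode call to the two standard-mode Gradio outputs (or vice versa) would fail when the return value is unpacked.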