Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import requests
|
|
3 |
import re
|
4 |
import logging
|
5 |
import json
|
|
|
6 |
from selenium import webdriver
|
7 |
from selenium.webdriver.chrome.options import Options
|
8 |
from PIL import Image
|
@@ -11,8 +12,6 @@ import zipfile
|
|
11 |
import os
|
12 |
import datetime
|
13 |
from urllib.parse import urlparse
|
14 |
-
from bs4 import BeautifulSoup
|
15 |
-
import tempfile
|
16 |
|
17 |
# Configure logging
|
18 |
logging.basicConfig(level=logging.INFO,
|
@@ -253,16 +252,23 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, mo
|
|
253 |
|
254 |
def recognize_intent(instruction: str) -> str:
|
255 |
instruction = instruction.lower()
|
256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
return "scrape_links"
|
258 |
-
|
259 |
-
|
260 |
-
elif re.search(r'\
|
261 |
return "monitor_changes"
|
|
|
262 |
else:
|
263 |
return "unknown"
|
264 |
|
265 |
-
def generate_command(intent: str, url_input: str, bulk_toggle: bool, max_urls: int, crawl_depth: int
|
266 |
urls = re.split(r'[,\n]+', url_input.strip()) if bulk_toggle else [url_input]
|
267 |
urls = [url.strip() for url in urls if url.strip()]
|
268 |
urls = urls[:max_urls]
|
@@ -274,14 +280,14 @@ def generate_command(intent: str, url_input: str, bulk_toggle: bool, max_urls: i
|
|
274 |
all_links.extend(links)
|
275 |
return f"Extracted links: {', '.join(all_links)}"
|
276 |
|
277 |
-
elif intent == "
|
278 |
-
|
279 |
for url in urls:
|
280 |
response = requests.get(url, timeout=10)
|
281 |
soup = BeautifulSoup(response.text, 'html.parser')
|
282 |
-
images =
|
283 |
-
|
284 |
-
return f"
|
285 |
|
286 |
elif intent == "monitor_changes":
|
287 |
changes_log = process_urls(url_input, bulk_toggle, "Scrape data", max_urls, crawl_depth, mode='chat')
|
@@ -289,14 +295,17 @@ def generate_command(intent: str, url_input: str, bulk_toggle: bool, max_urls: i
|
|
289 |
|
290 |
return "Instruction not recognized. Please try again."
|
291 |
|
292 |
-
def chat_based_scrape(instruction, url_input, bulk_toggle, max_urls, crawl_depth
|
|
|
293 |
# Recognize intent
|
294 |
intent = recognize_intent(instruction)
|
|
|
295 |
|
296 |
# Generate command based on the recognized intent
|
297 |
-
command_output = generate_command(intent, url_input, bulk_toggle, max_urls, crawl_depth
|
298 |
|
299 |
return command_output
|
|
|
300 |
def create_interface():
|
301 |
"""Create the Gradio interface."""
|
302 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
@@ -388,9 +397,6 @@ def create_interface():
|
|
388 |
)
|
389 |
chat_output = gr.Textbox(label="Chat Output")
|
390 |
|
391 |
-
# Initialize session state
|
392 |
-
session_state = gr.State({})
|
393 |
-
|
394 |
chat_button = gr.Button("Submit Instruction", variant="primary")
|
395 |
|
396 |
chat_button.click(
|
@@ -418,44 +424,6 @@ def create_interface():
|
|
418 |
|
419 |
return demo
|
420 |
|
421 |
-
def chat_based_scrape(instruction, url_input, bulk_toggle, max_urls, crawl_depth):
|
422 |
-
print(f"Received instruction: {instruction}")
|
423 |
-
# Recognize intent
|
424 |
-
intent = recognize_intent(instruction)
|
425 |
-
print(f"Recognized intent: {intent}")
|
426 |
-
|
427 |
-
# Generate command based on the recognized intent
|
428 |
-
command_output = generate_command(intent, url_input, bulk_toggle, max_urls, crawl_depth)
|
429 |
-
|
430 |
-
return command_output
|
431 |
-
|
432 |
-
def generate_command(intent: str, url_input: str, bulk_toggle: bool, max_urls: int, crawl_depth: int) -> str:
|
433 |
-
urls = re.split(r'[,\n]+', url_input.strip()) if bulk_toggle else [url_input]
|
434 |
-
urls = [url.strip() for url in urls if url.strip()]
|
435 |
-
urls = urls[:max_urls]
|
436 |
-
|
437 |
-
if intent == "scrape_links":
|
438 |
-
all_links = []
|
439 |
-
for url in urls:
|
440 |
-
links = extract_links_from_page(url)
|
441 |
-
all_links.extend(links)
|
442 |
-
return f"Extracted links: {', '.join(all_links)}"
|
443 |
-
|
444 |
-
elif intent == "extract_images":
|
445 |
-
all_images = []
|
446 |
-
for url in urls:
|
447 |
-
response = requests.get(url, timeout=10)
|
448 |
-
soup = BeautifulSoup(response.text, 'html.parser')
|
449 |
-
images = [img['src'] for img in soup.find_all('img', src=True)]
|
450 |
-
all_images.extend(images)
|
451 |
-
return f"Extracted images: {', '.join(all_images)}"
|
452 |
-
|
453 |
-
elif intent == "monitor_changes":
|
454 |
-
changes_log = process_urls(url_input, bulk_toggle, "Scrape data", max_urls, crawl_depth, mode='chat')
|
455 |
-
return changes_log
|
456 |
-
|
457 |
-
return "Instruction not recognized. Please try again."
|
458 |
-
|
459 |
if __name__ == "__main__":
|
460 |
demo = create_interface() # Call the function to create the interface
|
461 |
-
demo.launch() # Launch the Gradio app
|
|
|
3 |
import re
|
4 |
import logging
|
5 |
import json
|
6 |
+
from bs4 import BeautifulSoup
|
7 |
from selenium import webdriver
|
8 |
from selenium.webdriver.chrome.options import Options
|
9 |
from PIL import Image
|
|
|
12 |
import os
|
13 |
import datetime
|
14 |
from urllib.parse import urlparse
|
|
|
|
|
15 |
|
16 |
# Configure logging
|
17 |
logging.basicConfig(level=logging.INFO,
|
|
|
252 |
|
253 |
def recognize_intent(instruction: str) -> str:
    """Classify a free-text user instruction into a known scraping intent.

    Matching is case-insensitive and first-match-wins, in the order:
    image counting, link listing, change monitoring.

    Args:
        instruction: Raw instruction text from the chat box.

    Returns:
        One of "count_images", "scrape_links", "monitor_changes",
        or "unknown" when no pattern matches.
    """
    text = instruction.lower()

    # Ordered (pattern, intent) pairs; iteration order mirrors the
    # original if/elif chain so precedence is preserved.
    intent_patterns = (
        (r'\b(count the images|how many images|total images|image count)', "count_images"),
        (r'\b(list all links|find hyperlinks|show me urls|extract links)', "scrape_links"),
        (r'\b(monitor changes|watch for updates|detect changes|track updates)', "monitor_changes"),
    )

    for pattern, intent in intent_patterns:
        if re.search(pattern, text):
            return intent
    return "unknown"
|
270 |
|
271 |
+
def generate_command(intent: str, url_input: str, bulk_toggle: bool, max_urls: int, crawl_depth: int) -> str:
    """Execute the scraping action matching *intent* over the given URLs.

    Args:
        intent: Intent label from recognize_intent ("scrape_links",
            "count_images", "monitor_changes", or anything else for the
            fallback message).
        url_input: Raw URL text; comma/newline separated when bulk_toggle.
        bulk_toggle: Treat url_input as a list of URLs when True.
        max_urls: Maximum number of URLs to process.
        crawl_depth: Forwarded to process_urls for the monitoring path.

    Returns:
        A human-readable result string for the chat output.
    """
    urls = re.split(r'[,\n]+', url_input.strip()) if bulk_toggle else [url_input]
    urls = [url.strip() for url in urls if url.strip()]
    urls = urls[:max_urls]

    if intent == "scrape_links":
        all_links = []
        for url in urls:
            links = extract_links_from_page(url)
            all_links.extend(links)
        return f"Extracted links: {', '.join(all_links)}"

    elif intent == "count_images":
        total_images = 0
        for url in urls:
            # Fix: previously a single unreachable URL raised and aborted the
            # whole command; now failures are logged and the count continues
            # over the remaining URLs.
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
            except requests.RequestException as exc:
                logging.warning("Skipping %s while counting images: %s", url, exc)
                continue
            soup = BeautifulSoup(response.text, 'html.parser')
            images = soup.find_all('img')
            total_images += len(images)
        return f"There are {total_images} images across the specified URLs."

    elif intent == "monitor_changes":
        changes_log = process_urls(url_input, bulk_toggle, "Scrape data", max_urls, crawl_depth, mode='chat')
        return changes_log

    return "Instruction not recognized. Please try again."
|
297 |
|
298 |
+
def chat_based_scrape(instruction, url_input, bulk_toggle, max_urls, crawl_depth):
    """Handle one chat instruction: classify it, then run the matching command.

    Args:
        instruction: Free-text user instruction (e.g. "list all links").
        url_input: Raw URL text; comma/newline separated when bulk_toggle.
        bulk_toggle: Treat url_input as a list of URLs when True.
        max_urls: Maximum number of URLs to process.
        crawl_depth: Forwarded to the command generator.

    Returns:
        The textual result produced by generate_command.
    """
    # Fix: debug print() calls bypassed the module's configured logging
    # (logging.basicConfig at file top); route them through logging with
    # lazy %-style arguments instead.
    logging.info("Received instruction: %s", instruction)

    intent = recognize_intent(instruction)
    logging.info("Recognized intent: %s", intent)

    # Dispatch the recognized intent to the actual scraping command.
    return generate_command(intent, url_input, bulk_toggle, max_urls, crawl_depth)
|
308 |
+
|
309 |
def create_interface():
|
310 |
"""Create the Gradio interface."""
|
311 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
397 |
)
|
398 |
chat_output = gr.Textbox(label="Chat Output")
|
399 |
|
|
|
|
|
|
|
400 |
chat_button = gr.Button("Submit Instruction", variant="primary")
|
401 |
|
402 |
chat_button.click(
|
|
|
424 |
|
425 |
return demo
|
426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
427 |
if __name__ == "__main__":
|
428 |
demo = create_interface() # Call the function to create the interface
|
429 |
+
demo.launch() # Launch the Gradio app
|