Spaces:
Runtime error
Runtime error
Update 2app.py
Browse files
2app.py
CHANGED
@@ -7,15 +7,12 @@ from bs4 import BeautifulSoup
|
|
7 |
from selenium import webdriver
|
8 |
from selenium.webdriver.chrome.options import Options
|
9 |
from PIL import Image
|
10 |
-
from datetime import datetime
|
11 |
import io
|
12 |
import zipfile
|
13 |
import os
|
14 |
-
import datetime
|
15 |
-
from urllib.parse import urlparse
|
16 |
import tempfile
|
17 |
import nltk
|
18 |
-
|
19 |
try:
|
20 |
nltk.download('punkt')
|
21 |
nltk.download('stopwords')
|
@@ -26,7 +23,6 @@ except Exception as e:
|
|
26 |
|
27 |
# Configure logging
|
28 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
29 |
-
|
30 |
def sanitize_filename(filename):
    """Return ``filename`` with every run of unsafe characters replaced by ``_``.

    The unsafe set is ``<``, ``>``, ``:``, double quote, ``/``, backslash,
    ``|``, ``?``, ``*`` and the newline character. Consecutive unsafe
    characters collapse into a single underscore.
    """
    unsafe_run = re.compile(r'[<>:"/\\|?*\n]+')
    return unsafe_run.sub('_', filename)
|
32 |
|
@@ -67,7 +63,7 @@ def compare_screenshot(old_screenshot, new_screenshot):
|
|
67 |
|
68 |
def alert_changes(url, change_type):
|
69 |
"""Log detected changes."""
|
70 |
-
timestamp = datetime.
|
71 |
logging.warning(f"[{timestamp}] Changes detected at {url}: {change_type}")
|
72 |
return f"[{timestamp}] {change_type}"
|
73 |
|
@@ -209,7 +205,7 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth, mo
|
|
209 |
scraped_data.append({
|
210 |
'url': url,
|
211 |
'content': latest_html, # Include full HTML content
|
212 |
-
'timestamp': datetime.
|
213 |
'changes_detected': changes_log
|
214 |
})
|
215 |
|
|
|
7 |
from selenium import webdriver
|
8 |
from selenium.webdriver.chrome.options import Options
|
9 |
from PIL import Image
|
10 |
+
from datetime import datetime # Keep this line
|
11 |
import io
|
12 |
import zipfile
|
13 |
import os
|
|
|
|
|
14 |
import tempfile
|
15 |
import nltk
|
|
|
16 |
try:
|
17 |
nltk.download('punkt')
|
18 |
nltk.download('stopwords')
|
|
|
23 |
|
24 |
# Configure logging
|
25 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
26 |
def sanitize_filename(filename):
    """Return ``filename`` with every run of unsafe characters replaced by ``_``.

    The unsafe set is ``<``, ``>``, ``:``, double quote, ``/``, backslash,
    ``|``, ``?``, ``*`` and the newline character. Consecutive unsafe
    characters collapse into a single underscore.
    """
    unsafe_run = re.compile(r'[<>:"/\\|?*\n]+')
    return unsafe_run.sub('_', filename)
|
28 |
|
|
|
63 |
|
64 |
def alert_changes(url, change_type):
    """Emit a warning about a detected change and return a stamped summary.

    The current local time is rendered as ``YYYY-MM-DD HH:MM:SS``. A warning
    naming ``url`` and ``change_type`` goes to the root logger, and the
    returned string is that same stamp in brackets followed by
    ``change_type``.
    """
    # format() on a datetime delegates to strftime with the given spec.
    timestamp = format(datetime.now(), "%Y-%m-%d %H:%M:%S")
    warning_text = f"[{timestamp}] Changes detected at {url}: {change_type}"
    logging.warning(warning_text)
    return f"[{timestamp}] {change_type}"
|
69 |
|
|
|
205 |
scraped_data.append({
|
206 |
'url': url,
|
207 |
'content': latest_html, # Include full HTML content
|
208 |
+
'timestamp': datetime.now().isoformat(),
|
209 |
'changes_detected': changes_log
|
210 |
})
|
211 |
|