Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -141,7 +141,11 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth):
|
|
141 |
screenshot_image = take_screenshot(url) # Capture screenshot
|
142 |
if screenshot_image is not None:
|
143 |
logging.info(f"Successfully captured screenshot of: {url}")
|
144 |
-
|
|
|
|
|
|
|
|
|
145 |
except Exception as e:
|
146 |
logging.error(f"Error capturing screenshot for {url}: {str(e)}")
|
147 |
|
@@ -154,6 +158,17 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth):
|
|
154 |
logging.error(f"Unexpected error: {str(e)}")
|
155 |
return json.dumps([], indent=2), None
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
# Global variables for monitoring
|
158 |
stop_event = threading.Event()
|
159 |
monitor_thread = None
|
|
|
141 |
screenshot_image = take_screenshot(url) # Capture screenshot
|
142 |
if screenshot_image is not None:
|
143 |
logging.info(f"Successfully captured screenshot of: {url}")
|
144 |
+
# Preprocess the image and get CLIP embedding
|
145 |
+
inputs = processor(images=screenshot_image, return_tensors="pt")
|
146 |
+
with clip_model_lock:
|
147 |
+
embedding = model.get_image_embeddings(inputs['pixel_values'])
|
148 |
+
embeddings[url] = embedding.squeeze() # Store the embedding tensor
|
149 |
except Exception as e:
|
150 |
logging.error(f"Error capturing screenshot for {url}: {str(e)}")
|
151 |
|
|
|
158 |
logging.error(f"Unexpected error: {str(e)}")
|
159 |
return json.dumps([], indent=2), None
|
160 |
|
161 |
+
# Function to save the CLIP embedding to a JSON file
|
162 |
+
def save_embedding(embedding, url):
    """Write one embedding to a uniquely named JSON file and return its path.

    The file is created under ``embeddings/<YYYYMMDD>/`` (relative to the
    current working directory) and contains a single-key JSON object that
    maps ``url`` to ``embedding.tolist()``.

    Args:
        embedding: Any object exposing ``tolist()`` whose result is
            JSON-serializable. NOTE(review): the caller in this file appears
            to pass a squeezed CLIP embedding tensor -- confirm.
        url: Key under which the embedding is stored in the JSON object.

    Returns:
        The path of the JSON file that was written.

    Raises:
        OSError: If the directory or the file cannot be created.
    """
    # Take a single timestamp so the date directory and the file name can
    # never disagree when a call straddles midnight.
    now = datetime.datetime.now()
    embedding_dir = os.path.join('embeddings', now.strftime("%Y%m%d"))

    # exist_ok=True replaces the exists()-then-makedirs() pair, which could
    # raise FileExistsError when two threads created the directory at once.
    os.makedirs(embedding_dir, exist_ok=True)

    # The uuid suffix keeps names unique for calls within the same second.
    stamp = now.strftime("%Y%m%d%H%M%S")
    filename = os.path.join(
        embedding_dir,
        f'embedding_{stamp}_{uuid.uuid4().hex}.json',
    )
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump({url: embedding.tolist()}, f)
    return filename
|
171 |
+
|
172 |
# Global variables for monitoring
|
173 |
stop_event = threading.Event()
|
174 |
monitor_thread = None
|