acecalisto3 committed on
Commit
ba341f3
·
verified ·
1 Parent(s): f0c5eb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -1
app.py CHANGED
@@ -141,7 +141,11 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth):
141
  screenshot_image = take_screenshot(url) # Capture screenshot
142
  if screenshot_image is not None:
143
  logging.info(f"Successfully captured screenshot of: {url}")
144
- embeddings[url] = screenshot_image
 
 
 
 
145
  except Exception as e:
146
  logging.error(f"Error capturing screenshot for {url}: {str(e)}")
147
 
@@ -154,6 +158,17 @@ def process_urls(url_input, bulk_toggle, action_radio, max_urls, crawl_depth):
154
  logging.error(f"Unexpected error: {str(e)}")
155
  return json.dumps([], indent=2), None
156
 
 
 
 
 
 
 
 
 
 
 
 
157
  # Global variables for monitoring
158
  stop_event = threading.Event()
159
  monitor_thread = None
 
141
  screenshot_image = take_screenshot(url) # Capture screenshot
142
  if screenshot_image is not None:
143
  logging.info(f"Successfully captured screenshot of: {url}")
144
+ # Preprocess the image and get CLIP embedding
145
+ inputs = processor(images=screenshot_image, return_tensors="pt")
146
+ with clip_model_lock:
147
+ embedding = model.get_image_embeddings(inputs['pixel_values'])
148
+ embeddings[url] = embedding.squeeze() # Store the embedding tensor
149
  except Exception as e:
150
  logging.error(f"Error capturing screenshot for {url}: {str(e)}")
151
 
 
158
  logging.error(f"Unexpected error: {str(e)}")
159
  return json.dumps([], indent=2), None
160
 
161
+ # Function to save the CLIP embedding to a JSON file
162
def save_embedding(embedding, url):
    """Write one URL's embedding to a uniquely named JSON file.

    The file is created under ``embeddings/<YYYYMMDD>/`` relative to the
    current working directory and contains a single-entry JSON object that
    maps ``url`` to ``embedding.tolist()``.

    Args:
        embedding: Object exposing a ``tolist()`` method whose result is
            JSON-serializable.
        url: Key under which the embedding is stored in the JSON object.

    Returns:
        The path of the file that was written.
    """
    # Read the clock once so the directory date and the filename timestamp
    # cannot disagree when a call straddles midnight.
    now = datetime.datetime.now()
    embedding_dir = os.path.join('embeddings', now.strftime("%Y%m%d"))

    # exist_ok avoids the check-then-create race: a concurrent caller creating
    # the same directory would otherwise make makedirs raise FileExistsError.
    os.makedirs(embedding_dir, exist_ok=True)

    # The uuid suffix keeps names unique for calls within the same second.
    timestamp = now.strftime("%Y%m%d%H%M%S")
    filename = os.path.join(
        embedding_dir,
        f'embedding_{timestamp}_{uuid.uuid4().hex}.json',
    )
    with open(filename, 'w') as f:
        json.dump({url: embedding.tolist()}, f)
    return filename
171
+
172
  # Global variables for monitoring
173
  stop_event = threading.Event()
174
  monitor_thread = None