Spaces:

bgaspra
/

Rec_Sys_Flo2

Running

App Files Community

bgaspra committed on Nov 6, 2024

Commit

8fa2606

verified ·

1 Parent(s): ec29692

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -9

app.py CHANGED Viewed

@@ -14,7 +14,6 @@ model_name = "microsoft/Florence-2-base"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-# Modify model loading to disable flash attention
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch_dtype,
@@ -22,21 +21,27 @@ model = AutoModelForCausalLM.from_pretrained(
 ).to(device)
 processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-# Load CivitAI dataset (limited to 1000 samples)
 print("Loading dataset...")
 dataset = load_dataset("thefcraft/civitai-stable-diffusion-337k", split="train[:1000]")
 df = pd.DataFrame(dataset)
 print("Dataset loaded successfully!")
-# Create cache for embeddings to improve performance
 text_embedding_cache = {}
 def get_image_embedding(image):
     try:
-        inputs = processor(images=image, return_tensors="pt").to(device, torch_dtype)
         with torch.no_grad():
-            outputs = model.get_image_features(**inputs)
-        return outputs.cpu().numpy()
     except Exception as e:
         print(f"Error in get_image_embedding: {str(e)}")
         return None
@@ -46,11 +51,17 @@ def get_text_embedding(text):
         if text in text_embedding_cache:
             return text_embedding_cache[text]
-        inputs = processor(text=text, return_tensors="pt").to(device, torch_dtype)
         with torch.no_grad():
-            outputs = model.get_text_features(**inputs)
-        embedding = outputs.cpu().numpy()
         text_embedding_cache[text] = embedding
         return embedding
     except Exception as e:

 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch_dtype,
 ).to(device)
 processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+# Load CivitAI dataset
 print("Loading dataset...")
 dataset = load_dataset("thefcraft/civitai-stable-diffusion-337k", split="train[:1000]")
 df = pd.DataFrame(dataset)
 print("Dataset loaded successfully!")
 text_embedding_cache = {}
 def get_image_embedding(image):
     try:
+        inputs = processor(
+            images=image,
+            text=[""], # Florence-2 requires both image and text inputs
+            return_tensors="pt"
+        ).to(device, torch_dtype)
         with torch.no_grad():
+            outputs = model(**inputs)
+            # Get the image embeddings from the last hidden states
+            image_embeddings = outputs.last_hidden_state[:, 0, :]  # Take CLS token
+        return image_embeddings.cpu().numpy()
     except Exception as e:
         print(f"Error in get_image_embedding: {str(e)}")
         return None
         if text in text_embedding_cache:
             return text_embedding_cache[text]
+        inputs = processor(
+            text=text,
+            images=None,
+            return_tensors="pt"
+        ).to(device, torch_dtype)
         with torch.no_grad():
+            outputs = model(**inputs)
+            text_embeddings = outputs.last_hidden_state[:, 0, :]  # Take CLS token
+        embedding = text_embeddings.cpu().numpy()
         text_embedding_cache[text] = embedding
         return embedding
     except Exception as e: