IbrahimHasani committed
Commit a0727b2
1 Parent(s): 87045f7

Update app.py

Files changed (1)
app.py +12 -13
app.py CHANGED
@@ -1,25 +1,30 @@
 from PIL import Image
 from transformers import CLIPProcessor, CLIPModel
 import gradio as gr
+import torchvision.transforms as transforms

 # Initialize CLIP model and processor
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

 def image_similarity(image: Image.Image, positive_prompt: str, negative_prompts: str):
-
+    # Convert the PIL Image to a tensor and preprocess
+    transform = transforms.Compose([
+        transforms.Resize((224, 224)),
+        transforms.ToTensor(),
+        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+    ])
+    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
+
     # Split the negative prompts string into a list of prompts
     negative_prompts_list = negative_prompts.split(";")
-
     # Combine positive and negative prompts into one list
     prompts = [positive_prompt.strip()] + [np.strip() for np in negative_prompts_list]

-    # ... rest of the code
-
-
+    # Process prompts and image tensor
     inputs = processor(
         text=prompts,
-        images=image,
+        images=image_tensor,
         return_tensors="pt",
         padding=True
     )
@@ -45,10 +50,4 @@ interface = gr.Interface(
         gr.components.Textbox(label="Probability for Positive Prompt")
     ],
     title="Engagify's Image Action Detection",
-    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in an image or not. (Binary Classifier). It contrasts an Action against multiple negative labels. Ensure the prompts accurately describe the desired detection.",
-    live=False,
-    theme=gr.themes.Monochrome(),
-
-)
-
-interface.launch()
+    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in an image or not. (Binary Classifier). It contrasts an Action against multiple negative labels. Ensure the prompts accurately describe the desired
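Note: both hunks end before the scoring logic, which this commit leaves unchanged. For context, the remainder of image_similarity presumably follows the standard CLIP zero-shot pattern sketched below; the variable names and return values here are assumptions, since that code is not part of this diff.

import torch

# Sketch only: canonical CLIP zero-shot scoring, assumed to match the
# unchanged tail of image_similarity.
with torch.no_grad():
    outputs = model(**inputs)

# logits_per_image has shape (1, num_prompts): one similarity score per
# prompt. Softmax converts the scores into probabilities over the prompts.
probs = outputs.logits_per_image.softmax(dim=1)

# The positive prompt was placed first in `prompts`, so index 0 holds its
# probability; the action "matches" when it outranks every negative prompt.
positive_prob = probs[0][0].item()
action_detected = probs[0].argmax().item() == 0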
 