taesiri committed on
Commit d60d34b
1 Parent(s): 3601eff
Files changed (3)
  1. README.md +1 -1
  2. app.py +41 -16
  3. requirements.txt +1 -2
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 💯
 colorFrom: red
 colorTo: blue
 sdk: gradio
-sdk_version: 3.0.24
+sdk_version: 4.36.0
 app_file: app.py
 pinned: false
 license: mit
app.py CHANGED
@@ -1,10 +1,17 @@
+import torch
 import gradio as gr
 from transformers import CLIPProcessor, CLIPModel
+import spaces
 
-model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
+
+# Check if CUDA is available and set the device accordingly
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to(device)
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
 
 
+@spaces.GPU
 def calculate_score(image, text):
     labels = text.split(";")
     labels = [l.strip() for l in labels]
@@ -12,8 +19,13 @@ def calculate_score(image, text):
     if len(labels) == 0:
         return dict()
     inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
+    inputs = {
+        k: v.to(device) for k, v in inputs.items()
+    }  # Move tensors to the appropriate device
     outputs = model(**inputs)
-    logits_per_image = outputs.logits_per_image.detach().numpy()
+    logits_per_image = (
+        outputs.logits_per_image.detach().cpu().numpy()
+    )  # Move results back to CPU for further processing
 
     results_dict = {
         label: score / 100.0 for label, score in zip(labels, logits_per_image[0])
@@ -21,21 +33,34 @@
     return results_dict
 
 
-if __name__ == "__main__":
-    cat_example = [
-        "cat.jpg",
-        "a cat stuck in a door; a cat in the air; a cat sitting; a cat standing; a cat is entering the matrix; a cat is entering the void",
-    ]
+with gr.Blocks() as demo:
+    gr.Markdown("# CLIP Score")
+    gr.Markdown(
+        "Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text"
+    )
+    with gr.Row():
+        image_input = gr.Image()
+        output_label = gr.Label()
+
+    text_input = gr.Textbox(label="Descriptions (separated by semicolons)")
+
+    image_input.change(
+        fn=calculate_score, inputs=[image_input, text_input], outputs=output_label
+    )
+    text_input.submit(
+        fn=calculate_score, inputs=[image_input, text_input], outputs=output_label
+    )
 
-    demo = gr.Interface(
+    gr.Examples(
+        examples=[
+            [
+                "cat.jpg",
+                "a cat stuck in a door; a cat in the air; a cat sitting; a cat standing; a cat is entering the matrix; a cat is entering the void",
+            ]
+        ],
         fn=calculate_score,
-        inputs=["image", "text"],
-        outputs="label",
-        examples=[cat_example],
-        allow_flagging="never",
-        description="# CLIP Score",
-        article="Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text",
-        cache_examples=True,
+        inputs=[image_input, text_input],
+        outputs=output_label,
     )
 
-    demo.launch()
+demo.launch()
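Apart from the gr.Interface → gr.Blocks migration, the functional change to the scoring path is device placement plus the `@spaces.GPU` decorator from Hugging Face's `spaces` package, which requests ZeroGPU hardware when the app runs on Spaces. As a minimal standalone sketch of the same computation (my illustration, not part of the commit; it assumes torch, transformers, and Pillow are installed and that the cat.jpg example image is available locally):

```python
# Standalone sketch of the scoring path in calculate_score (illustrative only).
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

labels = ["a cat sitting", "a cat standing"]
inputs = processor(
    text=labels, images=Image.open("cat.jpg"), return_tensors="pt", padding=True
)
inputs = {k: v.to(device) for k, v in inputs.items()}  # move tensors to the model's device
with torch.no_grad():
    logits_per_image = model(**inputs).logits_per_image.cpu().numpy()

# Same normalization as the app: CLIP's learned logit scale is roughly 100 for
# this checkpoint, so dividing by 100 puts the scores roughly in [0, 1].
print({label: score / 100.0 for label, score in zip(labels, logits_per_image[0])})
```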
requirements.txt CHANGED
@@ -1,4 +1,3 @@
-gradio
-transformers
+git+https://github.com/huggingface/transformers
 torch
 torchvision
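One note on reproducing this environment outside Spaces: gradio is dropped from requirements.txt, presumably because the Space now provisions it via the `sdk: gradio` / `sdk_version: 4.36.0` front matter above, and transformers is installed from the main branch rather than from PyPI. A local equivalent would be something like `pip install "git+https://github.com/huggingface/transformers" torch torchvision gradio` (my inference, not part of the commit).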