taesiri committed
Commit c890be1
1 Parent(s): b5e4aa1

Update app.py
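This commit loads the CLIP model on CPU at import time and moves it to CUDA inside the @spaces.GPU-decorated calculate_score, dropping the explicit duration=120 budget in favor of the decorator's default; the event-handler wiring is also reformatted onto multiple lines.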

Files changed (1)
  1. app.py +23 -23
app.py CHANGED
@@ -3,50 +3,50 @@ import gradio as gr
 from transformers import CLIPProcessor, CLIPModel
 import spaces
 
-model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to("cuda")
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
 
-
-@spaces.GPU(duration=120)
+@spaces.GPU
 def calculate_score(image, text):
+    model.to("cuda")  # Move model to CUDA inside the GPU-decorated function
+
     labels = text.split(";")
     labels = [l.strip() for l in labels]
     labels = list(filter(None, labels))
     if len(labels) == 0:
         return dict()
+
     inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
-    inputs = {
-        k: v.to("cuda") for k, v in inputs.items()
-    }
+    inputs = {k: v.to("cuda") for k, v in inputs.items()}
+
     outputs = model(**inputs)
-    logits_per_image = (
-        outputs.logits_per_image.detach().cpu().numpy()
-    )  # Move results back to CPU for further processing
-
-    results_dict = {
-        label: score / 100.0 for label, score in zip(labels, logits_per_image[0])
-    }
+    logits_per_image = outputs.logits_per_image.detach().cpu().numpy()
+
+    results_dict = {label: score / 100.0 for label, score in zip(labels, logits_per_image[0])}
     return results_dict
 
-
 with gr.Blocks() as demo:
     gr.Markdown("# CLIP Score")
-    gr.Markdown(
-        "Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text"
-    )
+    gr.Markdown("Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text")
+
     with gr.Row():
         image_input = gr.Image()
         output_label = gr.Label()
-
+
     text_input = gr.Textbox(label="Descriptions (separated by semicolons)")
-
+
     image_input.change(
-        fn=calculate_score, inputs=[image_input, text_input], outputs=output_label
+        fn=calculate_score,
+        inputs=[image_input, text_input],
+        outputs=output_label
     )
+
     text_input.submit(
-        fn=calculate_score, inputs=[image_input, text_input], outputs=output_label
+        fn=calculate_score,
+        inputs=[image_input, text_input],
+        outputs=output_label
     )
-
+
     gr.Examples(
         examples=[
             [
@@ -59,4 +59,4 @@ with gr.Blocks() as demo:
         outputs=output_label,
     )
 
-demo.launch()
+demo.launch()
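The change follows the usual ZeroGPU setup for Hugging Face Spaces: a ZeroGPU Space starts on CPU with no CUDA device attached, so the module-level .to("cuda") in the old code is the likely breakage this commit fixes. The model is instead loaded on CPU at import and moved to the GPU inside the @spaces.GPU-decorated handler, the only context where a device is guaranteed to be attached; dropping duration=120 falls back to the decorator's default allocation window. A minimal self-contained sketch of the pattern (the predict name and the torch.no_grad() guard are illustrative additions, not part of this commit):

import spaces
import torch
from transformers import CLIPModel, CLIPProcessor

# Load weights on CPU at import time; no CUDA device is attached yet on ZeroGPU.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

@spaces.GPU  # a GPU is attached only while this function executes
def predict(image, labels):  # illustrative name, not from the commit
    model.to("cuda")  # safe here: runs inside the GPU-decorated function
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    inputs = {k: v.to("cuda") for k, v in inputs.items()}
    with torch.no_grad():  # inference only, no gradients needed
        logits_per_image = model(**inputs).logits_per_image
    return logits_per_image.cpu().numpy()

Repeating model.to("cuda") on every call keeps the handler correct regardless of whether the runtime leaves the weights resident between invocations; once the parameters are already on the device, the move is a no-op.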