gokaygokay committed on
Commit
e1a2485
1 Parent(s): 87966a5

qwen2-vl-fix

Browse files
Files changed (1) hide show
  1. app.py +22 -5
app.py CHANGED
@@ -49,11 +49,28 @@ def florence_caption(image):
49
  )
50
  return parsed_answer["<MORE_DETAILED_CAPTION>"]
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # Qwen2-VL-2B caption function
53
  @spaces.GPU
54
  def qwen_caption(image):
55
  if not isinstance(image, Image.Image):
56
- image = Image.fromarray(image)
57
 
58
  image_path = array_to_image_path(np.array(image))
59
 
@@ -65,7 +82,7 @@ def qwen_caption(image):
65
  "type": "image",
66
  "image": image_path,
67
  },
68
- {"type": "text", "text": "Describe this image in detail."},
69
  ],
70
  }
71
  ]
@@ -518,7 +535,7 @@ def create_interface():
518
  with gr.Accordion("Image and Caption", open=False):
519
  input_image = gr.Image(label="Input Image (optional)")
520
  caption_output = gr.Textbox(label="Generated Caption", lines=3)
521
- caption_model = gr.Radio(["Florence", "Qwen"], label="Caption Model", value="Florence")
522
  create_caption_button = gr.Button("Create Caption")
523
  add_caption_button = gr.Button("Add Caption to Prompt")
524
 
@@ -540,9 +557,9 @@ def create_interface():
540
 
541
  def create_caption(image, model):
542
  if image is not None:
543
- if model == "Florence":
544
  return florence_caption(image)
545
- elif model == "Qwen":
546
  return qwen_caption(image)
547
  return ""
548
 
 
49
  )
50
  return parsed_answer["<MORE_DETAILED_CAPTION>"]
51
 
52
def array_to_image_path(image_array):
    """Save an image array as a PNG file and return the file's absolute path.

    Args:
        image_array: Array-like pixel data. It is cast with ``np.uint8``
            before being handed to ``Image.fromarray``.

    Returns:
        Absolute path of the PNG file written to the current working
        directory.
    """
    # Function-scope import so this block does not depend on a file-level
    # import that may not exist yet.
    import uuid

    # Convert the array to a PIL image.
    img = Image.fromarray(np.uint8(image_array))

    # A timestamp with one-second resolution is not unique on its own: two
    # captions requested within the same second would write to the same file,
    # and one request could end up reading the other's image. The random
    # suffix makes every filename distinct.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"image_{timestamp}_{uuid.uuid4().hex}.png"

    img.save(filename)

    return os.path.abspath(filename)
68
+
69
  # Qwen2-VL-2B caption function
70
  @spaces.GPU
71
  def qwen_caption(image):
72
  if not isinstance(image, Image.Image):
73
+ image = Image.fromarray(np.uint8(image))
74
 
75
  image_path = array_to_image_path(np.array(image))
76
 
 
82
  "type": "image",
83
  "image": image_path,
84
  },
85
+ {"type": "text", "text": "Describe this image in great detail."},
86
  ],
87
  }
88
  ]
 
535
  with gr.Accordion("Image and Caption", open=False):
536
  input_image = gr.Image(label="Input Image (optional)")
537
  caption_output = gr.Textbox(label="Generated Caption", lines=3)
538
+ caption_model = gr.Radio(["Florence-2", "Qwen2-VL"], label="Caption Model", value="Florence-2")
539
  create_caption_button = gr.Button("Create Caption")
540
  add_caption_button = gr.Button("Add Caption to Prompt")
541
 
 
557
 
558
  def create_caption(image, model):
559
  if image is not None:
560
+ if model == "Florence-2":
561
  return florence_caption(image)
562
+ elif model == "Qwen2-VL":
563
  return qwen_caption(image)
564
  return ""
565