zamal committed on
Commit
4f1e215
·
verified ·
1 Parent(s): e80f948

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -16
app.py CHANGED
@@ -18,14 +18,14 @@ processor = AutoProcessor.from_pretrained(repo_name, **arguments)
18
 
19
  # Define the function for image description
20
  @spaces.GPU # This ensures the function gets GPU access when needed
21
- def describe_image(image):
22
  # Load the model inside the function and move it to GPU
23
  model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to('cuda')
24
 
25
- # Process the uploaded image
26
  inputs = processor.process(
27
  images=[image],
28
- text="Describe this image in great detail without missing any piece of information"
29
  )
30
 
31
  # Move inputs to model device (GPU)
@@ -46,21 +46,27 @@ def describe_image(image):
46
 
47
  # Gradio interface
48
  def gradio_app():
49
- # Define Gradio interface
50
- image_input = gr.Image(type="pil", label="Upload Image")
51
- output_text = gr.Textbox(label="Image Description", interactive=False)
52
 
53
- # Create Gradio interface
54
- interface = gr.Interface(
55
- fn=describe_image,
56
- inputs=image_input,
57
- outputs=output_text,
58
- title="Image Description App",
59
- description="Upload an image and get a detailed description using the Molmo 7B model"
60
- )
 
 
 
 
 
 
 
61
 
62
- # Launch the interface
63
- interface.launch()
64
 
65
  # Launch the Gradio app
66
  gradio_app()
 
18
 
19
  # Define the function for image description
20
  @spaces.GPU # This ensures the function gets GPU access when needed
21
+ def describe_image(image, question):
22
  # Load the model inside the function and move it to GPU
23
  model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to('cuda')
24
 
25
+ # Process the uploaded image along with the user's question
26
  inputs = processor.process(
27
  images=[image],
28
+ text=question if question else "Describe this image in great detail without missing any piece of information"
29
  )
30
 
31
  # Move inputs to model device (GPU)
 
46
 
47
  # Gradio interface
48
  def gradio_app():
49
+ with gr.Blocks() as demo:
50
+ gr.Markdown("# Image Long Description with Molmo-7B 4 bit quantized\n### Upload an image and ask a question about it!")
 
51
 
52
+ with gr.Row():
53
+ image_input = gr.Image(type="pil", label="Upload an Image")
54
+ question_input = gr.Textbox(placeholder="Ask a question about the image (e.g., 'What is happening in this image?')", label="Question (Optional)")
55
+
56
+ output_text = gr.Textbox(label="Image Description", interactive=False)
57
+
58
+ # Submit button to generate the description
59
+ submit_btn = gr.Button("Generate Description")
60
+
61
+ # Callback to run when submit button is clicked
62
+ submit_btn.click(
63
+ fn=describe_image,
64
+ inputs=[image_input, question_input],
65
+ outputs=output_text
66
+ )
67
 
68
+ # Launch the Gradio interface
69
+ demo.launch()
70
 
71
  # Launch the Gradio app
72
  gradio_app()