cosmo3769 committed
Commit 4a0c9f0 · verified · 1 Parent(s): 1432c6f

Create app.py

Files changed (1)
app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
+ import gradio as gr
+ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
+ from PIL import Image
+ import torch
+
+ # Load model and processor
+ model_id = "cosmo3769/finetuned_paligemma_vqav2_small"
+ model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
+ processor = AutoProcessor.from_pretrained("google/paligemma-3b-pt-224")
+
+ # Define inference function
+ def process_image(image, prompt):
+     # Process the image and prompt using the processor
+     inputs = processor(image.convert("RGB"), prompt, return_tensors="pt")
+
+     # Generate output from the model
+     output = model.generate(**inputs, max_new_tokens=20)
+
+     # Decode and return the output
+     decoded_output = processor.decode(output[0], skip_special_tokens=True)
+
+     # Return the answer (exclude the prompt part from output)
+     return decoded_output[len(prompt):]
+
+ # Define the Gradio interface
+ inputs = [
+     gr.Image(type="pil"),
+     gr.Textbox(label="Prompt", placeholder="Enter your question")
+ ]
+ outputs = gr.Textbox(label="Answer")
+
+ # Create the Gradio app
+ demo = gr.Interface(fn=process_image, inputs=inputs, outputs=outputs, title="Finetuned PaliGemma on VQAv2 Small Dataset",
+                     description="Ask a question about an image")
+
+ # Launch the app
+ demo.launch()