Tonic committed on
Commit
6e402f7
1 Parent(s): a1ac6ed

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from io import BytesIO
4
+
5
+ from PIL import Image
6
+ from transformers import AutoProcessor, AutoModelForVision2Seq
7
+
8
_KOSMOS2_CHECKPOINT = "microsoft/kosmos-2-patch14-224"

# Filled on first use so the checkpoint is loaded once per process instead of
# once per request.
_KOSMOS2_CACHE = {}


def _load_kosmos2():
    """Return the ``(model, processor)`` pair, loading it on first use only."""
    if "pair" not in _KOSMOS2_CACHE:
        model = AutoModelForVision2Seq.from_pretrained(_KOSMOS2_CHECKPOINT)
        processor = AutoProcessor.from_pretrained(_KOSMOS2_CHECKPOINT)
        # Store both objects in a single assignment so a concurrent caller
        # never observes a half-populated cache.
        _KOSMOS2_CACHE["pair"] = (model, processor)
    return _KOSMOS2_CACHE["pair"]


def _to_pil_image(image):
    """Coerce the supported input kinds into a ``PIL.Image.Image``."""
    if image is None:
        raise ValueError("No image was provided.")
    if isinstance(image, Image.Image):
        return image
    if isinstance(image, (bytes, bytearray, memoryview)):
        # Encoded image bytes: the only input the original code accepted.
        return Image.open(BytesIO(image))
    if isinstance(image, (str, os.PathLike)):
        return Image.open(image)
    # Anything else is assumed to be an array-like of pixels (presumably what
    # a Gradio Image input passes by default -- confirm against the Gradio
    # version in use).
    return Image.fromarray(image)


def generate_caption(image):
    """Generate a caption for ``image`` with the Kosmos-2 checkpoint.

    Args:
        image: Encoded image bytes, a ``PIL.Image.Image``, a filesystem path,
            or a pixel array accepted by ``PIL.Image.fromarray``.

    Returns:
        The caption text produced by ``processor.post_process_generation``
        (the entity list it also returns is discarded).

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    model, processor = _load_kosmos2()

    prompt = "<grounding>An image of"

    img = _to_pil_image(image)

    # The original code saved the image as a JPEG and reopened it before
    # running the processor. That re-encode is kept, but done in memory: a
    # fixed "temp_image.jpg" on disk is shared between concurrent requests and
    # is never cleaned up. Converting to RGB first avoids the failure JPEG
    # encoding raises for images with an alpha channel or a palette.
    buffer = BytesIO()
    img.convert("RGB").save(buffer, format="JPEG")
    buffer.seek(0)
    img = Image.open(buffer)

    inputs = processor(text=prompt, images=img, return_tensors="pt")

    # Generate caption
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Strip the grounding markup from the raw generation.
    processed_text, _ = processor.post_process_generation(generated_text)

    return processed_text
32
+
33
import gradio as gr

# Static text shown on the demo page.
title = 'Image Caption Generator'
description = 'Generate descriptive captions for images.'
# NOTE(review): this looks like a placeholder URL rather than a real sample
# image -- replace it with a reachable example before relying on it.
examples = [["https://example.com/image1.jpg"]]
article = '<p style="margin:auto;max-width:600px;">This tool generates descriptive captions for given images.</p>'

# Use the top-level components: the `gr.inputs` / `gr.outputs` namespaces are
# deprecated in Gradio 3 and removed in Gradio 4. The explicit
# `source='upload'` argument is dropped because its spelling differs between
# major versions; upload remains an available source by default.
interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(),
    outputs=gr.Textbox(),
    title=title,
    description=description,
    examples=examples,
    article=article,
)

# Start the web UI; this runs when the script is executed, as before.
interface.launch()