Spaces:

Tonic1
/

kosmos-2

Running

App Files Files Community

Tonic committed on Mar 13

Commit

6e402f7

•

1 Parent(s): a1ac6ed

Create app.py

Browse files

Files changed (1) hide show

app.py +45 -0

app.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import os
+import requests
+from io import BytesIO
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForVision2Seq
+def generate_caption(image):
+  # Load pre-trained models & processors
+  model = AutoModelForVision2Seq.from_pretrained("microsoft/kosmos-2-patch14-224")
+  processor = AutoProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")
+  prompt = "<grounding>An image of"
+  # Open the uploaded image file
+  img = Image.open(BytesIO(image))
+  # Save the image locally and open it again to avoid potential issues with reusing the same PIL object
+  img.save("temp_image.jpg")
+  img = Image.open("temp_image.jpg")
+  inputs = processor(text=prompt, images=img, return_tensors="pt")
+  # Generate caption
+  generated_ids = model.generate(**inputs, max_new_tokens=128)
+  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+  # Process the generated caption
+  processed_text, _ = processor.post_process_generation(generated_text)
+  return processed_text
+import gradio as gr
+title = 'Image Caption Generator'
+description = 'Generate descriptive captions for images.'
+examples = [["https://example.com/image1.jpg"]]
+article = '<p style="margin:auto;max-width:600px;">This tool generates descriptive captions for given images.</p>'
+interface = gr.Interface(fn=generate_caption,
+                        inputs=gr.inputs.Image(source='upload'),
+                        outputs=gr.outputs.Textbox(),
+                        title=title, description=description, examples=examples, article=article)
+interface.launch()