# Spaces: Running on Zero
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig | |
from PIL import Image | |
import requests | |
from io import BytesIO | |
import subprocess | |
import sys | |
# Checkpoint to load, plus the keyword arguments shared by both
# from_pretrained calls below. NOTE: trust_remote_code=True allows
# transformers to execute the custom code shipped in the model repo.
repo_name = "cyan2k/molmo-7B-O-bnb-4bit"
arguments = dict(device_map="auto", torch_dtype="auto", trust_remote_code=True)

# Instantiate the processor and the model once at import time, both from
# the same repo and with the same arguments.
processor = AutoProcessor.from_pretrained(repo_name, **arguments)
model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
def describe_image(image: "Image.Image | None") -> str:
    """Generate a text description of an uploaded image.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None``
            when the form is submitted without an upload.

    Returns:
        The text decoded from the newly generated tokens, or a short
        prompt asking for an image when none was provided.
    """
    # Gradio passes None when no image was uploaded; answer with a hint
    # instead of handing None to the processor.
    if image is None:
        return "Please upload an image first."

    # The Molmo model card asks for RGB input, so normalize other modes
    # (e.g. RGBA PNGs, grayscale, palette images) before processing.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Build the model inputs from the image and the fixed prompt.
    inputs = processor.process(images=[image], text="Describe this image.")

    # Move every input to the model's device and add a leading dimension
    # of size 1 (a batch of one).
    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}

    output = model.generate_from_batch(
        inputs,
        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
        tokenizer=processor.tokenizer,
    )

    # Skip the first `prompt_length` positions of the output so that only
    # the tokens produced after the prompt are decoded.
    prompt_length = inputs["input_ids"].size(1)
    generated_tokens = output[0, prompt_length:]
    return processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
def gradio_app():
    """Build the image-description Gradio interface and launch it."""
    # One PIL image in, one read-only text box out, wired to describe_image.
    demo = gr.Interface(
        fn=describe_image,
        inputs=gr.Image(type="pil", label="Upload Image"),
        outputs=gr.Textbox(label="Image Description", interactive=False),
        title="Image Description App",
        description="Upload an image and get a detailed description using the Molmo 7B model",
    )
    demo.launch()
# Script entry point: build the interface and start serving it.
gradio_app()