Molmo-4bit / app.py
zamal's picture
Update app.py
bba5804 verified
raw
history blame
1.78 kB
import gradio as gr
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
from PIL import Image
import requests
from io import BytesIO
import subprocess
import sys
# Hub repository to load; the name suggests a bitsandbytes 4-bit build of
# Molmo 7B-O (not verified here).
repo_name = "cyan2k/molmo-7B-O-bnb-4bit"

# Keyword arguments shared by both `from_pretrained` calls below.
# `trust_remote_code=True` allows the loaders to run code shipped in the repo.
arguments = dict(device_map="auto", torch_dtype="auto", trust_remote_code=True)

# Instantiate the processor first, then the model, from the same repository.
processor = AutoProcessor.from_pretrained(repo_name, **arguments)
model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)
def describe_image(image):
    """Generate a text description of an image with the loaded Molmo model.

    Args:
        image: The picture to describe. The Gradio input component in this
            file is configured with ``type="pil"``, so this is normally a
            ``PIL.Image.Image``; Gradio passes ``None`` when the user submits
            without uploading anything.

    Returns:
        str: The text generated for the prompt "Describe this image.",
        decoded with special tokens skipped.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    # Fail early with a clear message instead of an opaque error from deep
    # inside the processor when nothing was uploaded.
    if image is None:
        raise ValueError("No image provided; please upload an image first.")

    # The Molmo model card documents a broadcast error for non-RGB input
    # (e.g. RGBA PNGs or greyscale images) and recommends converting first.
    # `getattr` keeps non-PIL inputs (no `.mode` attribute) passing through
    # exactly as before.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")

    # Turn the image and the fixed prompt into model inputs.
    inputs = processor.process(
        images=[image],
        text="Describe this image."
    )

    # Move every input to the model's device and add a leading batch
    # dimension of size 1.
    inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}

    # Generate at most 200 new tokens, stopping at the end-of-text marker.
    output = model.generate_from_batch(
        inputs,
        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
        tokenizer=processor.tokenizer,
    )

    # Drop the prompt tokens so only the newly generated ones are decoded.
    generated_tokens = output[0, inputs["input_ids"].size(1):]
    generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return generated_text
def gradio_app():
    """Build the image-description Gradio interface and launch it.

    The interface takes a single image input (handed to ``describe_image``
    as a PIL image) and shows the returned text in a read-only textbox.
    """
    demo = gr.Interface(
        fn=describe_image,
        # Input: uploaded image, delivered to the callback as a PIL image.
        inputs=gr.Image(type="pil", label="Upload Image"),
        # Output: non-editable textbox holding the generated description.
        outputs=gr.Textbox(label="Image Description", interactive=False),
        title="Image Description App",
        description="Upload an image and get a detailed description using the Molmo 7B model",
    )
    demo.launch()
# Script entry point: build the interface and launch it. The call is not
# guarded by `__name__`, so it also runs whenever this module is imported.
gradio_app()