Spaces:

zamal
/

Molmo-4bit

Running on Zero

App Files Files Community

Molmo-4bit / app.py

zamal

Update app.py

fbbadab verified 6 months ago

raw

history blame

1.84 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
	from PIL import Image

	# Checkpoint and shared loading options.
	# The repo name indicates weights that are already quantized to 4-bit with
	# bitsandbytes, so no extra quantization flag (e.g. load_in_8bit) is passed:
	# it would contradict the checkpoint's own quantization settings.
	repo_name = "cyan2k/molmo-7B-O-bnb-4bit"
	arguments = {
	    "device_map": "auto",       # let accelerate place the weights
	    "torch_dtype": "auto",      # use the dtype stored with the checkpoint
	    "trust_remote_code": True,  # Molmo ships its modeling code in the repo
	}

	# Load the processor and model once at import time; describe_image reuses
	# these module-level objects for every request.
	processor = AutoProcessor.from_pretrained(repo_name, **arguments)
	model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments)

	def describe_image(image):
	    """Generate a detailed text description of an uploaded image.

	    Args:
	        image: The image passed in by the Gradio ``gr.Image(type="pil")``
	            input, or ``None`` when the form is submitted without an upload.

	    Returns:
	        str: The decoded text the model generated after the prompt tokens.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    # Gradio passes None when the user submits without uploading anything;
	    # fail with a readable message instead of an opaque processor error.
	    if image is None:
	        raise gr.Error("Please upload an image first.")

	    # Tokenize the prompt and preprocess the image.
	    inputs = processor.process(
	        images=[image],
	        text="Describe this image in great detail.",
	    )

	    # Move every tensor to the device the model lives on.
	    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

	    # The slice below indexes input_ids along dim 1, so a leading batch axis
	    # is required. The Molmo model card adds it with unsqueeze(0) after
	    # processor.process; only add it when it is actually missing.
	    # NOTE(review): assumes all tensors returned by process() share the same
	    # batched/unbatched layout as input_ids -- confirm.
	    if inputs["input_ids"].dim() == 1:
	        inputs = {name: tensor.unsqueeze(0) for name, tensor in inputs.items()}

	    # Generate output. The stop string is the literal end-of-text marker;
	    # a backslash-escaped variant would never match the generated text.
	    output = model.generate_from_batch(
	        inputs,
	        GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
	        tokenizer=processor.tokenizer,
	    )

	    # Drop the prompt tokens and decode only the newly generated ones.
	    prompt_length = inputs["input_ids"].size(1)
	    generated_tokens = output[0, prompt_length:]
	    generated_text = processor.tokenizer.decode(
	        generated_tokens, skip_special_tokens=True
	    )

	    return generated_text


	def gradio_app():
	    """Build the image-description Gradio interface and launch it.

	    Wires ``describe_image`` between a PIL image upload and a read-only
	    textbox, then starts the Gradio server.
	    """
	    demo = gr.Interface(
	        fn=describe_image,
	        # Input: image upload handed to describe_image as a PIL image.
	        inputs=gr.Image(type="pil", label="Upload Image"),
	        # Output: non-editable textbox showing the generated description.
	        outputs=gr.Textbox(label="Image Description", interactive=False),
	        title="Image Description App",
	        description="Upload an image and get a detailed description using the Molmo 7B model",
	    )

	    # Start serving the interface.
	    demo.launch()

	# Entry point: build and launch the Gradio app as soon as this module runs
	# (called unconditionally; there is no __main__ guard).
	gradio_app()