# Source: Hugging Face Space file (author: liudongqing, commit 4d30ab9, "adjust prompt", 1.29 kB)
from transformers import AutoProcessor, AutoModelForImageTextToText, MllamaForConditionalGeneration
import torch
from threading import Thread
import gradio as gr
import spaces
from PIL import Image
# from zipnn import zipnn_hf
# zipnn_hf()
# model_id = "royleibov/Llama-3.2-11B-Vision-Instruct-ZipNN-Compressed"
model_id = "unsloth/Llama-3.2-11B-Vision"

# Load the vision-language model in bfloat16 and let accelerate shard it
# across whatever devices are available.
model = AutoModelForImageTextToText.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

# The companion processor handles both image preprocessing and tokenization.
processor = AutoProcessor.from_pretrained(model_id)
@spaces.GPU
def score_it(input_img):
    """Run the vision model over an uploaded image and return the generated text.

    Parameters
    ----------
    input_img : numpy.ndarray
        Image as delivered by the Gradio ``Image`` component (an H x W x C
        array — TODO confirm the component is left at its default numpy type).

    Returns
    -------
    str
        The decoded generation, including the prompt prefix and any
        special tokens emitted by the tokenizer.
    """
    # Gradio hands us a numpy array; convert it to a PIL image for the processor.
    pil_image = Image.fromarray(input_img.astype('uint8'))
    image = pil_image.convert("RGB").resize((224, 224))
    # BUG FIX: the image token was malformed ("<|image|" with no closing ">"),
    # so the model never saw a well-formed image placeholder. Per Meta's
    # Llama 3.2 Vision prompt format for the base (non-instruct) model, the
    # "<|image|>" token precedes "<|begin_of_text|>".
    prompt = "<|image|><|begin_of_text|>this script says"
    inputs = processor(image, prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=200)
    return processor.decode(output[0])
# Wire the scoring function into a minimal Gradio UI: one image in, text out.
demo = gr.Interface(
    fn=score_it,
    title="Upload your English script and get the score",
    inputs=[gr.Image()],
    outputs=["text"],
    stop_btn="Stop Generation",
)

demo.launch(debug=True)