# vlm-playground / app.py
# lewtun's picture
# lewtun HF staff
# Update app.py
# 3c7aec5 verified
# raw
# history blame
# 2.32 kB
import gradio as gr
from transformers import LlavaProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
from threading import Thread
import re
import time
from PIL import Image
import torch
import spaces
import os
from huggingface_hub import login
# Authenticate with the Hugging Face Hub using the token stored in the
# HF_TOKEN environment variable (a missing variable raises KeyError).
login(token=os.environ["HF_TOKEN"])

# The checkpoint to serve is chosen through environment variables.
MODEL_ID = os.environ["MODEL_ID"]
REVISION = os.environ["MODEL_REVISION"]

# Load the processor and the half-precision model for the selected
# checkpoint revision, then place the model on the first CUDA device.
processor = LlavaProcessor.from_pretrained(MODEL_ID, revision=REVISION)
model = LlavaForConditionalGeneration.from_pretrained(
    MODEL_ID,
    revision=REVISION,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")
@spaces.GPU
def bot_streaming(message, history):
    """Stream the model's answer to the latest multimodal chat message.

    Args:
        message: Mapping with a "text" entry (the user's question) and a
            "files" entry (uploads attached to this turn).
        history: Earlier chat turns. A turn whose first element is a tuple
            is treated as a file upload whose first item is the image path.

    Yields:
        The text generated so far, with the leading prompt characters
        sliced off.

    Raises:
        gr.Error: If neither this turn nor the history provides an image.
    """
    print(message)

    image_path = None
    if message["files"]:
        latest_file = message["files"][-1]
        # NOTE(review): the upload entry is expected to be a {"path": ...}
        # dict; a plain path string is accepted as well in case the
        # installed Gradio version passes paths directly -- confirm.
        if isinstance(latest_file, dict):
            image_path = latest_file["path"]
        else:
            image_path = latest_file
    else:
        # If there's no image uploaded for this turn, look for images in the
        # past turns (kept inside tuples) and keep the most recent one.
        for hist in history:
            if isinstance(hist[0], tuple):
                image_path = hist[0][0]

    if image_path is None:
        # The error must be raised (not just constructed) to abort the turn
        # and surface the message to the user.
        raise gr.Error("You need to upload an image for LLaVA to work.")

    prompt = f"USER: <image>\n{message['text']}\nASSISTANT:"
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")

    streamer = TextIteratorStreamer(processor, skip_special_tokens=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=100)

    # Run generation in a background thread so tokens can be consumed from
    # the streamer while the model is still producing them.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # The streamed text starts with the prompt itself; this is the prompt as
    # it is expected to appear once special tokens are dropped, and its
    # length is used to slice it off.
    # NOTE(review): length-based stripping assumes the decoded prompt matches
    # this string character-for-character -- verify per model/template.
    text_prompt = f"USER: \n{message['text']}\nASSISTANT: "
    prompt_length = len(text_prompt)

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        # Small pause between UI updates.
        time.sleep(0.04)
        yield buffer[prompt_length:]
# Multimodal chat UI wired to the streaming handler, with two bundled
# image + question examples.
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="VLM Playground",
    description="Playground for internal VLMs. Change the model ID and revision under the environments of the Space settings.",
    examples=[
        {"text": "What is on the flower?", "files": ["./bee.jpg"]},
        {"text": "How to make this pastry?", "files": ["./baklava.png"]},
    ],
    stop_btn="Stop Generation",
    multimodal=True,
)

# Start the app; debug mode is enabled.
demo.launch(debug=True)