from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
from PIL import Image
import torch
from threading import Thread
import gradio as gr
import time

# Load the processor and model once at startup. MllamaForConditionalGeneration
# is the dedicated class for Llama 3.2 Vision; with no dtype or device
# arguments, the weights load in float32 on CPU.
processor = AutoProcessor.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")
model = MllamaForConditionalGeneration.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")
def bot_streaming(message, history, max_new_tokens=250):
    txt = message["text"]
    messages = []
    images = []

    # Rebuild the conversation history in the chat-template format.
    for i, msg in enumerate(history):
        if isinstance(msg[0], tuple):  # an image turn: msg[0] is (filepath,)
            messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
            images.append(Image.open(msg[0][0]).convert("RGB"))
        elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
            # The text paired with the previous image turn was already consumed above.
            pass
        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):  # text-only turn
            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
    # Attach the image from the current message, if any.
    if len(message["files"]) == 1:
        if isinstance(message["files"][0], str):  # a plain file path
            image = Image.open(message["files"][0]).convert("RGB")
        else:  # a dict carrying a "path" key
            image = Image.open(message["files"][0]["path"]).convert("RGB")
        images.append(image)
        messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
    else:
        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
    texts = processor.apply_chat_template(messages, add_generation_prompt=True)

    if images == []:
        inputs = processor(text=texts, return_tensors="pt")  # no .to("cuda"); tensors stay on CPU
    else:
        inputs = processor(text=texts, images=images, return_tensors="pt")  # no .to("cuda"); tensors stay on CPU
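    # If you later move the model to a GPU, the processor output supports
    # .to(...) as well, e.g. inputs = inputs.to(model.device).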
    # Run generate() in a background thread and stream tokens as they arrive.
    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)

    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        time.sleep(0.01)
        yield buffer
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Multimodal Llama",
    textbox=gr.MultimodalTextbox(),
    additional_inputs=[
        gr.Slider(
            minimum=10,
            maximum=500,
            value=250,
            step=10,
            label="Maximum number of new tokens to generate",
        )
    ],
    cache_examples=False,
    description="Try Multimodal Llama by Meta with transformers in this demo. Upload an image, and start chatting about it. To learn more about Llama Vision, visit [our blog post](https://huggingface.co/blog/llama32).",
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True,
)
demo.launch(debug=True)
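
# A minimal local-run sketch (assumes access to the gated
# meta-llama/Llama-3.2-11B-Vision-Instruct weights and enough RAM for CPU
# inference of an 11B-parameter model):
#   pip install torch transformers gradio pillow
#   huggingface-cli login   # accept the Llama 3.2 license on the Hub first
#   python app.py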