import base64
import os
import time

import gradio as gr
from together import Together

# Ensure the API key is set before constructing the client, so a missing
# key produces this clear error instead of a failure inside the SDK.
if "TOGETHER_API_KEY" not in os.environ:
    raise ValueError("Please set the TOGETHER_API_KEY environment variable")

# Initialize Together client
client = Together()
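
# On Hugging Face Spaces the key is usually provided as a repository secret;
# locally it can be exported, e.g. `export TOGETHER_API_KEY=...`.
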
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")
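
# Note: the base64 output of encode_image() is sent below as a data URL
# ("data:image/jpeg;base64,<...>"), the inline-image format accepted by
# OpenAI-compatible vision chat APIs such as Together's.
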
def bot_streaming(message, history, max_new_tokens=250):
    txt = message["text"]
    messages = []

    # In Gradio's tuple-format history, each entry is a [user, assistant]
    # pair, and an uploaded image appears as its own turn whose user part
    # is a tuple of file paths. Merge each image turn with the text turn
    # that follows it into a single multimodal user message.
    for i, msg in enumerate(history):
        if isinstance(msg[0], tuple):  # image turn: pair it with the next text turn
            messages.append({"role": "user", "content": [
                {"type": "text", "text": history[i + 1][0]},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(msg[0][0])}"}},
            ]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i + 1][1]}]})
        elif isinstance(history[i - 1][0], tuple) and isinstance(msg[0], str):
            pass  # text turn already merged into the image turn above
        elif isinstance(history[i - 1][0], str) and isinstance(msg[0], str):  # text-only turn
            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
if len(message["files"]) == 1: | |
if isinstance(message["files"][0], str): # examples | |
image_path = message["files"][0] | |
else: # regular input | |
image_path = message["files"][0]["path"] | |
messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}}]}) | |
else: | |
messages.append({"role": "user", "content": [{"type": "text", "text": txt}]}) | |
    stream = client.chat.completions.create(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
        messages=messages,
        max_tokens=max_new_tokens,
        stream=True,
    )

    # Accumulate streamed deltas and yield the growing buffer so the chat
    # window updates progressively.
    buffer = ""
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            buffer += chunk.choices[0].delta.content
            time.sleep(0.01)  # small delay to smooth UI updates
            yield buffer
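
# Hypothetical local sanity check (not part of the Space UI): bot_streaming
# is a generator, so partial responses can be printed as they arrive.
#
#   for partial in bot_streaming(
#       {"text": "Describe this image.", "files": ["./examples/wat_arun.jpg"]},
#       history=[],
#   ):
#       print(partial)
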
demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Multimodal Llama",
    examples=[
        [{"text": "Which era does this piece belong to? Give details about the era.", "files": ["./examples/rococo.jpg"]}, 200],
        [{"text": "Where do the droughts happen according to this diagram?", "files": ["./examples/weather_events.png"]}, 250],
        [{"text": "What happens when you take the white cat out of this chain?", "files": ["./examples/ai2d_test.jpg"]}, 250],
        [{"text": "Which company was this invoice addressed to?", "files": ["./examples/invoice.png"]}, 250],
        [{"text": "Where can I find this monument? Can you give me other recommendations around the area?", "files": ["./examples/wat_arun.jpg"]}, 250],
    ],
    textbox=gr.MultimodalTextbox(),
    additional_inputs=[
        gr.Slider(
            minimum=10,
            maximum=500,
            value=250,
            step=10,
            label="Maximum number of new tokens to generate",
        )
    ],
    cache_examples=False,
    description="Try Multimodal Llama by Meta with the Together API in this demo. Upload an image and start chatting about it, or simply try one of the examples below.",
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True,
)

if __name__ == "__main__":
    demo.launch(debug=True)