"""Gradio demo: ask a LLaVA-Llama-3 model what an uploaded image shows."""

import threading

import torch
import gradio as gr
from transformers import (
    TextIteratorStreamer,
    AutoProcessor,
    LlavaForConditionalGeneration,
)
from PIL import Image
import requests
import spaces
import accelerate

DESCRIPTION = '''

Krypton 🕋

This uses an Open Source model from xtuner/llava-llama-3-8b-v1_1-transformers

'''

model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"

# Llama-3 chat template with a single user turn. The "<image>" placeholder is
# required: the processor/model substitutes the image features at that spot.
PROMPT = (
    "<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>\n\n"
)

# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 200

# Loaded once at import time so every request reuses the same weights.
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
).to('cuda')

processor = AutoProcessor.from_pretrained(model_id)


@spaces.GPU(duration=120)
def krypton(input_image):
    """Answer the fixed question in ``PROMPT`` about an uploaded image.

    Args:
        input_image: The image delivered by the Gradio ``"image"`` input as an
            array exposing ``.astype`` (a NumPy array), or ``None`` when the
            user submitted without uploading anything.

    Returns:
        The text generated by the model, decoded with special tokens removed
        and without the echoed prompt.

    Raises:
        gr.Error: If no image was provided.
    """
    if input_image is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload an image first.")

    # convert("RGB") normalises grayscale / RGBA arrays instead of forcing a
    # mode onto data that may not have exactly three channels.
    pil_image = Image.fromarray(input_image.astype('uint8')).convert('RGB')

    # Keyword arguments keep this call independent of the processor's
    # positional parameter order.
    inputs = processor(
        text=PROMPT,
        images=pil_image,
        return_tensors='pt',
    ).to('cuda', torch.float16)

    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=False,
    )

    # generate() returns the prompt ids followed by the new tokens; decode only
    # what comes after the prompt so the user sees just the answer.
    prompt_length = inputs["input_ids"].shape[1]
    answer_ids = outputs[0][prompt_length:]
    return processor.decode(answer_ids, skip_special_tokens=True)


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.Interface(
        fn=krypton,
        inputs="image",
        outputs="text",
        fill_height=True,
    )

if __name__ == "__main__":
    demo.launch()