from lmdeploy import pipeline, GenerationConfig, TurbomindEngineConfig
from lmdeploy.vl import load_image
import spaces
import gradio as gr
from PIL import Image
import numpy as np

# Load the fine-tuned LLaVA-Llama3 captioning model through the lmdeploy pipeline.
pipe = pipeline('gokaygokay/llava-llama3-docci')


@spaces.GPU
def create_captions_llava_llama3_docci(image):
    """Generate a detailed caption for the given image."""
    gen_config = GenerationConfig(repetition_penalty=1.10)
    # Gradio passes the image in as a numpy array; convert it to an RGB PIL image.
    image = Image.fromarray(np.uint8(image)).convert('RGB')
    response = pipe(('describe this image in detail', image), gen_config=gen_config)
    return response.text


css = """
#mkd {
    height: 500px;
    overflow: auto;
    border: 1px solid #ccc;
}
"""

with gr.Blocks(css=css) as demo:
    gr.HTML("""
    <p>Fine-tuned version of xtuner/llava-llama-3-8b-v1_1 on the google/docci dataset.</p>
") with gr.Tab(label="SD3 Llava Llama3 Captioner"): with gr.Row(): with gr.Column(): input_img = gr.Image(label="Input Picture") submit_btn = gr.Button(value="Submit") output = gr.Text(label="Caption") gr.Examples( [["image1.jpg"], ["image2.jpg"], ["image3.png"]], inputs = [input_img], outputs = [output], fn=create_captions_llava_llama3_docci, label='Try captioning on examples' ) submit_btn.click(create_captions_llava_llama3_docci, [input_img], [output]) demo.launch(debug=True)