Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,416 Bytes
7e0dcfc 6385c67 4397550 ae892a5 e7a1352 4397550 e7a1352 6385c67 e7a1352 6385c67 efb77d1 6385c67 e7a1352 6385c67 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
from lmdeploy import pipeline, GenerationConfig, TurbomindEngineConfig
from lmdeploy.vl import load_image
import spaces
import gradio as gr
from PIL import Image
import numpy as np
# Build the lmdeploy pipeline for the 'gokaygokay/llava-llama3-docci' model once,
# at import time; create_captions_llava_llama3_docci() below calls it per request.
pipe = pipeline('gokaygokay/llava-llama3-docci')
@spaces.GPU
def create_captions_llava_llama3_docci(image):
    """Generate a detailed caption for ``image`` using the module-level ``pipe``.

    Args:
        image: Pixel data accepted by ``np.uint8`` (presumably the numpy array
            Gradio passes from ``gr.Image`` -- confirm against the component's
            ``type`` setting). ``None`` means no picture was provided.

    Returns:
        The ``text`` attribute of the pipeline response.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        # Without this guard the conversion below fails on None with an
        # opaque exception; gr.Error carries a readable message instead.
        raise gr.Error("Please provide an image before requesting a caption.")

    gen_config = GenerationConfig(repetition_penalty=1.10)
    # Normalize to an 8-bit RGB PIL image before handing it to the pipeline.
    rgb_image = Image.fromarray(np.uint8(image)).convert('RGB')
    response = pipe(('describe this image in detail', rgb_image), gen_config=gen_config)
    return response.text
# Custom stylesheet handed to gr.Blocks below.
# NOTE(review): no component visible in this file sets elem_id "mkd" --
# confirm whether this rule is still needed.
css = """
#mkd {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""

# NOTE(review): the nesting below was reconstructed; the source this was taken
# from carried no indentation. Assumed layout: image + button in a left-hand
# column, caption box beside it in the same row -- confirm.
with gr.Blocks(css=css) as demo:
    # Closing tags fixed: the heading previously ended in "<center><h1>",
    # which opened two more elements instead of closing the existing ones.
    gr.HTML("<h1><center>Fine tuned version of xtuner/llava-llama-3-8b-v1_1 on google/docci dataset.</center></h1>")
    with gr.Tab(label="SD3 Llava Llama3 Captioner"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                submit_btn = gr.Button(value="Submit")
            output = gr.Text(label="Caption")

        # Example images are referenced by relative file name.
        gr.Examples(
            [["image1.jpg"], ["image2.jpg"], ["image3.png"]],
            inputs=[input_img],
            outputs=[output],
            fn=create_captions_llava_llama3_docci,
            label='Try captioning on examples',
        )

        # Clicking Submit captions the current image and writes to the text box.
        submit_btn.click(create_captions_llava_llama3_docci, [input_img], [output])

demo.launch(debug=True)