import gradio as gr
import numpy as np
import spaces
from lmdeploy import GenerationConfig
from lmdeploy import pipeline
from lmdeploy.vl import load_image
from PIL import Image
# Build the model pipeline once at import time; the captioning function
# below reuses this shared instance for every request.
pipe = pipeline('gokaygokay/llava-llama3-docci')
@spaces.GPU
def create_captions_llava_llama3_docci(image):
    """Generate a detailed caption for an image with the shared pipeline.

    Args:
        image: Pixel data accepted by ``np.uint8`` (presumably the numpy
            array delivered by the ``gr.Image`` input -- confirm against
            the caller), or ``None`` when no picture was provided.

    Returns:
        The ``text`` attribute of the pipeline response, or an empty
        string when ``image`` is ``None``.
    """
    # Submitting without a picture hands us None, and np.uint8(None) raises
    # a TypeError; return an empty caption instead of crashing the request.
    if image is None:
        return ""

    gen_config = GenerationConfig(repetition_penalty=1.10)
    # Cast to 8-bit samples and force three RGB channels before passing the
    # picture to the pipeline.
    rgb_image = Image.fromarray(np.uint8(image)).convert('RGB')
    response = pipe(
        ('describe this image in detail', rgb_image),
        gen_config=gen_config,
    )
    return response.text
# Custom stylesheet passed to gr.Blocks below: it gives the element with
# id "mkd" a fixed 500px height, scrollable overflow and a thin grey border.
# NOTE(review): no component visible in this file sets elem_id="mkd" --
# confirm whether this rule is still needed.
css = """
#mkd {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""
# Assemble the demo UI: a heading, an image input with a submit button, a
# caption output, and clickable example images.
with gr.Blocks(css=css) as demo:
    # The heading literal was split across two source lines (an unterminated
    # string); it is rejoined here into one valid string.
    gr.HTML(
        "Fine tuned version of xtuner/llava-llama-3-8b-v1_1 "
        "on google/docci dataset."
    )
    with gr.Tab(label="SD3 Llava Llama3 Captioner"):
        # NOTE(review): the nesting below was reconstructed from the order of
        # the context managers; confirm that the caption box is meant to sit
        # beside the input column rather than inside it.
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                submit_btn = gr.Button(value="Submit")
            output = gr.Text(label="Caption")

        gr.Examples(
            [["assets/image1.png"], ["assets/image2.PNG"], ["assets/image3.jpg"]],
            inputs=[input_img],
            outputs=[output],
            fn=create_captions_llava_llama3_docci,
            label='Try captioning on examples',
        )

        # Run the captioner on the uploaded picture when Submit is pressed.
        submit_btn.click(create_captions_llava_llama3_docci, [input_img], [output])

demo.launch(debug=True)