File size: 1,834 Bytes
7e0dcfc
6385c67
4397550
ae892a5
 
e7a1352
4397550
6385c67
 
4db78e7
6385c67
 
402ed4a
6385c67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efb77d1
 
 
6385c67
 
e7a1352
6385c67
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from lmdeploy import pipeline, GenerationConfig, TurbomindEngineConfig
from lmdeploy.vl import load_image
import spaces
import gradio as gr
from PIL import Image
import numpy as np

@spaces.GPU
def create_captions_llava_llama3_docci(image):
    """Caption ``image`` with the 'Lin-Chen/open-llava-next-llama3-8b' pipeline.

    Args:
        image: Pixel data from the ``gr.Image`` input; it is passed through
            ``np.uint8`` and ``Image.fromarray`` before being sent to the
            model.

    Returns:
        The ``text`` attribute of the pipeline response.

    Raises:
        gr.Error: If ``image`` is ``None`` (no picture was provided).
    """
    if image is None:
        # An empty image input arrives as None; surface a readable message in
        # the UI instead of failing inside the array conversion below.
        raise gr.Error("Please provide an image to caption.")

    # Double-quoted on purpose: the prompt contains an apostrophe ("model's"),
    # which terminated the original single-quoted literal and made the file a
    # SyntaxError. The wording (including the missing spaces after some
    # periods) is kept exactly as it was so the model sees the same prompt.
    prompt = (
        "As an AI image annotation expert, please provide accurate annotations "
        "for images to enhance the CLIP model's understanding of the content."
        "If the image has a distinct style or filter, it also needs to be labeled. "
        "Your label should be accurate, non repetitive. "
        "These labels will be used for image reconstruction, so the closer the "
        "similarity to the original image, the better the label quality."
        "Special tags will receive a reward of $10 per image."
    )

    # NOTE(review): the pipeline is rebuilt on every call, which presumably
    # reloads the model each time — confirm whether it can be cached in this
    # hosting setup before hoisting it.
    pipe = pipeline('Lin-Chen/open-llava-next-llama3-8b')
    gen_config = GenerationConfig(repetition_penalty=1.10)
    rgb_image = Image.fromarray(np.uint8(image)).convert('RGB')
    response = pipe((prompt, rgb_image), gen_config=gen_config)
    return response.text

# Custom stylesheet passed to gr.Blocks(css=...). It gives the element with
# id "mkd" a fixed 500px height, scrolling overflow and a light grey border.
# NOTE(review): no component in the visible code sets elem_id="mkd", so this
# rule currently appears to match nothing — confirm whether it is still needed.
css = """
  #mkd {
    height: 500px; 
    overflow: auto; 
    border: 1px solid #ccc; 
  }
"""

# Build the Gradio UI: a heading plus one tab holding the image input, the
# submit button, the caption output and a set of clickable example images.
with gr.Blocks(css=css) as demo:
    # The closing tags are now real closing tags; the original ended with
    # "<center><h1>", which opened two more elements instead of closing them.
    # NOTE(review): the heading names xtuner/llava-llama-3-8b-v1_1 while the
    # captioning function loads 'Lin-Chen/open-llava-next-llama3-8b' —
    # confirm which one is intended.
    gr.HTML("<h1><center>Fine tuned version of xtuner/llava-llama-3-8b-v1_1 on google/docci dataset.</center></h1>")

    with gr.Tab(label="SD3 Llava Llama3 Captioner"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                submit_btn = gr.Button(value="Submit")
                output = gr.Text(label="Caption")

        # Example images are resolved relative to the app's working directory.
        gr.Examples(
            [["image1.jpg"], ["image2.jpg"], ["image3.png"]],
            inputs=[input_img],
            outputs=[output],
            fn=create_captions_llava_llama3_docci,
            label='Try captioning on examples',
        )

        # Run the captioner on the uploaded picture and show the text result.
        submit_btn.click(create_captions_llava_llama3_docci, [input_img], [output])


# Start the app; debug=True is kept from the original configuration.
demo.launch(debug=True)