Spaces:

bdsqlsz
/

SD3-Llava-Llama3-Captioner

Running on Zero

Update app.py

5c6c732 verified 7 months ago

1.71 kB

	from lmdeploy import pipeline, GenerationConfig, TurbomindEngineConfig
	from lmdeploy.vl import load_image
	import spaces
	import gradio as gr
	from PIL import Image
	import numpy as np

	@spaces.GPU
	def create_captions_llava_llama3_docci(image):
	    """Generate a caption for ``image`` with the open-llava-next-llama3-8b pipeline.

	    Args:
	        image: Pixel data that ``np.uint8`` / ``Image.fromarray`` can consume
	            (presumably the array handed over by ``gr.Image`` -- confirm).

	    Returns:
	        The ``text`` attribute of the pipeline response.

	    Raises:
	        gr.Error: If ``image`` is ``None`` (nothing was uploaded).
	    """
	    if image is None:
	        # Fail fast with a readable message instead of an opaque conversion
	        # error raised only after the model has been loaded.
	        raise gr.Error("Please provide an image to caption.")

	    # Implicit concatenation keeps the prompt text byte-identical to the
	    # original single-line literal (including the missing spaces after periods).
	    prompt = (
	        'As an AI image annotation expert, please provide accurate annotations '
	        'for images to enhance model understanding of the content.'
	        'These prompts will be used for image reconstruction, so the closer the '
	        'similarity to the original image, the better the prompts quality.'
	        'Special prompts will receive a reward of $10 per image.'
	    )

	    # NOTE(review): the pipeline is rebuilt on every call. Presumably this is
	    # deliberate because the GPU is only attached inside the @spaces.GPU call;
	    # confirm before hoisting it to module level.
	    pipe = pipeline('Lin-Chen/open-llava-next-llama3-8b')
	    gen_config = GenerationConfig(repetition_penalty=1.10)

	    # Normalise the input to an 8-bit RGB PIL image before handing it to the model.
	    image = Image.fromarray(np.uint8(image)).convert('RGB')
	    response = pipe((prompt, image), gen_config=gen_config)
	    return response.text

	# Custom stylesheet passed to gr.Blocks(css=...): gives the element with id
	# "mkd" a fixed 500px height, scrollable overflow and a thin grey border.
	# NOTE(review): no component in the visible code sets elem_id="mkd", so this
	# rule appears to be unused -- confirm before removing.
	css = """
	#mkd {
	height: 500px;
	overflow: auto;
	border: 1px solid #ccc;
	}
	"""

	# Build the Gradio UI: a heading, then one tab holding the image input,
	# a submit button, the caption output and a set of clickable examples.
	# NOTE(review): the nesting below was reconstructed (the original indentation
	# was lost); the caption box is assumed to sit beside the input column.
	with gr.Blocks(css=css) as demo:
	    # Closing tags fixed: the original ended with "<center><h1>" (two more
	    # opening tags) instead of "</center></h1>".
	    # NOTE(review): the heading names xtuner/llava-llama-3-8b-v1_1 while the
	    # captioning function loads Lin-Chen/open-llava-next-llama3-8b -- confirm
	    # which description is intended.
	    gr.HTML("<h1><center>Fine tuned version of xtuner/llava-llama-3-8b-v1_1 on google/docci dataset.</center></h1>")

	    with gr.Tab(label="SD3 Llava Llama3 Captioner"):
	        with gr.Row():
	            with gr.Column():
	                input_img = gr.Image(label="Input Picture")
	                submit_btn = gr.Button(value="Submit")
	            output = gr.Text(label="Caption")

	        # Example images are resolved relative to the app's working directory.
	        gr.Examples(
	            [["image1.jpg"], ["image2.jpg"], ["image3.png"]],
	            inputs=[input_img],
	            outputs=[output],
	            fn=create_captions_llava_llama3_docci,
	            label='Try captioning on examples',
	        )

	        # Event listeners must be registered inside the Blocks context.
	        submit_btn.click(create_captions_llava_llama3_docci, [input_img], [output])


	demo.launch(debug=True)