Fluxi-IA / app.py
J-LAB's picture
Update app.py
9c53151 verified
raw
history blame
3.96 kB
import html
import io
import os
import subprocess
import sys

import gradio as gr
import spaces
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
# Install flash-attn at startup, before the model checkpoints are loaded.
# FLASH_ATTENTION_SKIP_CUDA_BUILD is set for the installer (it is understood
# to make flash-attn's setup skip compiling its CUDA extension -- see the
# flash-attn project for the exact semantics).
#
# The variable is merged into a copy of the current environment: passing a
# bare dict as `env` would replace the whole environment of the child process
# (PATH, HOME, proxy settings, virtualenv variables, ...).
# `sys.executable -m pip` installs into the interpreter running this app, and
# the argv list avoids going through a shell.
_flash_attn_install = subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,  # best effort, as before: a failed install must not abort startup here
)
if _flash_attn_install.returncode != 0:
    # Surface the failure instead of ignoring it silently.
    print(
        f"warning: installing flash-attn failed with exit code {_flash_attn_install.returncode}",
        file=sys.stderr,
    )
# Load every supported checkpoint once at import time. Each model is moved to
# "cuda" and put in eval mode; the dict is keyed by checkpoint id so callers
# can select a model by name.
models = {
    checkpoint: AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True).to("cuda").eval()
    for checkpoint in (
        'J-LAB/Florence_2_B_FluxiAI_Product_Caption',
        'J-LAB/Florence_2_L_FluxiAI_Product_Caption',
    )
}

# One processor per loaded model, stored under the same checkpoint id.
processors = {
    checkpoint: AutoProcessor.from_pretrained(checkpoint, trust_remote_code=True)
    for checkpoint in models
}

# Markdown heading rendered at the top of the page.
DESCRIPTION = "# [Florence-2 Product Describe by Fluxi IA](https://huggingface.co/microsoft/Florence-2-large)"
@spaces.GPU
def run_example(task_prompt, image, text_input=None, model_id='J-LAB/Florence_2_B_FluxiAI_Product_Caption'):
    """Run one generation pass for ``task_prompt`` on ``image``.

    Args:
        task_prompt: Task token sent to the model (e.g. ``'<PC>'``). It is also
            passed as ``task`` to the processor's post-processing step.
        image: Image handed to the processor; its ``width`` and ``height``
            attributes are read, so a PIL image is expected.
        text_input: Optional extra text appended verbatim to ``task_prompt``.
        model_id: Key into the module-level ``models`` / ``processors`` dicts.
            The default now names a checkpoint that is actually loaded; the
            previous default was not a key of ``models`` and always raised
            ``KeyError``.

    Returns:
        Whatever ``processor.post_process_generation`` returns for the decoded
        text (the caller indexes it by ``task_prompt``).

    Raises:
        KeyError: If ``model_id`` is not a loaded checkpoint.
    """
    model = models[model_id]
    processor = processors[model_id]

    prompt = task_prompt if text_input is None else task_prompt + text_input

    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")
    # Deterministic beam search (no sampling) with up to 1024 new tokens.
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    # Special tokens are kept in the decoded text; post-processing receives
    # the raw string together with the task token and the image size.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )
    return parsed_answer
# Maps the task labels offered in the UI to the task tokens sent to the model.
_TASK_PROMPT_TOKENS = {
    'Product Caption': '<PC>',
    'More Detailed Caption': '<MORE_DETAILED_CAPTION>',
}


def process_image(image, task_prompt, text_input=None, model_id='J-LAB/Florence_2_B_FluxiAI_Product_Caption'):
    """Caption ``image`` for the selected task and return the text as HTML.

    Args:
        image: NumPy array as delivered by the image input, or ``None`` when
            no picture was provided.
        task_prompt: UI task label, ``'Product Caption'`` or
            ``'More Detailed Caption'``.
        text_input: Accepted because the UI passes it, but not forwarded to
            the model: both supported tasks are run with the task token only.
        model_id: Checkpoint id forwarded to ``run_example``.

    Returns:
        The caption as an HTML-safe string with newlines turned into
        ``<br>``. An empty string is returned for an unknown task, a missing
        image, or when the model result has no entry for the task token.
        Every path returns a single string, matching the single output
        component this handler is wired to.
    """
    task_token = _TASK_PROMPT_TOKENS.get(task_prompt)
    if task_token is None or image is None:
        # Nothing to run: unknown task label or no picture uploaded.
        return ""

    pil_image = Image.fromarray(image)  # Convert NumPy array to PIL Image
    results = run_example(task_token, pil_image, model_id=model_id)

    # The result is keyed by the task token; pull out the plain text.
    caption = results[task_token] if results and task_token in results else ""

    # The text is rendered by an HTML component, so escape it first so that
    # characters such as "<" or "&" in the caption are shown literally, then
    # turn newlines into HTML line breaks.
    return html.escape(caption).replace("\n", "<br>")
# Stylesheet passed to gr.Blocks: a fixed-height, scrollable, bordered box
# for the element with id "output".
css = (
    "\n"
    "#output {\n"
    "height: 500px;\n"
    "overflow: auto;\n"
    "border: 1px solid #ccc;\n"
    "}\n"
)

# Task labels offered in the "Task Prompt" dropdown.
single_task_list = ['Product Caption', 'More Detailed Caption']
# Build the page: inputs in the left column, the generated caption on the right.
with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Tab(label="Florence-2 Image Captioning"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value='J-LAB/Florence_2_B_FluxiAI_Product_Caption')
                # NOTE(review): this selector is not connected to any event
                # handler in this section, so changing it has no effect here.
                task_type = gr.Radio(choices=['Single task', 'Cascased task'], label='Task type selector', value='Single task')
                # The initial value must be one of the choices; the previous
                # default "Caption" was not in single_task_list, so submitting
                # without touching the dropdown selected an unknown task.
                task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value=single_task_list[0])
                text_input = gr.Textbox(label="Text Input (optional)")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                # NOTE(review): no component in this section sets
                # elem_id="output", so the "#output" CSS rule appears to have
                # no target -- confirm whether it was meant for this one.
                output_text = gr.HTML(label="Output Text")
        # Four inputs, one output: process_image returns the caption as HTML.
        submit_btn.click(process_image, [input_img, task_prompt, text_input, model_selector], [output_text])

demo.launch(debug=True)