import gradio as gr
from transformers import AutoProcessor, AutoModelForCausalLM
import spaces
import io
import base64  # needed to decode base64-encoded (data URI) images
from PIL import Image
import subprocess

# Install the flash-attn dependency if needed; the env var skips building CUDA kernels at install time
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

# Load the model and processor
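# (trust_remote_code pulls Florence-2's custom modeling code from the Hub)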
model_id = 'J-LAB/Florence-vl3'
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda").eval()
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

DESCRIPTION = "# Product Describe by Fluxi IA\n### Base Model [Florence-2](https://huggingface.co/microsoft/Florence-2-large)"
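
# @spaces.GPU requests a ZeroGPU device for the duration of the call on Hugging Face Spaces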
@spaces.GPU
def run_example(task_prompt, image):
    inputs = processor(text=task_prompt, images=image, return_tensors="pt").to("cuda")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
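    # Keep special tokens: post_process_generation parses the task markers from the raw text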
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height)
    )
    return parsed_answer

# Process the input image; supports PIL images, raw bytes, NumPy arrays, and Base64 data-URI strings
def process_image(image, task_prompt):
    # Check whether the image arrived as a base64 data-URI string
    if isinstance(image, str) and image.startswith("data:image"):
        # Extract the base64 payload from the data URI
        base64_image = image.split(",")[1]
        # Decode the base64 payload into a PIL image
        image = Image.open(io.BytesIO(base64.b64decode(base64_image)))
    elif isinstance(image, bytes):
        image = Image.open(io.BytesIO(image))
    elif not isinstance(image, Image.Image):
        image = Image.fromarray(image)  # Convert a NumPy array to a PIL image, if applicable
    # Map the UI task names to Florence-2 task prompts
    if task_prompt == 'Product Caption':
        task_prompt = '<MORE_DETAILED_CAPTION>'
    elif task_prompt == 'OCR':
        task_prompt = '<OCR>'
    # Run inference with the processed image and task prompt
    results = run_example(task_prompt, image)
    # post_process_generation returns a dict keyed by the task prompt
    if results and task_prompt in results:
        output_text = results[task_prompt]
    else:
        output_text = ""
    # Convert newlines to HTML line breaks for the HTML output component
    output_text = output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")
    return output_text

css = """
#output {
overflow: auto;
border: 1px solid #ccc;
padding: 10px;
background-color: rgb(31 41 55);
color: #fff;
}
"""
js = """
function adjustHeight() {
var outputElement = document.getElementById('output');
outputElement.style.height = 'auto'; // Reset height to auto to get the actual content height
var height = outputElement.scrollHeight + 'px'; // Get the scrollHeight
outputElement.style.height = height; // Set the height
}
// Attach the adjustHeight function to the click event of the submit button
document.querySelector('button').addEventListener('click', function() {
setTimeout(adjustHeight, 500); // Adjust the height after a small delay to ensure content is loaded
});
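// This snippet is injected into the page on app load via demo.load(js=js) at the bottom of this file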
"""
single_task_list = ['Product Caption', 'OCR']
with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Tab(label="Product Image Select"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture", source="upload", type="pil")  # delivers a PIL image to process_image
                task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Product Caption")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.HTML(label="Output Text", elem_id="output")
    gr.Markdown("""
## How to use via API

To use this model via API, you can follow the example code below:

```python
import base64
import requests

# Convert the image to base64
image_path = 'path_to_image.png'
with open(image_path, 'rb') as image_file:
    image_base64 = base64.b64encode(image_file.read()).decode('utf-8')

# Prepare the payload
payload = {
    "image": f"data:image/png;base64,{image_base64}",
    "task_prompt": "Product Caption"
}

response = requests.post("http://your-space-url-here", json=payload)
print(response.json())
```
""")

    submit_btn.click(process_image, [input_img, task_prompt], [output_text])
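
    # Note for API users: the exact route depends on how the Space is deployed
    gr.Markdown("""
Note: the URL and payload shape in the example above assume a custom endpoint. A stock
Gradio Space instead exposes its functions under `/api/...` and expects a `{"data": [...]}` body,
so check this Space's "Use via API" page for the exact route and schema.
""")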
    demo.load(lambda: None, inputs=None, outputs=None, js=js)

demo.launch(debug=True)