committed on
Browse files
@@ -2,7 +2,7 @@ import gradio as gr
2 |
import logging
3 |
import os
4 |
import json
5 |
from PIL import Image, ImageDraw
6 |
import torch
7 |
from surya.ocr import run_ocr
8 |
from surya.detection import batch_text_detection
@@ -13,88 +13,59 @@ from surya.model.recognition.model import load_model as load_rec_model
13 |
from surya.model.recognition.processor import load_processor as load_rec_processor
14 |
from surya.settings import settings
15 |
from surya.model.ordering.processor import load_processor as load_order_processor
16 |
17 |
18 |
19 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
20 |
logger = logging.getLogger(__name__)
21 |
22 |
# Configuração do TorchDynamo
23 |
torch._dynamo.config.capture_scalar_outputs = True
24 |
25 |
# Configuração de variáveis de ambiente
26 |
os.environ["RECOGNITION_BATCH_SIZE"] = "512"
27 |
os.environ["DETECTOR_BATCH_SIZE"] = "36"
28 |
os.environ["ORDER_BATCH_SIZE"] = "32"
29 |
os.environ["RECOGNITION_STATIC_CACHE"] = "true"
30 |
31 |
# Carregamento de modelos
32 |
-"Iniciando carregamento dos modelos...")
33 |
det_processor, det_model = load_det_processor(), load_det_model()
34 |
rec_model, rec_processor = load_rec_model(), load_rec_processor()
35 |
layout_model = load_det_model(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
36 |
layout_processor = load_det_processor(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
37 |
order_processor = load_order_processor()
38 |
# Vamos tentar carregar o modelo de ordenação de uma maneira diferente
39 |
from surya.model.ordering import model as order_model_module
40 |
order_model = order_model_module.Model()
41 |
42 |
# Compilação do modelo de reconhecimento
43 |
-"Compilando modelo de reconhecimento...")
44 |
rec_model.decoder.model = torch.compile(rec_model.decoder.model)
45 |
46 |
class CustomJSONEncoder(json.JSONEncoder):
47 |
def default(self, obj):
48 |
if hasattr(obj, '__dict__'):
49 |
return obj.__dict__
50 |
51 |
52 |
def serialize_result(result):
    """Return ``result`` as a JSON string indented by two spaces.

    Values the standard encoder cannot handle are delegated to
    ``CustomJSONEncoder``.
    """
    return json.dumps(result, cls=CustomJSONEncoder, indent=2)
54 |
55 |
def draw_boxes(image, predictions):
56 |
draw = ImageDraw.Draw(image)
57 |
58 |
59 |
60 |
61 |
draw.text((bbox[0], bbox[1] - 10), f"{idx+1}", font=font, fill="red")
62 |
return image
63 |
64 |
def format_ocr_text(predictions):
    """Render the recognised lines of the first prediction as numbered text.

    Args:
        predictions: sequence whose first element is indexable by
            ``'text_lines'``; each line is indexable by ``'text'`` and
            ``'confidence'``.

    Returns:
        One ``"<n>. <text> (Confidence: <c>)\\n"`` entry per line, numbered
        from 1 with the confidence shown to two decimals, concatenated into a
        single string. Empty when there are no lines.
    """
    text_lines = predictions[0]['text_lines']
    # Build the result with a single join instead of repeated concatenation.
    return "".join(
        f"{number}. {line['text']} (Confidence: {line['confidence']:.2f})\n"
        for number, line in enumerate(text_lines, start=1)
    )
69 |
70 |
def ocr_workflow(image, langs):
71 |"Iniciando workflow OCR com idiomas: {langs}")
72 |
73 |
74 |
75 |
76 |
77 |
78 |
image_with_boxes = draw_boxes(
79 |
80 |
81 |
formatted_text =
82 |
83 |
return serialize_result(predictions), image_with_boxes, formatted_text
84 |
except Exception as e:
85 |
logger.error(f"Erro durante o workflow OCR: {e}")
86 |
return serialize_result({"error": str(e)}), None,
87 |
88 |
def text_detection_workflow(image):
89 |"Iniciando workflow de detecção de texto")
90 |
91 |
92 |
93 |
94 |
95 |
96 |
image_with_boxes = draw_boxes(
97 |
98 |
return serialize_result(predictions), image_with_boxes
99 |
except Exception as e:
100 |
logger.error(f"Erro durante o workflow de detecção de texto: {e}")
@@ -103,14 +74,16 @@ def text_detection_workflow(image):
103 |
def layout_analysis_workflow(image):
104 |"Iniciando workflow de análise de layout")
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
image_with_boxes = draw_boxes(
113 |
114 |
return serialize_result(layout_predictions), image_with_boxes
115 |
except Exception as e:
116 |
logger.error(f"Erro durante o workflow de análise de layout: {e}")
@@ -119,22 +92,24 @@ def layout_analysis_workflow(image):
119 |
def reading_order_workflow(image):
120 |"Iniciando workflow de ordem de leitura")
121 |
122 |
123 |
124 |
125 |
bboxes = [pred.bbox for pred in layout_predictions[0].bboxes]
126 |
order_predictions = batch_ordering([
127 |
-"Workflow de ordem de leitura concluído com sucesso")
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
draw.text((bbox['bbox'][0], bbox['bbox'][1] - 10), f"{idx+1}", font=font, fill="blue")
136 |
137 |
138 |
except Exception as e:
139 |
logger.error(f"Erro durante o workflow de ordem de leitura: {e}")
140 |
return serialize_result({"error": str(e)}), None
@@ -148,19 +123,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
148 |
ocr_input = gr.File(label="Carregar Imagem ou PDF")
149 |
ocr_langs = gr.Textbox(label="Idiomas (separados por vírgula)", value="en")
150 |
ocr_button = gr.Button("Executar OCR")
151 |
152 |
153 |
154 |
ocr_text = gr.Textbox(label="Texto Reconhecido", lines=10)
155 |, inputs=[ocr_input, ocr_langs], outputs=[ocr_output, ocr_image, ocr_text])
156 |
157 |
with gr.Tab("Detecção de Texto"):
158 |
gr.Markdown("## Detecção de Linhas de Texto")
159 |
det_input = gr.File(label="Carregar Imagem ou PDF")
160 |
det_button = gr.Button("Executar Detecção de Texto")
161 |
162 |
163 |
det_image = gr.Image(label="Imagem com Caixas")
164 |, inputs=det_input, outputs=[det_output, det_image])
165 |
166 |
with gr.Tab("Análise de Layout"):
@@ -168,12 +141,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
168 |
layout_input = gr.File(label="Carregar Imagem ou PDF")
169 |
layout_button = gr.Button("Executar Análise de Layout")
170 |
order_button = gr.Button("Determinar Ordem de Leitura")
171 |
172 |
173 |
174 |
175 |
order_output = gr.JSON(label="Resultados da Ordem de Leitura")
176 |
order_image = gr.Image(label="Imagem com Ordem de Leitura")
177 |, inputs=layout_input, outputs=[layout_output, layout_image])
178 |, inputs=layout_input, outputs=[order_output, order_image])
179 |
2 |
import logging
3 |
import os
4 |
import json
5 |
from PIL import Image, ImageDraw
6 |
import torch
7 |
from surya.ocr import run_ocr
8 |
from surya.detection import batch_text_detection
13 |
from surya.model.recognition.processor import load_processor as load_rec_processor
14 |
from surya.settings import settings
15 |
from surya.model.ordering.processor import load_processor as load_order_processor
16 |
from surya.model.ordering.model import load_model as load_order_model
17 |
18 |
# ... (rest of the imports and configurations remain the same)
19 |
20 |
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that tolerates PIL images and attribute-bearing objects."""

    def default(self, obj):
        """Return a JSON-encodable stand-in for ``obj``.

        ``json`` calls this only for values it cannot encode natively.

        Returns:
            A fixed marker string for PIL images, the attribute dictionary
            for any object that has ``__dict__``, otherwise whatever the
            base class does (it raises ``TypeError``).
        """
        # Images are replaced by a marker string rather than their pixel data.
        if isinstance(obj, Image.Image):
            return "Image object (not serializable)"
        # Generic objects are encoded through their attribute mapping.
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        return super().default(obj)
27 |
28 |
def serialize_result(result):
    """Return ``result`` as a JSON string indented by two spaces.

    Values the standard encoder cannot handle are delegated to
    ``CustomJSONEncoder``.
    """
    return json.dumps(result, cls=CustomJSONEncoder, indent=2)
30 |
31 |
def _prediction_field(pred, key):
    """Read ``key`` from a prediction given as a mapping or as a plain object."""
    if hasattr(pred, 'get'):
        return pred.get(key)
    return getattr(pred, key, None)


def draw_boxes(image, predictions, color=(255, 0, 0)):
    """Outline every prediction on ``image`` and return that same image.

    Args:
        image: PIL image to draw on; it is modified in place, so callers
            pass a copy when the original must stay untouched.
        predictions: iterable of predictions. Each one may be a mapping or an
            object exposing ``bbox`` and/or ``polygon``; ``bbox`` is
            preferred and ``polygon`` is the fallback.
        color: outline colour handed to Pillow.

    Returns:
        ``image``, with a 2-pixel outline per prediction that carries a
        non-empty ``bbox`` or ``polygon``.
    """
    draw = ImageDraw.Draw(image)
    for pred in predictions:
        shape = _prediction_field(pred, 'bbox') or _prediction_field(pred, 'polygon')
        if not shape:
            continue
        # A list of more than two (x, y) points cannot be given to
        # ``rectangle``; outline it as a polygon instead.
        # NOTE(review): assumes polygons arrive as point lists - confirm
        # against the prediction schema.
        is_point_list = isinstance(shape[0], (list, tuple)) and len(shape) > 2
        if is_point_list:
            draw.polygon([tuple(point) for point in shape], outline=color, width=2)
        else:
            draw.rectangle(shape, outline=color, width=2)
    return image
38 |
39 |
def ocr_workflow(image, langs):
40 |"Iniciando workflow OCR com idiomas: {langs}")
41 |
42 |
image =
43 |
logger.debug(f"Imagem carregada: {image.size}")
44 |
predictions = run_ocr([image], [langs.split(',')], det_model, det_processor, rec_model, rec_processor)
45 |
46 |
# Draw bounding boxes on the image
47 |
image_with_boxes = draw_boxes(image.copy(), predictions[0]['text_lines'])
48 |
49 |
# Format the OCR results
50 |
formatted_text = "\n".join([line['text'] for line in predictions[0]['text_lines']])
51 |
52 |
+"Workflow OCR concluído com sucesso")
53 |
return serialize_result(predictions), image_with_boxes, formatted_text
54 |
except Exception as e:
55 |
logger.error(f"Erro durante o workflow OCR: {e}")
56 |
return serialize_result({"error": str(e)}), None, ""
57 |
58 |
def text_detection_workflow(image):
59 |"Iniciando workflow de detecção de texto")
60 |
61 |
image =
62 |
logger.debug(f"Imagem carregada: {image.size}")
63 |
predictions = batch_text_detection([image], det_model, det_processor)
64 |
65 |
# Draw bounding boxes on the image
66 |
image_with_boxes = draw_boxes(image.copy(), predictions[0].bboxes)
67 |
68 |
+"Workflow de detecção de texto concluído com sucesso")
69 |
return serialize_result(predictions), image_with_boxes
70 |
except Exception as e:
71 |
logger.error(f"Erro durante o workflow de detecção de texto: {e}")
74 |
def layout_analysis_workflow(image):
75 |"Iniciando workflow de análise de layout")
76 |
77 |
image =
78 |
logger.debug(f"Imagem carregada: {image.size}")
79 |
line_predictions = batch_text_detection([image], det_model, det_processor)
80 |
logger.debug(f"Detecção de linhas concluída. Número de linhas detectadas: {len(line_predictions[0].bboxes)}")
81 |
layout_predictions = batch_layout_detection([image], layout_model, layout_processor, line_predictions)
82 |
83 |
# Draw bounding boxes on the image
84 |
image_with_boxes = draw_boxes(image.copy(), layout_predictions[0].bboxes, color=(0, 255, 0))
85 |
86 |
+"Workflow de análise de layout concluído com sucesso")
87 |
return serialize_result(layout_predictions), image_with_boxes
88 |
except Exception as e:
89 |
logger.error(f"Erro durante o workflow de análise de layout: {e}")
92 |
def reading_order_workflow(image):
93 |"Iniciando workflow de ordem de leitura")
94 |
95 |
image =
96 |
logger.debug(f"Imagem carregada: {image.size}")
97 |
line_predictions = batch_text_detection([image], det_model, det_processor)
98 |
logger.debug(f"Detecção de linhas concluída. Número de linhas detectadas: {len(line_predictions[0].bboxes)}")
99 |
layout_predictions = batch_layout_detection([image], layout_model, layout_processor, line_predictions)
100 |
logger.debug(f"Análise de layout concluída. Número de elementos de layout: {len(layout_predictions[0].bboxes)}")
101 |
bboxes = [pred.bbox for pred in layout_predictions[0].bboxes]
102 |
order_predictions = batch_ordering([image], [bboxes], order_model, order_processor)
103 |
104 |
# Draw bounding boxes on the image
105 |
image_with_boxes = image.copy()
106 |
for i, bbox in enumerate(order_predictions[0]['bboxes']):
107 |
draw = ImageDraw.Draw(image_with_boxes)
108 |
draw.rectangle(bbox['bbox'], outline=(0, 0, 255), width=2)
109 |
draw.text((bbox['bbox'][0], bbox['bbox'][1]), str(bbox['position']), fill=(255, 0, 0))
110 |
111 |
+"Workflow de ordem de leitura concluído com sucesso")
112 |
return serialize_result(order_predictions), image_with_boxes
113 |
except Exception as e:
114 |
logger.error(f"Erro durante o workflow de ordem de leitura: {e}")
115 |
return serialize_result({"error": str(e)}), None
123 |
ocr_input = gr.File(label="Carregar Imagem ou PDF")
124 |
ocr_langs = gr.Textbox(label="Idiomas (separados por vírgula)", value="en")
125 |
ocr_button = gr.Button("Executar OCR")
126 |
ocr_output = gr.JSON(label="Resultados OCR")
127 |
ocr_image = gr.Image(label="Imagem com Bounding Boxes")
128 |
ocr_text = gr.Textbox(label="Texto Extraído", lines=10)
129 |, inputs=[ocr_input, ocr_langs], outputs=[ocr_output, ocr_image, ocr_text])
130 |
131 |
with gr.Tab("Detecção de Texto"):
132 |
gr.Markdown("## Detecção de Linhas de Texto")
133 |
det_input = gr.File(label="Carregar Imagem ou PDF")
134 |
det_button = gr.Button("Executar Detecção de Texto")
135 |
det_output = gr.JSON(label="Resultados da Detecção de Texto")
136 |
det_image = gr.Image(label="Imagem com Bounding Boxes")
137 |, inputs=det_input, outputs=[det_output, det_image])
138 |
139 |
with gr.Tab("Análise de Layout"):
141 |
layout_input = gr.File(label="Carregar Imagem ou PDF")
142 |
layout_button = gr.Button("Executar Análise de Layout")
143 |
order_button = gr.Button("Determinar Ordem de Leitura")
144 |
layout_output = gr.JSON(label="Resultados da Análise de Layout")
145 |
layout_image = gr.Image(label="Imagem com Layout")
146 |
order_output = gr.JSON(label="Resultados da Ordem de Leitura")
147 |
order_image = gr.Image(label="Imagem com Ordem de Leitura")
148 |, inputs=layout_input, outputs=[layout_output, layout_image])
149 |, inputs=layout_input, outputs=[order_output, order_image])
150 |