artificialguybr commited on
Commit
449c0a8
·
verified ·
1 Parent(s): 1af55e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -87
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import logging
3
  import os
4
  import json
5
- from PIL import Image, ImageDraw, ImageFont
6
  import torch
7
  from surya.ocr import run_ocr
8
  from surya.detection import batch_text_detection
@@ -13,88 +13,59 @@ from surya.model.recognition.model import load_model as load_rec_model
13
  from surya.model.recognition.processor import load_processor as load_rec_processor
14
  from surya.settings import settings
15
  from surya.model.ordering.processor import load_processor as load_order_processor
16
- # Removemos a importação problemática e usaremos uma alternativa
17
 
18
- # Configuração de logging
19
- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
20
- logger = logging.getLogger(__name__)
21
-
22
- # Configuração do TorchDynamo
23
- torch._dynamo.config.capture_scalar_outputs = True
24
-
25
- # Configuração de variáveis de ambiente
26
- os.environ["RECOGNITION_BATCH_SIZE"] = "512"
27
- os.environ["DETECTOR_BATCH_SIZE"] = "36"
28
- os.environ["ORDER_BATCH_SIZE"] = "32"
29
- os.environ["RECOGNITION_STATIC_CACHE"] = "true"
30
-
31
- # Carregamento de modelos
32
- logger.info("Iniciando carregamento dos modelos...")
33
- det_processor, det_model = load_det_processor(), load_det_model()
34
- rec_model, rec_processor = load_rec_model(), load_rec_processor()
35
- layout_model = load_det_model(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
36
- layout_processor = load_det_processor(checkpoint=settings.LAYOUT_MODEL_CHECKPOINT)
37
- order_processor = load_order_processor()
38
- # Vamos tentar carregar o modelo de ordenação de uma maneira diferente
39
- from surya.model.ordering import model as order_model_module
40
- order_model = order_model_module.Model()
41
-
42
- # Compilação do modelo de reconhecimento
43
- logger.info("Compilando modelo de reconhecimento...")
44
- rec_model.decoder.model = torch.compile(rec_model.decoder.model)
45
 
46
  class CustomJSONEncoder(json.JSONEncoder):
47
  def default(self, obj):
 
 
48
  if hasattr(obj, '__dict__'):
49
  return obj.__dict__
50
- return str(obj)
51
 
52
  def serialize_result(result):
53
  return json.dumps(result, cls=CustomJSONEncoder, indent=2)
54
 
55
- def draw_boxes(image, predictions):
56
  draw = ImageDraw.Draw(image)
57
- font = ImageFont.load_default()
58
- for idx, pred in enumerate(predictions[0]['text_lines']):
59
- bbox = pred['bbox']
60
- draw.rectangle(bbox, outline="red", width=2)
61
- draw.text((bbox[0], bbox[1] - 10), f"{idx+1}", font=font, fill="red")
62
  return image
63
 
64
- def format_ocr_text(predictions):
65
- formatted_text = ""
66
- for idx, pred in enumerate(predictions[0]['text_lines']):
67
- formatted_text += f"{idx+1}. {pred['text']} (Confidence: {pred['confidence']:.2f})\n"
68
- return formatted_text
69
-
70
  def ocr_workflow(image, langs):
71
  logger.info(f"Iniciando workflow OCR com idiomas: {langs}")
72
  try:
73
- image_pil = Image.open(image.name)
74
- predictions = run_ocr([image_pil], [langs.split(',')], det_model, det_processor, rec_model, rec_processor)
75
- logger.info("Workflow OCR concluído com sucesso")
76
 
77
- # Desenhar caixas na imagem
78
- image_with_boxes = draw_boxes(image_pil.copy(), predictions)
79
 
80
- # Formatar texto OCR
81
- formatted_text = format_ocr_text(predictions)
82
 
 
83
  return serialize_result(predictions), image_with_boxes, formatted_text
84
  except Exception as e:
85
  logger.error(f"Erro durante o workflow OCR: {e}")
86
- return serialize_result({"error": str(e)}), None, str(e)
87
 
88
  def text_detection_workflow(image):
89
  logger.info("Iniciando workflow de detecção de texto")
90
  try:
91
- image_pil = Image.open(image.name)
92
- predictions = batch_text_detection([image_pil], det_model, det_processor)
93
- logger.info("Workflow de detecção de texto concluído com sucesso")
94
 
95
- # Desenhar caixas na imagem
96
- image_with_boxes = draw_boxes(image_pil.copy(), [{"text_lines": predictions[0].bboxes}])
97
 
 
98
  return serialize_result(predictions), image_with_boxes
99
  except Exception as e:
100
  logger.error(f"Erro durante o workflow de detecção de texto: {e}")
@@ -103,14 +74,16 @@ def text_detection_workflow(image):
103
  def layout_analysis_workflow(image):
104
  logger.info("Iniciando workflow de análise de layout")
105
  try:
106
- image_pil = Image.open(image.name)
107
- line_predictions = batch_text_detection([image_pil], det_model, det_processor)
108
- layout_predictions = batch_layout_detection([image_pil], layout_model, layout_processor, line_predictions)
109
- logger.info("Workflow de análise de layout concluído com sucesso")
 
110
 
111
- # Desenhar caixas na imagem
112
- image_with_boxes = draw_boxes(image_pil.copy(), [{"text_lines": layout_predictions[0].bboxes}])
113
 
 
114
  return serialize_result(layout_predictions), image_with_boxes
115
  except Exception as e:
116
  logger.error(f"Erro durante o workflow de análise de layout: {e}")
@@ -119,22 +92,24 @@ def layout_analysis_workflow(image):
119
  def reading_order_workflow(image):
120
  logger.info("Iniciando workflow de ordem de leitura")
121
  try:
122
- image_pil = Image.open(image.name)
123
- line_predictions = batch_text_detection([image_pil], det_model, det_processor)
124
- layout_predictions = batch_layout_detection([image_pil], layout_model, layout_processor, line_predictions)
 
 
 
125
  bboxes = [pred.bbox for pred in layout_predictions[0].bboxes]
126
- order_predictions = batch_ordering([image_pil], [bboxes], order_model, order_processor)
127
- logger.info("Workflow de ordem de leitura concluído com sucesso")
128
 
129
- # Desenhar caixas na imagem com a ordem de leitura
130
- image_with_order = image_pil.copy()
131
- draw = ImageDraw.Draw(image_with_order)
132
- font = ImageFont.load_default()
133
- for idx, bbox in enumerate(order_predictions[0]['bboxes']):
134
- draw.rectangle(bbox['bbox'], outline="blue", width=2)
135
- draw.text((bbox['bbox'][0], bbox['bbox'][1] - 10), f"{idx+1}", font=font, fill="blue")
136
 
137
- return serialize_result(order_predictions), image_with_order
 
138
  except Exception as e:
139
  logger.error(f"Erro durante o workflow de ordem de leitura: {e}")
140
  return serialize_result({"error": str(e)}), None
@@ -148,19 +123,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
148
  ocr_input = gr.File(label="Carregar Imagem ou PDF")
149
  ocr_langs = gr.Textbox(label="Idiomas (separados por vírgula)", value="en")
150
  ocr_button = gr.Button("Executar OCR")
151
- with gr.Row():
152
- ocr_output = gr.JSON(label="Resultados OCR")
153
- ocr_image = gr.Image(label="Imagem com Caixas")
154
- ocr_text = gr.Textbox(label="Texto Reconhecido", lines=10)
155
  ocr_button.click(ocr_workflow, inputs=[ocr_input, ocr_langs], outputs=[ocr_output, ocr_image, ocr_text])
156
 
157
  with gr.Tab("Detecção de Texto"):
158
  gr.Markdown("## Detecção de Linhas de Texto")
159
  det_input = gr.File(label="Carregar Imagem ou PDF")
160
  det_button = gr.Button("Executar Detecção de Texto")
161
- with gr.Row():
162
- det_output = gr.JSON(label="Resultados da Detecção de Texto")
163
- det_image = gr.Image(label="Imagem com Caixas")
164
  det_button.click(text_detection_workflow, inputs=det_input, outputs=[det_output, det_image])
165
 
166
  with gr.Tab("Análise de Layout"):
@@ -168,12 +141,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
168
  layout_input = gr.File(label="Carregar Imagem ou PDF")
169
  layout_button = gr.Button("Executar Análise de Layout")
170
  order_button = gr.Button("Determinar Ordem de Leitura")
171
- with gr.Row():
172
- layout_output = gr.JSON(label="Resultados da Análise de Layout")
173
- layout_image = gr.Image(label="Imagem com Layout")
174
- with gr.Row():
175
- order_output = gr.JSON(label="Resultados da Ordem de Leitura")
176
- order_image = gr.Image(label="Imagem com Ordem de Leitura")
177
  layout_button.click(layout_analysis_workflow, inputs=layout_input, outputs=[layout_output, layout_image])
178
  order_button.click(reading_order_workflow, inputs=layout_input, outputs=[order_output, order_image])
179
 
 
2
  import logging
3
  import os
4
  import json
5
+ from PIL import Image, ImageDraw
6
  import torch
7
  from surya.ocr import run_ocr
8
  from surya.detection import batch_text_detection
 
13
  from surya.model.recognition.processor import load_processor as load_rec_processor
14
  from surya.settings import settings
15
  from surya.model.ordering.processor import load_processor as load_order_processor
16
+ from surya.model.ordering.model import load_model as load_order_model
17
 
18
+ # ... (rest of the imports and configurations remain the same)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  class CustomJSONEncoder(json.JSONEncoder):
21
  def default(self, obj):
22
+ if isinstance(obj, Image.Image):
23
+ return "Image object (not serializable)"
24
  if hasattr(obj, '__dict__'):
25
  return obj.__dict__
26
+ return super().default(obj)
27
 
28
  def serialize_result(result):
29
  return json.dumps(result, cls=CustomJSONEncoder, indent=2)
30
 
31
+ def draw_boxes(image, predictions, color=(255, 0, 0)):
32
  draw = ImageDraw.Draw(image)
33
+ for pred in predictions:
34
+ bbox = pred.get('bbox') or pred.get('polygon')
35
+ if bbox:
36
+ draw.rectangle(bbox, outline=color, width=2)
 
37
  return image
38
 
 
 
 
 
 
 
39
  def ocr_workflow(image, langs):
40
  logger.info(f"Iniciando workflow OCR com idiomas: {langs}")
41
  try:
42
+ image = Image.open(image.name)
43
+ logger.debug(f"Imagem carregada: {image.size}")
44
+ predictions = run_ocr([image], [langs.split(',')], det_model, det_processor, rec_model, rec_processor)
45
 
46
+ # Draw bounding boxes on the image
47
+ image_with_boxes = draw_boxes(image.copy(), predictions[0]['text_lines'])
48
 
49
+ # Format the OCR results
50
+ formatted_text = "\n".join([line['text'] for line in predictions[0]['text_lines']])
51
 
52
+ logger.info("Workflow OCR concluído com sucesso")
53
  return serialize_result(predictions), image_with_boxes, formatted_text
54
  except Exception as e:
55
  logger.error(f"Erro durante o workflow OCR: {e}")
56
+ return serialize_result({"error": str(e)}), None, ""
57
 
58
  def text_detection_workflow(image):
59
  logger.info("Iniciando workflow de detecção de texto")
60
  try:
61
+ image = Image.open(image.name)
62
+ logger.debug(f"Imagem carregada: {image.size}")
63
+ predictions = batch_text_detection([image], det_model, det_processor)
64
 
65
+ # Draw bounding boxes on the image
66
+ image_with_boxes = draw_boxes(image.copy(), predictions[0].bboxes)
67
 
68
+ logger.info("Workflow de detecção de texto concluído com sucesso")
69
  return serialize_result(predictions), image_with_boxes
70
  except Exception as e:
71
  logger.error(f"Erro durante o workflow de detecção de texto: {e}")
 
74
  def layout_analysis_workflow(image):
75
  logger.info("Iniciando workflow de análise de layout")
76
  try:
77
+ image = Image.open(image.name)
78
+ logger.debug(f"Imagem carregada: {image.size}")
79
+ line_predictions = batch_text_detection([image], det_model, det_processor)
80
+ logger.debug(f"Detecção de linhas concluída. Número de linhas detectadas: {len(line_predictions[0].bboxes)}")
81
+ layout_predictions = batch_layout_detection([image], layout_model, layout_processor, line_predictions)
82
 
83
+ # Draw bounding boxes on the image
84
+ image_with_boxes = draw_boxes(image.copy(), layout_predictions[0].bboxes, color=(0, 255, 0))
85
 
86
+ logger.info("Workflow de análise de layout concluído com sucesso")
87
  return serialize_result(layout_predictions), image_with_boxes
88
  except Exception as e:
89
  logger.error(f"Erro durante o workflow de análise de layout: {e}")
 
92
  def reading_order_workflow(image):
93
  logger.info("Iniciando workflow de ordem de leitura")
94
  try:
95
+ image = Image.open(image.name)
96
+ logger.debug(f"Imagem carregada: {image.size}")
97
+ line_predictions = batch_text_detection([image], det_model, det_processor)
98
+ logger.debug(f"Detecção de linhas concluída. Número de linhas detectadas: {len(line_predictions[0].bboxes)}")
99
+ layout_predictions = batch_layout_detection([image], layout_model, layout_processor, line_predictions)
100
+ logger.debug(f"Análise de layout concluída. Número de elementos de layout: {len(layout_predictions[0].bboxes)}")
101
  bboxes = [pred.bbox for pred in layout_predictions[0].bboxes]
102
+ order_predictions = batch_ordering([image], [bboxes], order_model, order_processor)
 
103
 
104
+ # Draw bounding boxes on the image
105
+ image_with_boxes = image.copy()
106
+ for i, bbox in enumerate(order_predictions[0]['bboxes']):
107
+ draw = ImageDraw.Draw(image_with_boxes)
108
+ draw.rectangle(bbox['bbox'], outline=(0, 0, 255), width=2)
109
+ draw.text((bbox['bbox'][0], bbox['bbox'][1]), str(bbox['position']), fill=(255, 0, 0))
 
110
 
111
+ logger.info("Workflow de ordem de leitura concluído com sucesso")
112
+ return serialize_result(order_predictions), image_with_boxes
113
  except Exception as e:
114
  logger.error(f"Erro durante o workflow de ordem de leitura: {e}")
115
  return serialize_result({"error": str(e)}), None
 
123
  ocr_input = gr.File(label="Carregar Imagem ou PDF")
124
  ocr_langs = gr.Textbox(label="Idiomas (separados por vírgula)", value="en")
125
  ocr_button = gr.Button("Executar OCR")
126
+ ocr_output = gr.JSON(label="Resultados OCR")
127
+ ocr_image = gr.Image(label="Imagem com Bounding Boxes")
128
+ ocr_text = gr.Textbox(label="Texto Extraído", lines=10)
 
129
  ocr_button.click(ocr_workflow, inputs=[ocr_input, ocr_langs], outputs=[ocr_output, ocr_image, ocr_text])
130
 
131
  with gr.Tab("Detecção de Texto"):
132
  gr.Markdown("## Detecção de Linhas de Texto")
133
  det_input = gr.File(label="Carregar Imagem ou PDF")
134
  det_button = gr.Button("Executar Detecção de Texto")
135
+ det_output = gr.JSON(label="Resultados da Detecção de Texto")
136
+ det_image = gr.Image(label="Imagem com Bounding Boxes")
 
137
  det_button.click(text_detection_workflow, inputs=det_input, outputs=[det_output, det_image])
138
 
139
  with gr.Tab("Análise de Layout"):
 
141
  layout_input = gr.File(label="Carregar Imagem ou PDF")
142
  layout_button = gr.Button("Executar Análise de Layout")
143
  order_button = gr.Button("Determinar Ordem de Leitura")
144
+ layout_output = gr.JSON(label="Resultados da Análise de Layout")
145
+ layout_image = gr.Image(label="Imagem com Layout")
146
+ order_output = gr.JSON(label="Resultados da Ordem de Leitura")
147
+ order_image = gr.Image(label="Imagem com Ordem de Leitura")
 
 
148
  layout_button.click(layout_analysis_workflow, inputs=layout_input, outputs=[layout_output, layout_image])
149
  order_button.click(reading_order_workflow, inputs=layout_input, outputs=[order_output, order_image])
150