artificialguybr committed on
Commit
f51fa47
·
verified ·
1 Parent(s): ed816c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -126
app.py CHANGED
@@ -1,129 +1,45 @@
1
  import gradio as gr
2
- import json
3
- import subprocess
4
  from PIL import Image
5
- import os
6
- import tempfile
7
- import logging
8
-
9
- # Load language mappings from JSON file
10
- with open("languages.json", "r", encoding='utf-8') as file:
11
- language_map = json.load(file)
12
-
13
- # Configuração básica de logging
14
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
-
16
- def save_temp_image(img):
17
- temp_dir = tempfile.mkdtemp()
18
- img_path = os.path.join(temp_dir, "input_image.png")
19
- img.save(img_path)
20
- logging.info(f"Imagem salva em {img_path}")
21
- return img_path, temp_dir
22
-
23
- def run_command(command):
24
- logging.info(f"Executing command: {command}") # Adiciona o log do comando
25
- try:
26
- result = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT, encoding='utf-8')
27
- logging.info("Command Output: " + result)
28
- return result
29
- except subprocess.CalledProcessError as e:
30
- logging.error(f"Command failed with error: {e.output}")
31
- return None
32
-
33
-
34
- def ocr_function_cli(img, lang_name):
35
- img_path, temp_dir = save_temp_image(img)
36
-
37
- # Get language abbreviation from language_map
38
- lang_code = language_map.get(lang_name, "en") # Default to English if not found
39
-
40
- command = f"surya_ocr {img_path} --langs {lang_code} --images --results_dir {temp_dir}"
41
- if run_command(command) is None:
42
- return img, "OCR failed"
43
-
44
- result_img_path = os.path.join(temp_dir, "image_with_text.png")
45
- result_text_path = os.path.join(temp_dir, "results.json")
46
-
47
- if os.path.exists(result_img_path):
48
- result_img = Image.open(result_img_path)
49
- else:
50
- result_img = img
51
-
52
- if os.path.exists(result_text_path):
53
- with open(result_text_path, "r", encoding='utf-8') as file:
54
- result_text = json.load(file)
55
- text_output = "\n".join([str(page) for page in result_text.values()])
56
- else:
57
- text_output = "No text detected"
58
-
59
- # Limpeza movida para depois da leitura dos resultados
60
- os.remove(img_path)
61
- logging.info(f"Limpeza concluída para {img_path}")
62
- return result_img, text_output
63
-
64
- def text_line_detection_function_cli(img):
65
- img_path, temp_dir = save_temp_image(img)
66
- command = f"surya_detect {img_path} --images --results_dir {temp_dir}"
67
- if run_command(command) is None:
68
- return img, {"error": "Detection failed"}
69
-
70
- result_img_path = os.path.join(temp_dir, "image_with_lines.png")
71
- result_json_path = os.path.join(temp_dir, "results.json")
72
-
73
- if os.path.exists(result_img_path):
74
- result_img = Image.open(result_img_path)
75
  else:
76
- result_img = img
77
-
78
- if os.path.exists(result_json_path):
79
- with open(result_json_path, "r", encoding='utf-8') as file:
80
- result_json = json.load(file)
81
- print(result_json) # Add this line
82
- else:
83
- result_json = {"error": "No detection results found"}
84
-
85
- # Limpeza movida para depois da leitura dos resultados
86
- os.remove(img_path)
87
- logging.info(f"Limpeza concluída para {img_path}")
88
- print(result_img_path) # Add this line
89
- print(result_json_path) # Add this line
90
- return result_img, result_json
91
-
92
- with gr.Blocks() as app:
93
- gr.Markdown("# Surya OCR and Text Line Detection via CLI")
94
-
95
- with gr.Tab("OCR"):
96
- with gr.Column():
97
- ocr_input_image = gr.Image(label="Input Image for OCR", type="pil")
98
-
99
- # Use language names for display in the dropdown
100
- ocr_language_selector = gr.Dropdown(
101
- label="Select Language for OCR",
102
- choices=list(language_map.keys()), # Use language names
103
- value="English"
104
- )
105
- ocr_run_button = gr.Button("Run OCR")
106
-
107
- with gr.Column():
108
- ocr_output_image = gr.Image(label="OCR Output Image", type="pil", interactive=False)
109
- ocr_text_output = gr.TextArea(label="Recognized Text")
110
-
111
- ocr_run_button.click(
112
- fn=ocr_function_cli, inputs=[ocr_input_image, ocr_language_selector], outputs=[ocr_output_image, ocr_text_output]
113
- )
114
-
115
- with gr.Tab("Text Line Detection"):
116
- with gr.Column():
117
- detection_input_image = gr.Image(label="Input Image for Detection", type="pil")
118
- detection_run_button = gr.Button("Run Text Line Detection")
119
-
120
- with gr.Column():
121
- detection_output_image = gr.Image(label="Detection Output Image", type="pil", interactive=False)
122
- detection_json_output = gr.JSON(label="Detection JSON Output")
123
-
124
- detection_run_button.click(
125
- fn=text_line_detection_function_cli, inputs=detection_input_image, outputs=[detection_output_image, detection_json_output]
126
- )
127
-
128
- if __name__ == "__main__":
129
- app.launch()
 
1
  import gradio as gr
 
 
2
  from PIL import Image
3
+ import io
4
+ from surya.ocr import run_ocr
5
+ from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
6
+ from surya.model.recognition.model import load_model as load_rec_model
7
+ from surya.model.recognition.processor import load_processor as load_rec_processor
8
+
9
+ # Load models and processors
10
+ det_processor, det_model = load_det_processor(), load_det_model()
11
+ rec_model, rec_processor = load_rec_model(), load_rec_processor()
12
+
13
+ def perform_ocr(image, language):
14
+ # Convert gradio image to PIL Image
15
+ if image is not None:
16
+ image = Image.fromarray(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  else:
18
+ return "No image uploaded"
19
+
20
+ # Perform OCR
21
+ langs = [language] # You can expand this to support multiple languages
22
+ predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
23
+
24
+ # Extract text from predictions
25
+ result = ""
26
+ for page in predictions[0]: # Assuming single image input
27
+ for line in page['text_lines']:
28
+ result += line['text'] + "\n"
29
+
30
+ return result
31
+
32
+ # Define the Gradio interface
33
+ iface = gr.Interface(
34
+ fn=perform_ocr,
35
+ inputs=[
36
+ gr.Image(type="numpy", label="Upload an image"),
37
+ gr.Dropdown(choices=["en", "fr", "de", "es", "it"], label="Select language", value="en")
38
+ ],
39
+ outputs=gr.Textbox(label="Extracted Text"),
40
+ title="OCR with Surya",
41
+ description="Upload an image to extract text using Optical Character Recognition."
42
+ )
43
+
44
+ # Launch the app
45
+ iface.launch()