Spaces:
Running
on
T4
Running
on
T4
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,10 @@ import os
|
|
6 |
import tempfile
|
7 |
import logging
|
8 |
|
|
|
|
|
|
|
|
|
9 |
# Configuração básica de logging
|
10 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
11 |
|
@@ -29,7 +33,11 @@ def run_command(command):
|
|
29 |
|
30 |
def ocr_function_cli(img, lang_name):
|
31 |
img_path, temp_dir = save_temp_image(img)
|
32 |
-
|
|
|
|
|
|
|
|
|
33 |
if run_command(command) is None:
|
34 |
return img, "OCR failed"
|
35 |
|
@@ -84,103 +92,11 @@ with gr.Blocks() as app:
|
|
84 |
with gr.Tab("OCR"):
|
85 |
with gr.Column():
|
86 |
ocr_input_image = gr.Image(label="Input Image for OCR", type="pil")
|
|
|
|
|
87 |
ocr_language_selector = gr.Dropdown(
|
88 |
label="Select Language for OCR",
|
89 |
-
choices=
|
90 |
-
"Afrikaans",
|
91 |
-
"Amharic",
|
92 |
-
"Arabic",
|
93 |
-
"Assamese",
|
94 |
-
"Azerbaijani",
|
95 |
-
"Belarusian",
|
96 |
-
"Bulgarian",
|
97 |
-
"Bengali",
|
98 |
-
"Breton",
|
99 |
-
"Bosnian",
|
100 |
-
"Catalan",
|
101 |
-
"Czech",
|
102 |
-
"Welsh",
|
103 |
-
"Danish",
|
104 |
-
"German",
|
105 |
-
"Greek",
|
106 |
-
"English",
|
107 |
-
"Esperanto",
|
108 |
-
"Spanish",
|
109 |
-
"Estonian",
|
110 |
-
"Basque",
|
111 |
-
"Persian",
|
112 |
-
"Finnish",
|
113 |
-
"French",
|
114 |
-
"Western Frisian",
|
115 |
-
"Irish",
|
116 |
-
"Scottish Gaelic",
|
117 |
-
"Galician",
|
118 |
-
"Gujarati",
|
119 |
-
"Hausa",
|
120 |
-
"Hebrew",
|
121 |
-
"Hindi",
|
122 |
-
"Croatian",
|
123 |
-
"Hungarian",
|
124 |
-
"Armenian",
|
125 |
-
"Indonesian",
|
126 |
-
"Icelandic",
|
127 |
-
"Italian",
|
128 |
-
"Japanese",
|
129 |
-
"Javanese",
|
130 |
-
"Georgian",
|
131 |
-
"Kazakh",
|
132 |
-
"Khmer",
|
133 |
-
"Kannada",
|
134 |
-
"Korean",
|
135 |
-
"Kurdish",
|
136 |
-
"Kyrgyz",
|
137 |
-
"Latin",
|
138 |
-
"Lao",
|
139 |
-
"Lithuanian",
|
140 |
-
"Latvian",
|
141 |
-
"Malagasy",
|
142 |
-
"Macedonian",
|
143 |
-
"Malayalam",
|
144 |
-
"Mongolian",
|
145 |
-
"Marathi",
|
146 |
-
"Malay",
|
147 |
-
"Burmese",
|
148 |
-
"Nepali",
|
149 |
-
"Dutch",
|
150 |
-
"Norwegian",
|
151 |
-
"Oromo",
|
152 |
-
"Oriya",
|
153 |
-
"Punjabi",
|
154 |
-
"Polish",
|
155 |
-
"Pashto",
|
156 |
-
"Portuguese",
|
157 |
-
"Romanian",
|
158 |
-
"Russian",
|
159 |
-
"Sanskrit",
|
160 |
-
"Sindhi",
|
161 |
-
"Sinhala",
|
162 |
-
"Slovak",
|
163 |
-
"Slovenian",
|
164 |
-
"Somali",
|
165 |
-
"Albanian",
|
166 |
-
"Serbian",
|
167 |
-
"Sundanese",
|
168 |
-
"Swedish",
|
169 |
-
"Swahili",
|
170 |
-
"Tamil",
|
171 |
-
"Telugu",
|
172 |
-
"Thai",
|
173 |
-
"Tagalog",
|
174 |
-
"Turkish",
|
175 |
-
"Uyghur",
|
176 |
-
"Ukrainian",
|
177 |
-
"Urdu",
|
178 |
-
"Uzbek",
|
179 |
-
"Vietnamese",
|
180 |
-
"Xhosa",
|
181 |
-
"Yiddish",
|
182 |
-
"Chinese"
|
183 |
-
],
|
184 |
value="English"
|
185 |
)
|
186 |
ocr_run_button = gr.Button("Run OCR")
|
|
|
6 |
import tempfile
|
7 |
import logging
|
8 |
|
9 |
+
# Load language mappings from JSON file
|
10 |
+
with open("languages.json", "r", encoding='utf-8') as file:
|
11 |
+
language_map = json.load(file)
|
12 |
+
|
13 |
# Configuração básica de logging
|
14 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
15 |
|
|
|
33 |
|
34 |
def ocr_function_cli(img, lang_name):
|
35 |
img_path, temp_dir = save_temp_image(img)
|
36 |
+
|
37 |
+
# Get language abbreviation from language_map
|
38 |
+
lang_code = language_map.get(lang_name, "en") # Default to English if not found
|
39 |
+
|
40 |
+
command = f"surya_ocr {img_path} --langs {lang_code} --images --results_dir {temp_dir}"
|
41 |
if run_command(command) is None:
|
42 |
return img, "OCR failed"
|
43 |
|
|
|
92 |
with gr.Tab("OCR"):
|
93 |
with gr.Column():
|
94 |
ocr_input_image = gr.Image(label="Input Image for OCR", type="pil")
|
95 |
+
|
96 |
+
# Use language names for display in the dropdown
|
97 |
ocr_language_selector = gr.Dropdown(
|
98 |
label="Select Language for OCR",
|
99 |
+
choices=list(language_map.keys()), # Use language names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
value="English"
|
101 |
)
|
102 |
ocr_run_button = gr.Button("Run OCR")
|