artificialguybr committed on
Commit
1fa53a1
·
verified ·
1 Parent(s): 2c60ec4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -96
app.py CHANGED
@@ -6,6 +6,10 @@ import os
6
  import tempfile
7
  import logging
8
 
 
 
 
 
9
  # Configuração básica de logging
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
 
@@ -29,7 +33,11 @@ def run_command(command):
29
 
30
  def ocr_function_cli(img, lang_name):
31
  img_path, temp_dir = save_temp_image(img)
32
- command = f"surya_ocr {img_path} --langs {lang_name} --images --results_dir {temp_dir}"
 
 
 
 
33
  if run_command(command) is None:
34
  return img, "OCR failed"
35
 
@@ -84,103 +92,11 @@ with gr.Blocks() as app:
84
  with gr.Tab("OCR"):
85
  with gr.Column():
86
  ocr_input_image = gr.Image(label="Input Image for OCR", type="pil")
 
 
87
  ocr_language_selector = gr.Dropdown(
88
  label="Select Language for OCR",
89
- choices=[
90
- "Afrikaans",
91
- "Amharic",
92
- "Arabic",
93
- "Assamese",
94
- "Azerbaijani",
95
- "Belarusian",
96
- "Bulgarian",
97
- "Bengali",
98
- "Breton",
99
- "Bosnian",
100
- "Catalan",
101
- "Czech",
102
- "Welsh",
103
- "Danish",
104
- "German",
105
- "Greek",
106
- "English",
107
- "Esperanto",
108
- "Spanish",
109
- "Estonian",
110
- "Basque",
111
- "Persian",
112
- "Finnish",
113
- "French",
114
- "Western Frisian",
115
- "Irish",
116
- "Scottish Gaelic",
117
- "Galician",
118
- "Gujarati",
119
- "Hausa",
120
- "Hebrew",
121
- "Hindi",
122
- "Croatian",
123
- "Hungarian",
124
- "Armenian",
125
- "Indonesian",
126
- "Icelandic",
127
- "Italian",
128
- "Japanese",
129
- "Javanese",
130
- "Georgian",
131
- "Kazakh",
132
- "Khmer",
133
- "Kannada",
134
- "Korean",
135
- "Kurdish",
136
- "Kyrgyz",
137
- "Latin",
138
- "Lao",
139
- "Lithuanian",
140
- "Latvian",
141
- "Malagasy",
142
- "Macedonian",
143
- "Malayalam",
144
- "Mongolian",
145
- "Marathi",
146
- "Malay",
147
- "Burmese",
148
- "Nepali",
149
- "Dutch",
150
- "Norwegian",
151
- "Oromo",
152
- "Oriya",
153
- "Punjabi",
154
- "Polish",
155
- "Pashto",
156
- "Portuguese",
157
- "Romanian",
158
- "Russian",
159
- "Sanskrit",
160
- "Sindhi",
161
- "Sinhala",
162
- "Slovak",
163
- "Slovenian",
164
- "Somali",
165
- "Albanian",
166
- "Serbian",
167
- "Sundanese",
168
- "Swedish",
169
- "Swahili",
170
- "Tamil",
171
- "Telugu",
172
- "Thai",
173
- "Tagalog",
174
- "Turkish",
175
- "Uyghur",
176
- "Ukrainian",
177
- "Urdu",
178
- "Uzbek",
179
- "Vietnamese",
180
- "Xhosa",
181
- "Yiddish",
182
- "Chinese"
183
- ],
184
  value="English"
185
  )
186
  ocr_run_button = gr.Button("Run OCR")
 
6
  import tempfile
7
  import logging
8
 
9
+ # Load language mappings from JSON file
10
+ with open("languages.json", "r", encoding='utf-8') as file:
11
+ language_map = json.load(file)
12
+
13
  # Configuração básica de logging
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
 
 
33
 
34
  def ocr_function_cli(img, lang_name):
35
  img_path, temp_dir = save_temp_image(img)
36
+
37
+ # Get language abbreviation from language_map
38
+ lang_code = language_map.get(lang_name, "en") # Default to English if not found
39
+
40
+ command = f"surya_ocr {img_path} --langs {lang_code} --images --results_dir {temp_dir}"
41
  if run_command(command) is None:
42
  return img, "OCR failed"
43
 
 
92
  with gr.Tab("OCR"):
93
  with gr.Column():
94
  ocr_input_image = gr.Image(label="Input Image for OCR", type="pil")
95
+
96
+ # Use language names for display in the dropdown
97
  ocr_language_selector = gr.Dropdown(
98
  label="Select Language for OCR",
99
+ choices=list(language_map.keys()), # Use language names
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  value="English"
101
  )
102
  ocr_run_button = gr.Button("Run OCR")