Spaces:

artificialguybr
/

Surya-OCR

Running on T4

App Files Files Community

Surya-OCR / app.py

artificialguybr

Update app.py

2c60ec4 verified about 1 year ago

raw

history blame

7.14 kB

	import json
	import logging
	import os
	import shlex
	import shutil
	import subprocess
	import tempfile

	import gradio as gr
	from PIL import Image

	# Basic logging configuration: INFO level, records formatted as "time - level - message".
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

	def save_temp_image(img):
	    """Write *img* to ``input_image.png`` inside a new temporary directory.

	    Args:
	        img: Object exposing ``save(path)``; the UI components are configured
	            to deliver PIL images.

	    Returns:
	        A ``(image_path, directory)`` tuple. The directory is created with
	        ``tempfile.mkdtemp`` and is not removed here.
	    """
	    workdir = tempfile.mkdtemp()
	    target = os.path.join(workdir, "input_image.png")
	    img.save(target)
	    logging.info("Imagem salva em %s", target)
	    return target, workdir

	def run_command(command):
	    """Run an external command and return its combined stdout/stderr text.

	    Args:
	        command: Either a shell command line (``str``), which is executed
	            through the shell, or a sequence of arguments, which is executed
	            directly without a shell so no quoting is required.

	    Returns:
	        The decoded output on success, or ``None`` when the command exits
	        with a non-zero status or cannot be started at all.
	    """
	    logging.info(f"Executing command: {command}")
	    # Only plain strings go through the shell; argument lists bypass it.
	    use_shell = isinstance(command, str)
	    try:
	        result = subprocess.check_output(
	            command,
	            shell=use_shell,
	            stderr=subprocess.STDOUT,
	            encoding='utf-8',
	            # Tool output is only logged/returned as text; never fail on a stray byte.
	            errors='replace',
	        )
	    except subprocess.CalledProcessError as e:
	        logging.error(f"Command failed with error: {e.output}")
	        return None
	    except OSError as e:
	        # Raised when the executable is missing or not runnable (no-shell mode).
	        logging.error(f"Command failed with error: {e}")
	        return None
	    logging.info("Command Output: " + result)
	    return result


	def ocr_function_cli(img, lang_name):
	    """Run the ``surya_ocr`` CLI on *img* and collect what it wrote.

	    Args:
	        img: Input image (the UI delivers a PIL image), or ``None`` when
	            nothing was uploaded.
	        lang_name: Language value forwarded to ``--langs``.

	    Returns:
	        ``(image, text)``. ``image`` is the annotated image when the CLI
	        produced one, otherwise the input image. ``text`` is the values of
	        ``results.json`` stringified and joined by newlines, or a short
	        status message when there is nothing to show.
	    """
	    if img is None:
	        return img, "No input image provided"

	    img_path, temp_dir = save_temp_image(img)
	    try:
	        # Quote every interpolated value: language names such as
	        # "Western Frisian" contain spaces and would otherwise be split
	        # into separate arguments by the shell.
	        command = (
	            f"surya_ocr {shlex.quote(img_path)} "
	            f"--langs {shlex.quote(str(lang_name))} "
	            f"--images --results_dir {shlex.quote(temp_dir)}"
	        )
	        if run_command(command) is None:
	            return img, "OCR failed"

	        # NOTE(review): these output names are assumed; confirm them against
	        # the layout the installed surya version writes under --results_dir.
	        result_img_path = os.path.join(temp_dir, "image_with_text.png")
	        result_text_path = os.path.join(temp_dir, "results.json")

	        if os.path.exists(result_img_path):
	            # Copy the pixels into memory: the backing file is deleted below.
	            with Image.open(result_img_path) as annotated:
	                result_img = annotated.copy()
	        else:
	            result_img = img

	        if os.path.exists(result_text_path):
	            with open(result_text_path, "r", encoding='utf-8') as file:
	                result_text = json.load(file)
	            text_output = "\n".join(str(page) for page in result_text.values())
	        else:
	            text_output = "No text detected"

	        return result_img, text_output
	    finally:
	        # Remove the whole working directory on every path (success or
	        # failure), not just the input image.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	        logging.info(f"Limpeza concluída para {temp_dir}")

	def text_line_detection_function_cli(img):
	    """Run the ``surya_detect`` CLI on *img* and collect what it wrote.

	    Args:
	        img: Input image (the UI delivers a PIL image), or ``None`` when
	            nothing was uploaded.

	    Returns:
	        ``(image, data)``. ``image`` is the annotated image when the CLI
	        produced one, otherwise the input image. ``data`` is the parsed
	        ``results.json`` content, or an ``{"error": ...}`` dict when there
	        is nothing to show.
	    """
	    if img is None:
	        return img, {"error": "No input image provided"}

	    img_path, temp_dir = save_temp_image(img)
	    try:
	        # Quote the paths so a temp location containing spaces or shell
	        # metacharacters cannot break the command line.
	        command = (
	            f"surya_detect {shlex.quote(img_path)} "
	            f"--images --results_dir {shlex.quote(temp_dir)}"
	        )
	        if run_command(command) is None:
	            return img, {"error": "Detection failed"}

	        # NOTE(review): these output names are assumed; confirm them against
	        # the layout the installed surya version writes under --results_dir.
	        result_img_path = os.path.join(temp_dir, "image_with_lines.png")
	        result_json_path = os.path.join(temp_dir, "results.json")

	        if os.path.exists(result_img_path):
	            # Copy the pixels into memory: the backing file is deleted below.
	            with Image.open(result_img_path) as annotated:
	                result_img = annotated.copy()
	        else:
	            result_img = img

	        if os.path.exists(result_json_path):
	            with open(result_json_path, "r", encoding='utf-8') as file:
	                result_json = json.load(file)
	        else:
	            result_json = {"error": "No detection results found"}

	        return result_img, result_json
	    finally:
	        # Remove the whole working directory on every path (success or
	        # failure), not just the input image.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	        logging.info(f"Limpeza concluída para {temp_dir}")

	# Languages offered in the OCR tab's dropdown, in display order.
	_OCR_LANGUAGES = [
	    "Afrikaans",
	    "Amharic",
	    "Arabic",
	    "Assamese",
	    "Azerbaijani",
	    "Belarusian",
	    "Bulgarian",
	    "Bengali",
	    "Breton",
	    "Bosnian",
	    "Catalan",
	    "Czech",
	    "Welsh",
	    "Danish",
	    "German",
	    "Greek",
	    "English",
	    "Esperanto",
	    "Spanish",
	    "Estonian",
	    "Basque",
	    "Persian",
	    "Finnish",
	    "French",
	    "Western Frisian",
	    "Irish",
	    "Scottish Gaelic",
	    "Galician",
	    "Gujarati",
	    "Hausa",
	    "Hebrew",
	    "Hindi",
	    "Croatian",
	    "Hungarian",
	    "Armenian",
	    "Indonesian",
	    "Icelandic",
	    "Italian",
	    "Japanese",
	    "Javanese",
	    "Georgian",
	    "Kazakh",
	    "Khmer",
	    "Kannada",
	    "Korean",
	    "Kurdish",
	    "Kyrgyz",
	    "Latin",
	    "Lao",
	    "Lithuanian",
	    "Latvian",
	    "Malagasy",
	    "Macedonian",
	    "Malayalam",
	    "Mongolian",
	    "Marathi",
	    "Malay",
	    "Burmese",
	    "Nepali",
	    "Dutch",
	    "Norwegian",
	    "Oromo",
	    "Oriya",
	    "Punjabi",
	    "Polish",
	    "Pashto",
	    "Portuguese",
	    "Romanian",
	    "Russian",
	    "Sanskrit",
	    "Sindhi",
	    "Sinhala",
	    "Slovak",
	    "Slovenian",
	    "Somali",
	    "Albanian",
	    "Serbian",
	    "Sundanese",
	    "Swedish",
	    "Swahili",
	    "Tamil",
	    "Telugu",
	    "Thai",
	    "Tagalog",
	    "Turkish",
	    "Uyghur",
	    "Ukrainian",
	    "Urdu",
	    "Uzbek",
	    "Vietnamese",
	    "Xhosa",
	    "Yiddish",
	    "Chinese",
	]

	# Two-tab interface: one tab drives OCR, the other text line detection.
	with gr.Blocks() as app:
	    gr.Markdown("# Surya OCR and Text Line Detection via CLI")

	    with gr.Tab("OCR"):
	        # Inputs: image, language choice, and the trigger button.
	        with gr.Column():
	            ocr_input_image = gr.Image(type="pil", label="Input Image for OCR")
	            ocr_language_selector = gr.Dropdown(
	                choices=_OCR_LANGUAGES,
	                value="English",
	                label="Select Language for OCR",
	            )
	            ocr_run_button = gr.Button("Run OCR")

	        # Outputs: annotated image and the recognized text.
	        with gr.Column():
	            ocr_output_image = gr.Image(
	                type="pil",
	                interactive=False,
	                label="OCR Output Image",
	            )
	            ocr_text_output = gr.TextArea(label="Recognized Text")

	        ocr_run_button.click(
	            fn=ocr_function_cli,
	            inputs=[ocr_input_image, ocr_language_selector],
	            outputs=[ocr_output_image, ocr_text_output],
	        )

	    with gr.Tab("Text Line Detection"):
	        # Inputs: image and the trigger button.
	        with gr.Column():
	            detection_input_image = gr.Image(
	                type="pil",
	                label="Input Image for Detection",
	            )
	            detection_run_button = gr.Button("Run Text Line Detection")

	        # Outputs: annotated image and the raw detection JSON.
	        with gr.Column():
	            detection_output_image = gr.Image(
	                type="pil",
	                interactive=False,
	                label="Detection Output Image",
	            )
	            detection_json_output = gr.JSON(label="Detection JSON Output")

	        detection_run_button.click(
	            fn=text_line_detection_function_cli,
	            inputs=detection_input_image,
	            outputs=[detection_output_image, detection_json_output],
	        )

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()