Spaces:

CallMeDaniel
/

TaiwanOCR_CertificateofDiagnosis

Running

TaiwanOCR_CertificateofDiagnosis / app.py

Danieldu

update .gitignore

c883606 9 months ago

3.14 kB

	import os, io
	from paddleocr import PaddleOCR, draw_ocr,PPStructure
	from ppocr.utils.visual import draw_ser_results
	from PIL import Image, ImageDraw
	import gradio as gr


	def inference__ppocr(img_path):
	    """Run PaddleOCR text detection and recognition on a single image.

	    Args:
	        img_path: Path of the image file to recognise.

	    Returns:
	        A ``(visualisation, text)`` tuple. ``visualisation`` is the value
	        returned by ``draw_ocr`` for the detected boxes and ``text`` is the
	        recognised lines joined with newlines. When nothing is detected the
	        unannotated RGB image and an empty string are returned instead of
	        raising.
	    """
	    ocr = PaddleOCR(
	        rec_char_dict_path='zhtw_common_dict.txt',
	        use_gpu=False,
	        rec_image_shape="3, 48, 320",
	    )

	    result = ocr.ocr(img_path)
	    image = Image.open(img_path).convert('RGB')

	    # A page without any detection may be reported as None rather than an
	    # empty list (seen with recent PaddleOCR releases - confirm for the
	    # pinned version); normalise so the loops below never iterate None.
	    pages = [page or [] for page in (result or [])]

	    # Debug trace of every raw detection.
	    for page in pages:
	        for line in page:
	            print(line)

	    # Only the first page is visualised.
	    lines = pages[0] if pages else []
	    if not lines:
	        return image, ""

	    boxes = [line[0] for line in lines]
	    # Keep txts an (empty) string even when a box has no recognised text.
	    txts = [line[1][0] if line[1] else '' for line in lines]
	    # Same guard as for txts, so boxes/txts/scores always stay aligned.
	    scores = [line[1][1] if line[1] else 0.0 for line in lines]
	    annotated = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")

	    return annotated, "\n".join(txts)


	def inference__ppstructure(img_path):
	    """Extract labelled entities from a certificate image with PPStructure.

	    The pipeline is built with a custom recognition dictionary and the
	    ``clinical_class_list.txt`` class list. Any recognised element whose
	    text contains one of the known boilerplate fragments is relabelled as
	    ``'O'`` before the results are drawn and returned.

	    Args:
	        img_path: Path of the image file to analyse.

	    Returns:
	        A ``(visualisation, text)`` tuple. ``visualisation`` is the value
	        returned by ``draw_ser_results`` and ``text`` holds one
	        ``"<pred>:<transcription>"`` line per element whose label is not
	        ``'O'``.
	    """
	    structure_engine = PPStructure(
	        rec_char_dict_path='zhtw_common_dict.txt',
	        use_gpu=False,
	        rec_image_shape="3, 48, 320",
	        ser_dict_path='ppocr/utils/dict/kie/clinical_class_list.txt',
	    )
	    # Fragments of printed form labels / headers; an element containing any
	    # of them is forced to the 'O' label (presumably the "other" class with
	    # id 0 - confirm against clinical_class_list.txt).
	    samples = ['病歷','身份','姓名',' Medical','No.','Name','性別','中華民國','002480','身分','Attending','M.D','ID','Medical','by','續上頁診斷書內容','出生地','列印時間','以上','年齡','特予']

	    # The engine returns a pair; only the first item (the element list) is used.
	    result, _ = structure_engine(img_path)

	    for element in result:
	        if any(sample in element['transcription'] for sample in samples):
	            element['pred_id'] = 0
	            element['pred'] = 'O'

	    image = draw_ser_results(img_path, result, font_path='./simfang.ttf')
	    entities = [
	        f"{element['pred']}:{element['transcription']}"
	        for element in result
	        if element['pred'] != 'O'
	    ]
	    return image, "\n".join(entities)

	def _process_certificate(image_path, image_url):
	    """Click handler: analyse the uploaded image, or the image behind a URL.

	    The uploaded file takes precedence. If no file was uploaded, the URL is
	    downloaded to a temporary file which is removed again after inference.

	    Args:
	        image_path: File path supplied by the image component, or None.
	        image_url: Text entered in the URL box (may be empty or None).

	    Returns:
	        The ``(image, text)`` tuple produced by ``inference__ppstructure``.

	    Raises:
	        gr.Error: If neither input is given, the URL is not http(s), or the
	            download fails, is empty, or exceeds the size limit.
	    """
	    import os
	    import tempfile
	    import urllib.parse
	    import urllib.request

	    if image_path:
	        return inference__ppstructure(image_path)

	    url = (image_url or "").strip()
	    if not url:
	        raise gr.Error("Please upload an image or enter an image URL.")

	    # NOTE(review): this fetches a user-supplied URL from the server. Only
	    # the scheme is restricted here; internal/private hosts are not blocked.
	    parsed = urllib.parse.urlparse(url)
	    if parsed.scheme not in ("http", "https"):
	        raise gr.Error("Only http(s) image URLs are supported.")

	    max_bytes = 20 * 1024 * 1024  # refuse anything larger than 20 MiB
	    request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
	    try:
	        with urllib.request.urlopen(request, timeout=15) as response:
	            payload = response.read(max_bytes + 1)
	    except (OSError, ValueError) as err:
	        raise gr.Error(f"Could not download the image: {err}") from err
	    if not payload:
	        raise gr.Error("The image URL returned no data.")
	    if len(payload) > max_bytes:
	        raise gr.Error("The image at that URL is too large.")

	    # The inference function works on file paths, so spill to a temp file.
	    suffix = os.path.splitext(parsed.path)[1] or ".jpg"
	    fd, tmp_path = tempfile.mkstemp(suffix=suffix)
	    try:
	        with os.fdopen(fd, "wb") as tmp_file:
	            tmp_file.write(payload)
	        return inference__ppstructure(tmp_path)
	    finally:
	        os.remove(tmp_path)


	# Gradio UI: inputs on the left, annotated image and extracted text on the right.
	with gr.Blocks() as demo:
	    gr.Markdown("Form Understanding Project - Certificate of Diagnosis")
	    gr.Markdown("Supported languages：Traditional Chinese")
	    gr.Markdown("""
	## Usage Description
	This interface is designed to process and extract information from Certificates of Diagnosis.
	To use this tool:
	1. Upload an image of a Certificate of Diagnosis using the 'Upload Image' button.
	2. Optionally, enter the image URL if the certificate is available online.
	3. Click 'Process' to extract information from the uploaded certificate.
	4. The processed image and extracted text will be displayed on the right.
	""")
	    with gr.Row():
	        with gr.Column():
	            image_input = gr.Image(type='filepath', label='Upload Image')
	            url_input = gr.Textbox(label='Or enter Image URL')
	            submit_btn = gr.Button("Process")
	        with gr.Column():
	            gr.Markdown("#### Processed Image")
	            image_output = gr.Image(type="pil", label="Processed Image")
	            gr.Markdown("#### Extracted Text")
	            text_output = gr.Textbox(label="Extracted Text")

	    # Event listeners must be registered inside the Blocks context.
	    # Both inputs are passed so the URL box is actually honoured.
	    submit_btn.click(
	        _process_certificate,
	        inputs=[image_input, url_input],
	        outputs=[image_output, text_output],
	    )

	demo.launch(debug=True)