# bangla-word-ocr / app.py
# ovi054's picture
# Update app.py
# 2314289 verified
# from __future__ import division, print_function
# coding=utf-8
# import sys
import os
# import glob
# import re
import numpy as np
# import datetime
# Keras
# from tensorflow.keras.models import load_model
#from tensorflow.keras.preprocessing import image
# Flask utils
# from flask import Flask, redirect, url_for, request, render_template
# from werkzeug.utils import secure_filename
# from gevent.pywsgi import WSGIServer
# import everything
# from skimage.io import imread, imshow
# from skimage.filters import gaussian, threshold_otsu
# from skimage.feature import canny
# from skimage.transform import probabilistic_hough_line, rotate
# from process_image import process_image
# import glob
# import math
import cv2
# import numpy as np
# from PIL import Image
# from matplotlib import pyplot as plt
# from matplotlib.patches import Rectangle
#%matplotlib inline
# from collections import OrderedDict
# from PIL import Image
# import pandas as pd
# import seaborn as sns
# import math
# import all from Hough transform cell
# from skimage.transform import hough_line, hough_line_peaks
# from skimage.transform import rotate
# from skimage.feature import canny
# from skimage.io import imread
# from skimage.color import rgb2gray
# import matplotlib.pyplot as plt
# from scipy.stats import mode as md
# from myhough import deskew, deskew2
# from segment_words import sortit,words,createk,hpf,bps,wps,baw
# from myverify import verify
#from detect_frame import detect_frame
# import pathlib
from PIL import ImageFont, ImageDraw, Image
# Font used by model_predict to draw each predicted character onto the
# annotated output image; loaded once at import time at size 60.
# NOTE(review): the badge URL in the trailing comment looks like a leftover
# note for citation_text below rather than anything related to the font.
font = ImageFont.truetype("kalpurush.ttf", 60) #https://img.shields.io/badge/IEEE-10499463-0072bc.svg
# HTML passed as the `article` argument of the Gradio interface: a short
# description of the paper, links to it, and a BibTeX entry.
citation_text = """<div>
<br><p>This is a demo space for the paper: <i>Improving Character Recognition in Bangla Handwritten Words: A Two-Stage Single Shot Detector Approach</i>.</p>
<a href='https://ieeexplore.ieee.org/document/10499463' style='text-decoration: none;'>
<img src='https://img.shields.io/static/v1?label=%E2%80%8E&logo=ieee&logoSize=auto&message=10499463&color=0072bc&labelColor=323634' alt='IEEE Badge' style='max-width: 100%; height: auto; margin-bottom: 10px;'>
</a>
<p>Direct Link to the Paper:
<a href='https://ieeexplore.ieee.org/document/10499463' style='text-decoration: none; color: blue;'>https://ieeexplore.ieee.org/document/10499463</a></p>
<p>ResearchGate Link to the Paper:
<a href='https://www.researchgate.net/publication/380009779_Improving_Character_Recognition_in_Bangla_Handwritten_Words_A_Two-Stage_Single_Shot_Detector_Approach' style='text-decoration: none; color: blue;'>https://bit.ly/ResearchGatePaperLink</a></p>
<p>Please cite this paper with BibTeX as follows:</p>
</div>
<div>
<pre><code style="overflow-x: auto;">@inproceedings{pal2024improving,
title={Improving Character Recognition in Bangla Handwritten Words: A Two-Stage Single Shot Detector Approach},
author={Pal, Avi and Hasan, Md Sajid and Ahsan, Sk Md Masudul},
booktitle={2024 International Conference on Advances in Computing, Communication, Electrical, and Smart Systems (iCACCESS)},
pages={1--6},
year={2024},
organization={IEEE}
}
</code></pre>
</div>"""
#import more
import tensorflow as tf
from object_detection.utils import config_util
# from object_detection.protos import pipeline_pb2
# from google.protobuf import text_format
# import os
from object_detection.utils import label_map_util
# from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
# Load pipeline config and build a detection model.
# Everything below runs at import time; the paths are relative, so they are
# resolved against the process working directory.
WORKSPACE_PATH = 'Tensorflow/workspace'
# SCRIPTS_PATH = 'Tensorflow/scripts'
#APIMODEL_PATH = 'Tensorflow/models'
# Directory holding label_map.pbtxt (read further down in this file).
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
# IMAGE_PATH = WORKSPACE_PATH+'/images'
MODEL_PATH = WORKSPACE_PATH+'/models'
# NOTE(review): PRETRAINED_MODEL_PATH is not read anywhere in the visible code.
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'
# Pipeline config and checkpoint directory of the 'my_ssd_mobnet' model.
CONFIG_PATH = MODEL_PATH+'/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/my_ssd_mobnet/'
# INPUT_IMAGE_PATH = 'Tensorflow/myimages'
# MODEL_PATH = 'E:/RealTimeObjectDetection/model.best.hdf5'
# Build the model in inference mode (is_training=False) from the 'model'
# section of the pipeline config.
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)
# Restore checkpoint
# The checkpoint prefix 'ckpt-51' is hard-coded; expect_partial() is called on
# the restore status so that checkpoint values the inference model does not
# consume are not reported.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-51')).expect_partial()
@tf.function
def detect_fn(image):
    """Run the module-level detection model end to end on ``image``.

    The input is passed through the model's ``preprocess``, ``predict`` and
    ``postprocess`` methods in that order, and the result of ``postprocess``
    is returned unchanged.

    Args:
        image: Input tensor handed to ``detection_model.preprocess``; the
            callers in this file pass a float32 tensor with a leading batch
            axis of size 1.

    Returns:
        The detections object produced by ``detection_model.postprocess``.
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)
def detect_frame(frame, isRealTime=False):
    """Recognise the characters of one word image and return them in order.

    Pipeline:
      1. Run ``detect_fn`` on the image.
      2. Look at (at most) the first 15 detections. A detection is kept when
         its score is at least 0.2 and it does not overlap an already kept
         detection with IoU >= 0.5. Earlier detections take precedence
         (NOTE(review): presumably the detector returns them in descending
         score order - confirm).
      3. If the module-level ``avver`` flag is non-zero, re-check kept
         detections with ``verify`` and adopt its answer when it is more
         confident.
      4. Order the kept characters by the left edge of their boxes, move
         dependent signs next to their base character, and merge 'অ' + 'া'
         into 'আ'.

    Box rows are used as (top, left, bottom, right) fractions of the image:
    indices 0 and 2 are scaled by the row count, indices 1 and 3 by the
    column count.

    Args:
        frame: Image as an array with three axes (rows, columns, channels);
            it must support ``.copy()`` and slicing.
        isRealTime: Unused. Kept so existing callers keep working.

    Returns:
        list[str]: The recognised characters in reading order.
    """
    max_candidates = 15   # only the first N detections are ever considered
    min_score = 0.2       # detections scoring below this are ignored
    max_iou = 0.5         # overlap at/above this suppresses the later box

    image_np = np.array(frame)
    cpimg = frame.copy()
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)

    # Drop the batch axis and trim every output to the valid detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    row, col, _ = image_np.shape
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    boxes = detections['detection_boxes']
    scores = detections['detection_scores']
    class_ids = detections['detection_classes']

    # The arrays were trimmed to num_detections above, so never index past
    # them when the detector returns fewer than max_candidates boxes.
    limit = min(max_candidates, len(scores))

    # ---- overlap suppression -------------------------------------------
    keep = [False] * limit
    for i in range(limit):
        if scores[i] < min_score:
            continue
        label_i = classes[class_ids[i]]
        top, left, bottom, right = boxes[i]
        area_i = (bottom - top) * (right - left)

        suppressed = False
        for j in range(i):
            if not keep[j]:
                continue
            label_j = classes[class_ids[j]]
            # 'ি' and 'ী' are only ever compared against a box of the very
            # same class (presumably because these signs legitimately
            # overlap their base character).
            if (label_i in ('ি', 'ী') or label_j in ('ি', 'ী')) and label_i != label_j:
                continue
            top_j, left_j, bottom_j, right_j = boxes[j]
            inter_h = min(bottom, bottom_j) - max(top, top_j)
            inter_w = min(right, right_j) - max(left, left_j)
            if inter_h < 0 or inter_w < 0:
                continue
            area_j = (bottom_j - top_j) * (right_j - left_j)
            inter = inter_h * inter_w
            union = area_i + area_j - inter
            # Guard the division: degenerate boxes give a zero union.
            if union > 0 and inter / union >= max_iou:
                suppressed = True
                break
        keep[i] = not suppressed

    # ---- optional second-stage verification ----------------------------
    # NOTE(review): `verify` is not imported in the visible part of this file
    # (its import is commented out), so enabling `avver` as-is would raise
    # NameError - confirm before switching it on.
    if avver != 0:
        for i in range(limit):
            if not keep[i] or class_ids[i] > 38:
                continue
            r0 = int(boxes[i][0] * row)
            c0 = int(boxes[i][1] * col)
            r1 = int(boxes[i][2] * row)
            c1 = int(boxes[i][3] * col)
            label, conf = verify(cpimg[r0:r1, c0:c1])
            mapped = ulta[label]
            if conf > scores[i] and mapped != class_ids[i] and mapped != -1:
                class_ids[i] = mapped
                scores[i] = conf

    # ---- reading order --------------------------------------------------
    # Each entry is (left edge, character, right edge).
    letters = []
    for i in range(limit):
        if scores[i] < min_score or not keep[i]:
            continue
        letters.append((boxes[i][1], classes[class_ids[i]], boxes[i][3]))
    letters.sort(key=lambda item: item[0])

    # 'ু' and '্র' must follow their base character. Find the letter they
    # overlap most horizontally; unless that is the one directly before them,
    # move the sign one position to the right.
    for i in range(len(letters) - 1, -1, -1):
        left, char, right = letters[i]
        if char not in ('ু', '্র'):
            continue
        best_overlap = 0
        best_index = i - 1
        for j, (left_j, _, right_j) in enumerate(letters):
            if j == i:
                continue
            overlap = min(right_j, right) - max(left, left_j)
            if overlap > best_overlap:
                best_overlap = overlap
                best_index = j
        # The original swapped unconditionally and raised IndexError when the
        # sign was already the last letter; there is nothing to swap with then.
        if best_index != i - 1 and i + 1 < len(letters):
            letters[i], letters[i + 1] = letters[i + 1], letters[i]

    res_list = [item[1] for item in letters]

    # 'ে' and 'ি' are drawn left of their base character but are placed
    # after it in the returned sequence.
    for i in range(len(res_list) - 2, -1, -1):
        if res_list[i] in ('ে', 'ি'):
            res_list[i], res_list[i + 1] = res_list[i + 1], res_list[i]

    # 'অ' followed by 'া' is the single letter 'আ'.
    for i in range(len(res_list) - 2, -1, -1):
        if res_list[i] == 'অ' and res_list[i + 1] == 'া':
            res_list[i] = 'আ'
            res_list.pop(i + 1)

    return res_list
# Define a flask app
# app = Flask(__name__)
# Model saved with Keras model.save()
# Load your trained model
# model = load_model(MODEL_PATH)
#model._make_predict_function() # Necessary
# print('Model loaded. Start serving...')
# You can also use pretrained model from Keras
# Check https://keras.io/applications/
#from keras.applications.resnet50 import ResNet50
#model = ResNet50(weights='imagenet')
#model.save('')
# print('Model loaded. Check http://127.0.0.1:5000/')
# Switch for the second-stage verification pass in detect_frame and
# model_predict: the pass is skipped while this is 0.
avver=0
# NOTE(review): clicked, wp and bp are not read by any live code in the
# visible file (clicked only appears inside a disabled string block).
clicked=1
wp = None; bp = None;
# Category index built from the label map file.
# NOTE(review): not referenced anywhere else in the visible code.
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')
# Character for each detector class id: detection_classes values are used
# directly as indices into this list.
classes=['অ','ই','উ','এ','ও','ক','খ','গ','ঘ','চ','ছ','জ','ঝ','ট','ঠ','ড','ত','থ','দ','ধ','ন','প','ফ','ব','ভ','ম','য','র','ল','শ','ষ','স','হ','ড়','য়','ৎ','ং','ঁ','০','১','২','৩','৪','৫','৭','৮','া','ি','ী','ে','ু','্র','্য']
# NOTE(review): labels is not read anywhere in the visible code; it has one
# entry per element of classes - its exact meaning should be confirmed.
labels=[1,2,4,7,9,11,12,13,14,16,17,18,19,21,22,23,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50,51,52,53,54,55,57,58,60,61,62,63,64,66,67]
# Lookup used by the verification pass: ulta[label] turns a label returned by
# verify() into a detector class id; -1 marks labels with no counterpart,
# which the verification pass ignores.
ulta=[0,-1,1,-1,2,-1,-1,3,-1,4,-1,5,6,7,8,-1,9,10,11,12,-1,13,14,15,-1,-1,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,-1,34,35,36,-1,37,38,39,40,41,42,43,-1,44,45,-1,46,47,48,49,50,-1,51,52]
def model_predict(word):
    """Recognise a handwritten word and return its text plus an annotated image.

    This is the function served by the Gradio interface.

    Pipeline:
      1. Denoise and binarise the drawing, then run the detection model on it.
      2. Look at (at most) the first 15 detections. A detection is kept when
         its score is at least 0.2 and it does not overlap an already kept
         detection with IoU >= 0.5. Earlier detections take precedence
         (NOTE(review): presumably the detector returns them in descending
         score order - confirm).
      3. If the module-level ``avver`` flag is non-zero, re-check kept
         detections with ``verify`` and adopt its answer when it is more
         confident.
      4. Order the kept characters by the left edge of their boxes, move
         dependent signs next to their base character, and merge 'অ' + 'া'
         into 'আ'.
      5. Draw every kept box and its character on top of the input image.

    Box rows are used as (top, left, bottom, right) fractions of the image:
    indices 0 and 2 are scaled by the row count, indices 1 and 3 by the
    column count.

    Args:
        word: Image array of the drawn word (2-D, or 3-D with a channel
            axis), or ``None`` when nothing was submitted. The array is also
            passed as the denoiser's destination, so it is presumably
            overwritten with the denoised pixels.

    Returns:
        tuple: ``(text, image)`` - the recognised word as a string and the
        annotated image as an array. ``('', None)`` when ``word`` is ``None``.
    """
    max_candidates = 15   # only the first N detections are ever considered
    min_score = 0.2       # detections scoring below this are ignored
    max_iou = 0.5         # overlap at/above this suppresses the later box

    # Nothing to recognise; avoid failing inside OpenCV.
    if word is None:
        return '', None

    # ---- preprocessing --------------------------------------------------
    # `word` is passed both as source and destination (second positional
    # argument); the overlay drawn at the end uses that same array.
    frame = cv2.fastNlMeansDenoising(word, word, 50.0, 7, 21)
    if len(frame.shape) == 3:
        frame = frame[:, :, 0]
    frame = cv2.adaptiveThreshold(frame, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY, 11, 2)
    frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)

    # ---- detection ------------------------------------------------------
    image_np = np.array(frame)
    cpimg = frame.copy()
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    image_t, shapes = detection_model.preprocess(input_tensor)
    prediction_dict = detection_model.predict(image_t, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)

    # Drop the batch axis and trim every output to the valid detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    row, col, _ = image_np.shape
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    boxes = detections['detection_boxes']
    scores = detections['detection_scores']
    class_ids = detections['detection_classes']

    # The arrays were trimmed to num_detections above, so never index past
    # them when the detector returns fewer than max_candidates boxes.
    limit = min(max_candidates, len(scores))

    # ---- overlap suppression -------------------------------------------
    keep = [False] * limit
    for i in range(limit):
        if scores[i] < min_score:
            continue
        label_i = classes[class_ids[i]]
        top, left, bottom, right = boxes[i]
        area_i = (bottom - top) * (right - left)

        suppressed = False
        for j in range(i):
            if not keep[j]:
                continue
            label_j = classes[class_ids[j]]
            # 'ি' and 'ী' are only ever compared against a box of the very
            # same class (presumably because these signs legitimately
            # overlap their base character).
            if (label_i in ('ি', 'ী') or label_j in ('ি', 'ী')) and label_i != label_j:
                continue
            top_j, left_j, bottom_j, right_j = boxes[j]
            inter_h = min(bottom, bottom_j) - max(top, top_j)
            inter_w = min(right, right_j) - max(left, left_j)
            if inter_h < 0 or inter_w < 0:
                continue
            area_j = (bottom_j - top_j) * (right_j - left_j)
            inter = inter_h * inter_w
            union = area_i + area_j - inter
            # Guard the division: degenerate boxes give a zero union.
            if union > 0 and inter / union >= max_iou:
                suppressed = True
                break
        keep[i] = not suppressed

    # ---- optional second-stage verification ----------------------------
    # NOTE(review): `verify` is not imported in the visible part of this file
    # (its import is commented out), so enabling `avver` as-is would raise
    # NameError - confirm before switching it on.
    if avver != 0:
        for i in range(limit):
            if not keep[i] or class_ids[i] > 38:
                continue
            r0 = int(boxes[i][0] * row)
            c0 = int(boxes[i][1] * col)
            r1 = int(boxes[i][2] * row)
            c1 = int(boxes[i][3] * col)
            label, conf = verify(cpimg[r0:r1, c0:c1])
            mapped = ulta[label]
            if conf > scores[i] and mapped != class_ids[i] and mapped != -1:
                class_ids[i] = mapped
                scores[i] = conf

    # ---- reading order --------------------------------------------------
    # Each entry is (left edge, character, right edge).
    letters = []
    for i in range(limit):
        if scores[i] < min_score or not keep[i]:
            continue
        letters.append((boxes[i][1], classes[class_ids[i]], boxes[i][3]))
    letters.sort(key=lambda item: item[0])

    # 'ু' and '্র' must follow their base character. Find the letter they
    # overlap most horizontally; unless that is the one directly before them,
    # move the sign one position to the right.
    for i in range(len(letters) - 1, -1, -1):
        left, char, right = letters[i]
        if char not in ('ু', '্র'):
            continue
        best_overlap = 0
        best_index = i - 1
        for j, (left_j, _, right_j) in enumerate(letters):
            if j == i:
                continue
            overlap = min(right_j, right) - max(left, left_j)
            if overlap > best_overlap:
                best_overlap = overlap
                best_index = j
        # The original swapped unconditionally and raised IndexError when the
        # sign was already the last letter; there is nothing to swap with then.
        if best_index != i - 1 and i + 1 < len(letters):
            letters[i], letters[i + 1] = letters[i + 1], letters[i]

    res_list = [item[1] for item in letters]

    # 'ে' and 'ি' are drawn left of their base character but are placed
    # after it in the output text.
    for i in range(len(res_list) - 2, -1, -1):
        if res_list[i] in ('ে', 'ি'):
            res_list[i], res_list[i + 1] = res_list[i + 1], res_list[i]

    # 'অ' followed by 'া' is the single letter 'আ'.
    for i in range(len(res_list) - 2, -1, -1):
        if res_list[i] == 'অ' and res_list[i + 1] == 'া':
            res_list[i] = 'আ'
            res_list.pop(i + 1)

    output = ''.join(res_list)

    # ---- annotated image ------------------------------------------------
    pil_image = Image.fromarray(word)
    # An "RGBA" drawing context is only accepted on RGB/RGBA images; a 2-D
    # (grayscale) input would otherwise fail with "mode mismatch".
    if pil_image.mode not in ('RGB', 'RGBA'):
        pil_image = pil_image.convert('RGB')
    draw = ImageDraw.Draw(pil_image, "RGBA")
    for i in range(limit):
        if not keep[i]:
            continue
        top = boxes[i][0] * row
        left = boxes[i][1] * col
        bottom = boxes[i][2] * row
        right = boxes[i][3] * col
        corner = (left, top)
        shape = [(left, top), (right, bottom)]
        label = classes[class_ids[i]]
        # Translucent box with an outline, then the character on a filled
        # background at the box's top-left corner.
        draw.rectangle(shape, fill=(0, 100, 200, 127))
        draw.rectangle(shape, outline=(0, 0, 0, 127), width=3)
        bbox = draw.textbbox(corner, label, font=font)
        draw.rectangle(bbox, fill=(200, 100, 0, 200))
        draw.text(corner, label, font=font, fill=(0, 0, 0, 255))

    newWordImg = np.asarray(pil_image)
    return output, newWordImg
import gradio as gr
# Token for the flagging dataset, read from the SECRET_TOKEN environment
# variable (os.getenv gives None when it is not set).
HF_TOKEN = os.getenv("SECRET_TOKEN")
# Flagging callback that stores samples in the "word-flag-data" dataset.
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "word-flag-data")
# Interface around model_predict: a paint input, and a text plus an image
# output matching the (text, image) pair model_predict returns.
# citation_text is shown as the article; flagging is set to "auto" and goes
# through hf_writer.
demo = gr.Interface(fn=model_predict, inputs= "paint", outputs=["text","image"],
# title= "Bangla Word OCR",
description="Reduce pen ink size from Pen Icon(🖋️) for better results!",
# examples=[
# ["Tensorflow/workspace/images/tmpbvc06xxf.png"],
# ["Tensorflow/workspace/images/tmpfhin6fzg.png"],
# ["Tensorflow/workspace/images/tmprhqli3yl.png"],
# ],
article=citation_text,allow_flagging="auto",flagging_callback=hf_writer)
# Starts the app as soon as this module is executed.
demo.launch()