# from __future__ import division, print_function
# coding=utf-8
# import sys
import os
# import glob
# import re
import numpy as np
# import datetime
# Keras
# from tensorflow.keras.models import load_model
# from tensorflow.keras.preprocessing import image
# Flask utils
# from flask import Flask, redirect, url_for, request, render_template
# from werkzeug.utils import secure_filename
# from gevent.pywsgi import WSGIServer
# import everything
# from skimage.io import imread, imshow
# from skimage.filters import gaussian, threshold_otsu
# from skimage.feature import canny
# from skimage.transform import probabilistic_hough_line, rotate
# from process_image import process_image
# import glob
# import math
import cv2
# import numpy as np
# from PIL import Image
# from matplotlib import pyplot as plt
# from matplotlib.patches import Rectangle
# %matplotlib inline
# from collections import OrderedDict
# from PIL import Image
# import pandas as pd
# import seaborn as sns
# import math
# import all from Hough transform cell
# from skimage.transform import hough_line, hough_line_peaks
# from skimage.transform import rotate
# from skimage.feature import canny
# from skimage.io import imread
# from skimage.color import rgb2gray
# import matplotlib.pyplot as plt
# from scipy.stats import mode as md
# from myhough import deskew, deskew2
# from segment_words import sortit, words, createk, hpf, bps, wps, baw
# from myverify import verify
# from detect_frame import detect_frame
# import pathlib
from PIL import ImageFont, ImageDraw, Image
# Bangla font used to label detected characters on the output image
font = ImageFont.truetype("kalpurush.ttf", 60)
citation_text = """<div> | |
<br><p>This is a demo space for the paper: <i>Improving Character Recognition in Bangla Handwritten Words: A Two-Stage Single Shot Detector Approach</i>.</p> | |
<a href='https://ieeexplore.ieee.org/document/10499463' style='text-decoration: none;'> | |
<img src='https://img.shields.io/static/v1?label=%E2%80%8E&logo=ieee&logoSize=auto&message=10499463&color=0072bc&labelColor=323634' alt='IEEE Badge' style='max-width: 100%; height: auto; margin-bottom: 10px;'> | |
</a> | |
<p>Direct Link to the Paper: | |
<a href='https://ieeexplore.ieee.org/document/10499463' style='text-decoration: none; color: blue;'>https://ieeexplore.ieee.org/document/10499463</a></p> | |
<p>ResearchGate Link to the Paper: | |
<a href='https://www.researchgate.net/publication/380009779_Improving_Character_Recognition_in_Bangla_Handwritten_Words_A_Two-Stage_Single_Shot_Detector_Approach' style='text-decoration: none; color: blue;'>https://bit.ly/ResearchGatePaperLink</a></p> | |
<p>Please cite this paper with BibTeX as follows:</p> | |
</div> | |
<div> | |
<pre><code style="overflow-x: auto;">@inproceedings{pal2024improving, | |
title={Improving Character Recognition in Bangla Handwritten Words: A Two-Stage Single Shot Detector Approach}, | |
author={Pal, Avi and Hasan, Md Sajid and Ahsan, Sk Md Masudul}, | |
booktitle={2024 International Conference on Advances in Computing, Communication, Electrical, and Smart Systems (iCACCESS)}, | |
pages={1--6}, | |
year={2024}, | |
organization={IEEE} | |
} | |
</code></pre> | |
</div>""" | |
# import more
import tensorflow as tf
from object_detection.utils import config_util
# from object_detection.protos import pipeline_pb2
# from google.protobuf import text_format
# import os
from object_detection.utils import label_map_util
# from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

# Load pipeline config and build a detection model
WORKSPACE_PATH = 'Tensorflow/workspace'
# SCRIPTS_PATH = 'Tensorflow/scripts'
# APIMODEL_PATH = 'Tensorflow/models'
ANNOTATION_PATH = WORKSPACE_PATH + '/annotations'
# IMAGE_PATH = WORKSPACE_PATH + '/images'
MODEL_PATH = WORKSPACE_PATH + '/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH + '/pre-trained-models'
CONFIG_PATH = MODEL_PATH + '/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH + '/my_ssd_mobnet/'
# INPUT_IMAGE_PATH = 'Tensorflow/myimages'
# MODEL_PATH = 'E:/RealTimeObjectDetection/model.best.hdf5'
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)
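# get_configs_from_pipeline_file parses pipeline.config into a dict of protos;
# configs['model'] is the detection-model proto, and is_training=False builds
# the architecture for inference rather than training.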
# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-51')).expect_partial()
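# Note: expect_partial() silences warnings about checkpoint variables (such as
# optimizer slots from training) that are intentionally left unrestored here.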
def detect_fn(image):
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections
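
# detect_frame and model_predict below prune overlapping detections with a
# hand-rolled intersection-over-union (IoU) test. A minimal sketch of that
# computation, assuming normalized (ymin, xmin, ymax, xmax) boxes as returned
# by the TF Object Detection API; _iou is illustrative and not called at runtime.
def _iou(box_a, box_b):
    # corners of the intersection rectangle
    ymin = max(box_a[0], box_b[0])
    xmin = max(box_a[1], box_b[1])
    ymax = min(box_a[2], box_b[2])
    xmax = min(box_a[3], box_b[3])
    if ymin >= ymax or xmin >= xmax:
        return 0.0  # boxes do not overlap
    inter = (ymax - ymin) * (xmax - xmin)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)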
def detect_frame(frame, isRealTime=False):
    image_np = np.array(frame)
    cpimg = frame.copy()
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)
    print(len(detections))
    num_detections = int(detections.pop('num_detections'))
    # print("hello")
    # print(num_detections)
    # print(len(detections['detection_scores']))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    row, col, dummy = image_np.shape
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    # print(detections['detection_classes'])
    mark = [0] * 15  # only the top-15 detections are considered
    myletters = []
    for i in range(0, 15):
        curi = detections['detection_classes'][i]
        classi = classes[curi]
        print(classes[curi], end='-')
        cur = detections['detection_scores'][i]
        if cur < 0.2:
            continue
        print(cur, end=' ')
        print(detections['detection_boxes'][i], end=' ')
        x0 = detections['detection_boxes'][i][0]
        y0 = detections['detection_boxes'][i][1]
        x1 = detections['detection_boxes'][i][2]
        y1 = detections['detection_boxes'][i][3]
        curarea = (x1 - x0) * (y1 - y0)
        ok = 1
        for j in range(0, i):
            # print(mark[j])
            if mark[j] == 0:
                continue
            curj = detections['detection_classes'][j]
            classj = classes[curj]
            # 'ি' and 'ী' wrap over neighbouring consonants, so cross-class
            # overlap involving them is not suppressed
            if classi == 'ি' or classj == 'ি':
                if classi != classj:
                    continue
            if classi == 'ী' or classj == 'ী':
                if classi != classj:
                    continue
            x2 = detections['detection_boxes'][j][0]
            y2 = detections['detection_boxes'][j][1]
            x3 = detections['detection_boxes'][j][2]
            y3 = detections['detection_boxes'][j][3]
            x4 = max(x0, x2)
            y4 = max(y0, y2)
            x5 = min(x1, x3)
            y5 = min(y1, y3)
            if x4 > x5 or y4 > y5:
                continue
            prevarea = (x3 - x2) * (y3 - y2)
            commonarea = (x5 - x4) * (y5 - y4)
            ins1 = curarea / commonarea
            ins2 = prevarea / commonarea
            ins = commonarea / (curarea + prevarea - commonarea)  # IoU
            print(ins1, end=' ')
            if ins >= 0.5:
                ok = 0
                cur = detections['detection_classes'][j]
                print(classes[cur])
                break
        if ok == 1:
            mark[i] = 1
            cur = detections['detection_classes'][i]
            # myletters.append(classes[cur])
        print(ok)
    # verification (disabled while avver == 0; needs 'from myverify import verify')
    for i in range(0, 15):
        if mark[i] == 0 or avver == 0:
            continue
        if detections['detection_classes'][i] > 38:
            continue
        x0 = int(detections['detection_boxes'][i][0] * row)
        y0 = int(detections['detection_boxes'][i][1] * col)
        x1 = int(detections['detection_boxes'][i][2] * row)
        y1 = int(detections['detection_boxes'][i][3] * col)
        # print(y0, y1, x0, x1)
        currImg = cpimg[x0:x1, y0:y1]
        curscore = detections['detection_scores'][i]
        curclass = detections['detection_classes'][i]
        label, conf = verify(currImg)
        # print(ulta[label], conf)
        # print(curclass, curscore)
        if conf > curscore and ulta[label] != curclass and ulta[label] != -1:
            detections['detection_classes'][i] = ulta[label]
            detections['detection_scores'][i] = conf
    for i in range(0, 15):
        if detections['detection_scores'][i] < 0.2:
            continue
        if mark[i] == 0:
            continue
        cur = detections['detection_classes'][i]
        cur = classes[cur]
        y0 = detections['detection_boxes'][i][1]
        y1 = detections['detection_boxes'][i][3]
        pair = (y0, cur, y1)
        myletters.append(pair)
    myletters.sort(key=lambda x: x[0])  # left-to-right by xmin
    # print(myletters)
    for i in range(len(myletters) - 1, -1, -1):
        y0 = myletters[i][0]
        curr = myletters[i][1]
        y1 = myletters[i][2]
        if curr == 'ু' or curr == '্র':
            mxarea = 0
            mxno = i - 1
            for j in range(0, len(myletters)):
                if i == j:
                    continue
                y2 = myletters[j][0]
                y3 = myletters[j][2]
                curcommon = min(y3, y1) - max(y0, y2)
                if curcommon > mxarea:
                    mxarea = curcommon
                    mxno = j
            # bounds guard added: the original swap could index one past the end
            if mxno != (i - 1) and i + 1 < len(myletters):
                myletters[i], myletters[i + 1] = myletters[i + 1], myletters[i]
    res_list = [x[1] for x in myletters]
    print(res_list)
    for i in range(len(res_list) - 2, -1, -1):
        x = res_list[i]
        y = res_list[i + 1]
        if x == 'ে' or x == 'ি':
            res_list[i], res_list[i + 1] = res_list[i + 1], res_list[i]
    for i in range(len(res_list) - 2, -1, -1):
        x = res_list[i]
        y = res_list[i + 1]
        print(x, y)
        if x == 'অ' and y == 'া':
            print('yo')
            res_list[i] = 'আ'
            res_list.pop(i + 1)
    print(res_list)
    for i in res_list:
        print(i, end='')
    print(' ')
    return res_list
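
# Hedged usage sketch for detect_frame (the file name is hypothetical):
#   frame = cv2.imread('word_crop.jpg')   # BGR uint8 image of one word
#   letters = detect_frame(frame)         # list of glyphs in reading order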
# Define a flask app
# app = Flask(__name__)
# Model saved with Keras model.save()
# Load your trained model
# model = load_model(MODEL_PATH)
# model._make_predict_function()  # Necessary
# print('Model loaded. Start serving...')
# You can also use a pretrained model from Keras
# Check https://keras.io/applications/
# from keras.applications.resnet50 import ResNet50
# model = ResNet50(weights='imagenet')
# model.save('')
# print('Model loaded. Check http://127.0.0.1:5000/')
avver = 0   # set to 1 to enable the CNN verification pass (needs myverify)
clicked = 1
wp = None
bp = None
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH + '/label_map.pbtxt')
# detector class index -> Bangla glyph
classes = ['অ','ই','উ','এ','ও','ক','খ','গ','ঘ','চ','ছ','জ','ঝ','ট','ঠ','ড','ত','থ','দ','ধ','ন','প','ফ','ব','ভ','ম','য','র','ল','শ','ষ','স','হ','ড়','য়','ৎ','ং','ঁ','০','১','২','৩','৪','৫','৭','৮','া','ি','ী','ে','ু','্র','্য']
# label-map ids corresponding to the classes above
labels = [1,2,4,7,9,11,12,13,14,16,17,18,19,21,22,23,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50,51,52,53,54,55,57,58,60,61,62,63,64,66,67]
# verifier label -> detector class index (-1 = no detector counterpart)
ulta = [0,-1,1,-1,2,-1,-1,3,-1,4,-1,5,6,7,8,-1,9,10,11,12,-1,13,14,15,-1,-1,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,-1,34,35,36,-1,37,38,39,40,41,42,43,-1,44,45,-1,46,47,48,49,50,-1,51,52]
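
# Illustrative helper (not called at runtime) showing how ulta is used below:
# ulta[v] maps a verifier label v back to an index into `classes`, with -1
# meaning the verifier label has no detector counterpart.
def _verifier_label_to_char(v):
    idx = ulta[v]
    return classes[idx] if idx != -1 else None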
def model_predict(word):
    # img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    '''
    if clicked == 1:
        bp = 66
        wp = 160
    mode = "GCMODE"
    if mode == "GCMODE":
        img = hpf(img, kSize=51)
        wp = 127
        img = wps(img, wp)
        img = bps(img)
    elif mode == "RMODE":
        bps()
        wps()
    elif mode == "SMODE":
        bps()
        wps()
        baw()
    img = cv2.fastNlMeansDenoising(img, img, 50.0, 7, 21)
    print("\ndone.")
    xs = img.shape
    if len(xs) == 3:
        img = img[:, :, 0]
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    angeel = deskew(img)
    if angeel != 0:
        img = deskew2(img, angeel)
    ho, wo = img.shape
    area = ho * wo
    ara = words(img, 25, 11, 7, area / 5000)
    ara.reverse()
    # cv2.imshow('input image', img)
    sz = len(ara)
    for i in range(0, sz):
        ara[i] = sorted(ara[i], key=lambda entry: entry[0][0])
    cnt2 = 0
    files = glob.glob('Tensorflow/myimages/*')
    for f in files:
        os.remove(f)
    for i in range(0, sz):
        # print(ara[i].shape)
        tmp = ara[i]
        sz2 = len(tmp)
        if i % 10 == 0:
            cnt2 = cnt2 + 1
        for j in range(0, sz2):
            a, b = tmp[j]
            b = cv2.adaptiveThreshold(b, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
            if j < 10:
                cnt3 = 0
            elif j < 20:
                cnt3 = 1
            else:
                cnt3 = 2
            cv2.imwrite('Tensorflow/myimages/ocr %d%d%d%d.jpg' % (cnt2, i, cnt3, j), b)
            # cv2.imshow('Crop %d%d' % (i, j), b)
    cv2.waitKey(0)
    PATH_TO_TEST_IMAGES_DIR = pathlib.Path('Tensorflow/myimages')
    TEST_IMAGE_PATHS = (list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg")) + list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpeg")))  # + list(PATH_TO_TEST_IMAGES_DIR.glob("*.png"))
    print(len(TEST_IMAGE_PATHS))
    final = []
    for image_path in TEST_IMAGE_PATHS:
        print("ovi")
        print(image_path)
        frame = cv2.imread(str(image_path))
        x = str(image_path)
        print(x[25])
        # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        final.append((detect_frame(frame), x[25]))
    '''
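    # Active path: denoise, collapse to a single channel, adaptive-threshold,
    # then re-expand to 3-channel BGR since the SSD expects 3-channel input.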
    frame = cv2.fastNlMeansDenoising(word, word, 50.0, 7, 21)
    xs = frame.shape
    if len(xs) == 3:
        frame = frame[:, :, 0]
    frame = cv2.adaptiveThreshold(frame, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
    # x = str(img_path)
    # print(x[25])
    # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    image_np = np.array(frame)
    cpimg = frame.copy()
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    image_t, shapes = detection_model.preprocess(input_tensor)
    prediction_dict = detection_model.predict(image_t, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    # print(len(detections))
    num_detections = int(detections.pop('num_detections'))
    # print("hello")
    # print(num_detections)
    # print(len(detections['detection_scores']))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    row, col, dummy = image_np.shape
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    # print(detections['detection_classes'])
    mark = [0] * 15  # only the top-15 detections are considered
    myletters = []
    for i in range(0, 15):
        curi = detections['detection_classes'][i]
        classi = classes[curi]
        # print(classes[curi], end='-')
        cur = detections['detection_scores'][i]
        if cur < 0.2:
            continue
        # print(cur, end=' ')
        # print(detections['detection_boxes'][i], end=' ')
        x0 = detections['detection_boxes'][i][0]
        y0 = detections['detection_boxes'][i][1]
        x1 = detections['detection_boxes'][i][2]
        y1 = detections['detection_boxes'][i][3]
        curarea = (x1 - x0) * (y1 - y0)
        ok = 1
        for j in range(0, i):
            # print(mark[j])
            if mark[j] == 0:
                continue
            curj = detections['detection_classes'][j]
            classj = classes[curj]
            # same ি/ী overlap exemption as in detect_frame above
            if classi == 'ি' or classj == 'ি':
                if classi != classj:
                    continue
            if classi == 'ী' or classj == 'ী':
                if classi != classj:
                    continue
            x2 = detections['detection_boxes'][j][0]
            y2 = detections['detection_boxes'][j][1]
            x3 = detections['detection_boxes'][j][2]
            y3 = detections['detection_boxes'][j][3]
            x4 = max(x0, x2)
            y4 = max(y0, y2)
            x5 = min(x1, x3)
            y5 = min(y1, y3)
            if x4 > x5 or y4 > y5:
                continue
            prevarea = (x3 - x2) * (y3 - y2)
            commonarea = (x5 - x4) * (y5 - y4)
            ins1 = curarea / commonarea
            ins2 = prevarea / commonarea
            ins = commonarea / (curarea + prevarea - commonarea)  # IoU
            # print(ins1, end=' ')
            if ins >= 0.5:
                ok = 0
                cur = detections['detection_classes'][j]
                # print(classes[cur])
                break
        if ok == 1:
            mark[i] = 1
            cur = detections['detection_classes'][i]
            # myletters.append(classes[cur])
        # print(ok)
    # verification (disabled while avver == 0; needs 'from myverify import verify')
    for i in range(0, 15):
        if mark[i] == 0 or avver == 0:
            continue
        if detections['detection_classes'][i] > 38:
            continue
        x0 = int(detections['detection_boxes'][i][0] * row)
        y0 = int(detections['detection_boxes'][i][1] * col)
        x1 = int(detections['detection_boxes'][i][2] * row)
        y1 = int(detections['detection_boxes'][i][3] * col)
        # print(y0, y1, x0, x1)
        currImg = cpimg[x0:x1, y0:y1]
        curscore = detections['detection_scores'][i]
        curclass = detections['detection_classes'][i]
        label, conf = verify(currImg)
        # print(ulta[label], conf)
        # print(curclass, curscore)
        if conf > curscore and ulta[label] != curclass and ulta[label] != -1:
            detections['detection_classes'][i] = ulta[label]
            detections['detection_scores'][i] = conf
    for i in range(0, 15):
        if detections['detection_scores'][i] < 0.2:
            continue
        if mark[i] == 0:
            continue
        cur = detections['detection_classes'][i]
        cur = classes[cur]
        y0 = detections['detection_boxes'][i][1]
        y1 = detections['detection_boxes'][i][3]
        pair = (y0, cur, y1)
        myletters.append(pair)
    myletters.sort(key=lambda x: x[0])  # left-to-right by xmin
    # print(myletters)
    for i in range(len(myletters) - 1, -1, -1):
        y0 = myletters[i][0]
        curr = myletters[i][1]
        y1 = myletters[i][2]
        if curr == 'ু' or curr == '্র':
            mxarea = 0
            mxno = i - 1
            for j in range(0, len(myletters)):
                if i == j:
                    continue
                y2 = myletters[j][0]
                y3 = myletters[j][2]
                curcommon = min(y3, y1) - max(y0, y2)
                if curcommon > mxarea:
                    mxarea = curcommon
                    mxno = j
            # bounds guard added: the original swap could index one past the end
            if mxno != (i - 1) and i + 1 < len(myletters):
                myletters[i], myletters[i + 1] = myletters[i + 1], myletters[i]
    res_list = [x[1] for x in myletters]
    # print(res_list)
    for i in range(len(res_list) - 2, -1, -1):
        x = res_list[i]
        y = res_list[i + 1]
        if x == 'ে' or x == 'ি':
            res_list[i], res_list[i + 1] = res_list[i + 1], res_list[i]
    for i in range(len(res_list) - 2, -1, -1):
        x = res_list[i]
        y = res_list[i + 1]
        # print(x, y)
        if x == 'অ' and y == 'া':
            # print('yo')
            res_list[i] = 'আ'
            res_list.pop(i + 1)
    # print(res_list)
    output = ''
    for i in res_list:
        output = output + i
    # print(' ')
    # time_now = datetime.datetime.now().strftime('%m_%d_%Y_%I_%M_%S_%p')
    # print(time_now)
    # date = datetime.date.today().strftime('%Y_%m_%d')
    # print(date)
    # folderName = "created/" + date
    # if not os.path.isdir(folderName):
    #     os.makedirs(folderName)
    # fileName = folderName + "/" + time_now + ".png"
    # cv2.imwrite(fileName, word)
    pil_image = Image.fromarray(word)
    # pil_image.convert("RGBA")
    for i in range(0, 15):
        if mark[i] == 0:
            continue
        x0 = detections['detection_boxes'][i][0] * row
        y0 = detections['detection_boxes'][i][1] * col
        x1 = detections['detection_boxes'][i][2] * row
        y1 = detections['detection_boxes'][i][3] * col
        pt1 = (y0, x0)
        pt2 = (y1, x1)
        # color = (0, 0, 255)  # red in BGR
        # thickness = 2        # border thickness in pixels
        # word = cv2.rectangle(word, pt1, pt2, color, thickness)
        draw = ImageDraw.Draw(pil_image, "RGBA")
        curi = detections['detection_classes'][i]
        classi = classes[curi]
        shape = [(y0, x0), (y1, x1)]
        draw.rectangle(shape, fill=(0, 100, 200, 127))           # translucent box fill
        draw.rectangle(shape, outline=(0, 0, 0, 127), width=3)   # box outline
        bbox = draw.textbbox(pt1, classi, font=font)
        draw.rectangle(bbox, fill=(200, 100, 0, 200))            # label background
        draw.text(pt1, classi, font=font, fill=(0, 0, 0, 255))   # character label
    newWordImg = np.asarray(pil_image)
    return output, newWordImg
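
# Hedged illustration of the reordering performed above: pre-base vowel signs
# ('ে', 'ি') are written to the LEFT of their consonant, so box order reports
# them first, while Unicode stores them AFTER the consonant; a final pass also
# composes 'অ' + 'া' into the single letter 'আ'. Example helper (not called):
def _reorder_prebase(glyphs):
    out = list(glyphs)
    for i in range(len(out) - 2, -1, -1):
        if out[i] in ('ে', 'ি'):
            out[i], out[i + 1] = out[i + 1], out[i]
    return out
# _reorder_prebase(['ি', 'ক']) == ['ক', 'ি'], i.e. the word 'কি'.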
# Legacy multi-word output assembly, kept for reference:
'''
output = ''
for i in range(0, len(final)):
    ara = final[i][0]
    numb = final[i][1]
    if i > 0 and numb != final[i - 1][1]:
        output = output + '\n'
    word = ''.join(ara)
    # corrected_word = get_campaign(word)
    output = output + word
    # print(corrected_word, end='')
    output = output + ' '
return output
'''
import gradio as gr

HF_TOKEN = os.getenv("SECRET_TOKEN")
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "word-flag-data")
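# SECRET_TOKEN should be a Hugging Face token with write access, supplied as a
# Space secret; HuggingFaceDatasetSaver appends flagged samples to the
# "word-flag-data" dataset, and allow_flagging="auto" below logs every submission.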
demo = gr.Interface(fn=model_predict, inputs="paint", outputs=["text", "image"],
                    # title="Bangla Word OCR",
                    description="Reduce the pen ink size from the Pen Icon (🖋️) for better results!",
                    # examples=[
                    #     ["Tensorflow/workspace/images/tmpbvc06xxf.png"],
                    #     ["Tensorflow/workspace/images/tmpfhin6fzg.png"],
                    #     ["Tensorflow/workspace/images/tmprhqli3yl.png"],
                    # ],
                    article=citation_text, allow_flagging="auto", flagging_callback=hf_writer)
demo.launch()
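
# For local debugging, one might instead launch with an explicit host/port, e.g.:
#   demo.launch(server_name="0.0.0.0", server_port=7860)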