Spaces:
Sleeping
Sleeping
sabari
committed on
Commit
·
442387d
1
Parent(s):
4c0bce2
initial commit
Browse files- .gitignore +7 -0
- app.py +124 -0
- config.json +1 -0
- get_coordinate.py +66 -0
- models/model-ocr-0.1829.h5 +3 -0
- requirements.txt +7 -0
.gitignore
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
flagged/
|
2 |
+
*.pt
|
3 |
+
*.png
|
4 |
+
*.jpg
|
5 |
+
*.mp4
|
6 |
+
*.mkv
|
7 |
+
gradio_cached_examples/
|
app.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import cv2
|
3 |
+
import math
|
4 |
+
import argparse
|
5 |
+
from tensorflow.keras.models import load_model
|
6 |
+
from flask import Flask, request, jsonify
|
7 |
+
import cv2
|
8 |
+
import json
|
9 |
+
import numpy as np
|
10 |
+
from tensorflow.keras import backend as K
|
11 |
+
from get_coordinate import get_object_coordinates
|
12 |
+
import requests
|
13 |
+
import gradio as gr
|
14 |
+
import os
|
15 |
+
|
16 |
+
# Sample images fetched at start-up.  The links end in ``dl=1`` so that
# Dropbox answers with the file itself; with ``dl=0`` a shared link serves an
# HTML preview page, which cannot be decoded as an image.
file_urls = [
    'https://www.dropbox.com/scl/fi/skt4o9a37ccrxvruojk3o/2.png?rlkey=kxppvdnvbs9852rj6ly123xfk&dl=1',
    'https://www.dropbox.com/scl/fi/3opkr5aoca1fq0wrudlcx/3.png?rlkey=wm4vog7yyk5naoqu68vr6v48s&dl=1',
    'https://www.dropbox.com/scl/fi/t74nd09fod52x0gua93ty/1.png?rlkey=er4ktuephlapzyvh5glkym5b4&dl=1',
]
|
20 |
+
|
21 |
+
def download_file(url, save_name):
    """Download ``url`` to ``save_name`` unless that file already exists.

    Args:
        url: Address fetched with an HTTP GET request.
        save_name: Destination path.  An existing file is left untouched, so
            repeated start-ups do not download it again.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if os.path.exists(save_name):
        return

    response = requests.get(url, timeout=60)
    # Fail loudly instead of storing an error page under ``save_name``: a bad
    # file would never be replaced later because of the existence check above.
    response.raise_for_status()

    # The context manager guarantees the handle is flushed and closed.
    with open(save_name, 'wb') as output_file:
        output_file.write(response.content)
|
26 |
+
|
27 |
+
# Fetch every sample asset.  A video is stored under one fixed name, while
# images are numbered by their position in ``file_urls``.
for i, url in enumerate(file_urls):
    save_name = "video.mp4" if 'mp4' in url else f"image_{i}.jpg"
    download_file(url, save_name)
|
38 |
+
|
39 |
+
|
40 |
+
class OCR():
    """Hiragana character recogniser built on a saved Keras model.

    The class loads the label table from a JSON config, loads the model, and
    classifies every character region that ``get_object_coordinates`` finds
    in an image.
    """

    def __init__(self, path="model-ocr-0.1829.h5", config_path="config.json"):
        """Load the configuration and the model.

        Args:
            path: Model file handed to ``load_model``.  When it does not
                exist as given, the same name inside ``models/`` is tried.
            config_path: JSON file with a ``"hiragana"`` entry that holds
                ``"threshold"`` and ``"label"``.
        """
        # Read the config JSON file
        with open(config_path, 'r', encoding="utf-8") as file:
            self.config_data = json.load(file)

        hiragana_config = self.config_data['hiragana']

        # Kept as an attribute for callers; ``run`` itself does not apply it.
        self.threshold = hiragana_config['threshold']

        # Maps the class index (as a string key) to its character.
        self.label_dict = hiragana_config['label']

        # Fall back to the ``models/`` directory when the bare file name is
        # not present in the working directory.
        if not os.path.exists(path):
            candidate = os.path.join("models", path)
            if os.path.exists(candidate):
                path = candidate

        # load the model from local
        self.model = load_model(path, custom_objects={"K": K})

    def run(self, image):
        """Recognise every character region found in ``image``.

        Args:
            image: BGR image array, passed on to ``get_object_coordinates``.

        Returns:
            A list with one dict per detected region.  Each dict holds
            ``"text"`` (the label), ``"prob"`` (the score of the chosen class
            as a float) and ``"coord"`` (``[Xmin, Ymin, Xmax, Ymax]``).  The
            list is empty when no region is detected.
        """
        # extract the character coordinates using the cv2 contours
        coordinate, thresholdedImage = get_object_coordinates(image)

        # ``get_object_coordinates`` reports "nothing found" as None.
        if not coordinate:
            return []

        image_batch = np.zeros((1, 64, 64, 1))
        output = []

        for box in coordinate:
            x_min, y_min, x_max, y_max = box

            # crop the region and scale it to the 64x64 model input; the
            # thresholded image holds 0/1, hence the factor 255
            cropImg = thresholdedImage[y_min:y_max, x_min:x_max]
            image_batch[0, :, :, 0] = cv2.resize(cropImg, (64, 64)) * 255

            # The batch holds a single sample, so flattening the prediction
            # leaves exactly the per-class scores of that sample.
            scores = np.ravel(self.model.predict(image_batch))
            position = int(np.argmax(scores))

            output.append({
                "text": self.label_dict[str(position)],
                # plain float so the result can be serialised as JSON
                "prob": float(scores[position]),
                "coord": box,  # Xmin,Ymin,Xmax,Ymax
            })

        return output
|
83 |
+
|
84 |
+
def getOCRResults(image_path):
    """Run OCR on one image path or on a sequence of image paths.

    Args:
        image_path: A single path (what a ``type="filepath"`` Gradio image
            input delivers), a list/tuple of paths, or ``None`` when no image
            was supplied.

    Returns:
        ``{"result": [...]}`` with one entry per image, each entry being the
        list returned by ``ocrAPP.run``.

    Raises:
        ValueError: If an image file cannot be read.
    """
    if image_path is None:
        return {"result": []}

    # A single path must not be iterated character by character.
    if isinstance(image_path, (str, os.PathLike)):
        image_paths = [image_path]
    else:
        image_paths = list(image_path)

    results = []
    for single_path in image_paths:
        image = cv2.imread(os.fspath(single_path))
        if image is None:
            # cv2.imread signals an unreadable file by returning None
            raise ValueError(f"Could not read image: {single_path!r}")
        results.append(ocrAPP.run(image))

    # Return plain data instead of ``flask.jsonify``: jsonify needs an active
    # Flask application context, which does not exist when Gradio calls this.
    return {"result": results}
|
98 |
+
|
99 |
+
|
100 |
+
# Single shared recogniser, created once at start-up.
ocrAPP = OCR()

# Names of the sample files; ``video_path`` is not referenced below.
video_path = [['video.mp4']]
path = [['image_0.jpg'], ['image_1.jpg']]


inputs_image = [
    gr.components.Image(type="filepath", label="Input Image"),
]
# The recognition result is structured data, so it is shown with the JSON
# component; "json" is not a valid ``type`` for an Image component.
outputs_image = [
    gr.components.JSON(label="Output Json"),
]
interface_image = gr.Interface(
    fn=getOCRResults,
    inputs=inputs_image,
    outputs=outputs_image,
    title="Hiragana Character Recognition",
    examples=path,
    cache_examples=False,
)

gr.TabbedInterface(
    [interface_image],
    tab_names=['Image inference']
).queue().launch()
|
config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"hiragana": {"threshold": 0.5, "label": {"0": "あ", "1": "い", "2": "う", "3": "え", "4": "お", "5": "か", "6": "き", "7": "く", "8": "け", "9": "こ", "10": "さ", "11": "し", "12": "す", "13": "せ", "14": "そ", "15": "た", "16": "ち", "17": "つ", "18": "て", "19": "と", "20": "な", "21": "に", "22": "ぬ", "23": "ね", "24": "の", "25": "は", "26": "ひ", "27": "ふ", "28": "へ", "29": "ほ", "30": "ま", "31": "み", "32": "む", "33": "め", "34": "も", "35": "や", "36": "ゆ", "37": "よ", "38": "ら", "39": "り", "40": "る", "41": "れ", "42": "ろ", "43": "わ", "44": "ゐ", "45": "ゑ", "46": "を", "47": "ん", "48": "ゝ"}}}
|
get_coordinate.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Sat Jul 22 14:22:34 2023
|
4 |
+
|
5 |
+
@author: SABARI
|
6 |
+
"""
|
7 |
+
|
8 |
+
import cv2
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
from skimage.filters import threshold_sauvola
|
12 |
+
|
13 |
+
import tensorflow as tf
|
14 |
+
from tensorflow.keras import backend as K
|
15 |
+
import json
|
16 |
+
|
17 |
+
def sauvola_thresholding(grayImage_, window_size=15):
    """Binarise a grayscale image with Sauvola local thresholding.

    Sauvola thresholds are computed per neighbourhood, which is useful for
    images whose background is not uniform, especially for text recognition.

    grayImage_ --- Input image in 2-dimensional gray scale format
    window_size --- Size of the filter window used for the local threshold

    Returns a uint8 array holding 1 where the pixel is NOT above its local
    threshold and 0 where it is.
    """
    local_threshold = threshold_sauvola(grayImage_, window_size=window_size)

    # True where the pixel is brighter than its local threshold
    above_threshold = grayImage_ > local_threshold

    # Invert the mask so the remaining (darker) pixels become 1
    return np.logical_not(above_threshold).astype(np.uint8)
|
32 |
+
|
33 |
+
# Function to get coordinates of the object
|
34 |
+
def get_object_coordinates(image, kernel_shape=(35, 1)):
    """Find the bounding boxes of candidate character regions in an image.

    Args:
        image: BGR colour image; it is converted with ``cv2.COLOR_BGR2GRAY``.
        kernel_shape: ``(rows, columns)`` of the all-ones kernel used to
            dilate the binary image before the contour search.  The default
            is a 35-row, 1-column kernel.

    Returns:
        A tuple ``(coordinate, thresholdedImage)``.  ``coordinate`` is a list
        of ``[x_min, y_min, x_max, y_max]`` boxes, one per external contour,
        or ``None`` when no contour is found.  ``thresholdedImage`` is the
        binary image from ``sauvola_thresholding`` before dilation.
    """
    # Convert the image from BGR to GRAY color space
    grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Binarise with Sauvola local thresholding
    thresholdedImage = sauvola_thresholding(grayImage)

    # Dilate only the copy used for the contour search -- presumably so that
    # vertically separated strokes of one character end up in one contour.
    kernel = np.ones(kernel_shape, np.uint8)
    dilated_image = cv2.dilate(thresholdedImage, kernel, iterations=1)

    # Find the outer contours in the dilated mask
    contours, _ = cv2.findContours(dilated_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if len(contours) == 0:
        # Return None if no object was found
        return None, thresholdedImage

    coordinate = []
    for contour in contours:
        # Get the bounding box of the contour
        x, y, w, h = cv2.boundingRect(contour)
        coordinate.append([x, y, x + w, y + h])

    return coordinate, thresholdedImage
|
models/model-ocr-0.1829.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f52ee58d51590ebd2143a66d23030bdf930692962507410b0af96db3e4c15d24
|
3 |
+
size 10458528
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
opencv-python
|
2 |
+
tensorflow==2.7.0
|
3 |
+
numpy
|
4 |
+
flask
|
5 |
+
scikit-image
|
6 |
+
|
7 |
+
|