import json
import os

import cv2
import numpy as np
import requests
import gradio as gr
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K
from get_coordinate import get_object_coordinates

# Example images hosted on Dropbox; dl=1 requests the raw file rather than
# Dropbox's HTML preview page (dl=0 would save HTML, not an image)
file_urls = [
    'https://www.dropbox.com/scl/fi/skt4o9a37ccrxvruojk3o/2.png?rlkey=kxppvdnvbs9852rj6ly123xfk&dl=1',
    'https://www.dropbox.com/scl/fi/3opkr5aoca1fq0wrudlcx/3.png?rlkey=wm4vog7yyk5naoqu68vr6v48s&dl=1',
    'https://www.dropbox.com/scl/fi/t74nd09fod52x0gua93ty/1.png?rlkey=er4ktuephlapzyvh5glkym5b4&dl=1',
]

def download_file(url, save_name):
    # Skip the download if the file is already cached locally
    if not os.path.exists(save_name):
        response = requests.get(url)
        with open(save_name, 'wb') as f:
            f.write(response.content)

for i, url in enumerate(file_urls):
    if 'mp4' in url:
        download_file(url, "video.mp4")
    else:
        # The sources are PNGs, but cv2.imread sniffs the file content,
        # so the .jpg extension used below is harmless
        download_file(url, f"image_{i}.jpg")


class OCR:

    def __init__(self, path="model-ocr-0.1829.h5", config_path="config.json"):

        # Read the config JSON file
        with open(config_path, 'r', encoding="utf-8") as file:
            self.config_data = json.load(file)

        # Get the confidence threshold (loaded for completeness; it is
        # not applied in run() below)
        self.threshold = self.config_data['hiragana']['threshold']

        # Get the class-index -> character label dictionary
        self.label_dict = self.config_data['hiragana']['label']

        # Load the model from disk; custom_objects exposes the Keras
        # backend to any layers saved with a reference to K
        self.model = load_model(path, custom_objects={"K": K})
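
    # For reference, a sketch of how config.json is assumed to be laid
    # out, based only on the keys read above (the threshold value and
    # labels shown are illustrative placeholders, not the real contents):
    #
    #   {
    #     "hiragana": {
    #       "threshold": 0.5,
    #       "label": {"0": "あ", "1": "い", ...}
    #     }
    #   }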

    def run(self, image):
        # Extract per-character bounding boxes using cv2 contours
        coordinate, thresholdedImage = get_object_coordinates(image)

        image_batch = np.zeros((1, 64, 64, 1))
        output = []

        for xmin, ymin, xmax, ymax in coordinate:
            temp = {}
            # Crop one character from the thresholded image
            cropImg = thresholdedImage[ymin:ymax, xmin:xmax]
            # Resize to the model's 64x64 input; the *255 rescale assumes
            # the thresholded image is binary 0/1
            image_batch[0, :, :, 0] = cv2.resize(cropImg, (64, 64)) * 255

            # Predict; the model returns a (1, num_classes) probability row
            predict = self.model.predict(image_batch)
            position = np.argmax(predict)

            temp["text"] = self.label_dict[str(position)]
            # Index into the first (only) row and cast to a plain float
            # so the result is JSON-serializable
            temp["prob"] = float(predict[0][position])
            temp["coord"] = [int(xmin), int(ymin), int(xmax), int(ymax)]  # Xmin,Ymin,Xmax,Ymax

            output.append(temp)

        return output
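
# For readers without the get_coordinate module to hand: an illustrative
# sketch of what get_object_coordinates plausibly does (Otsu threshold plus
# external-contour bounding boxes). This is an assumption, not the actual
# implementation, and it is named differently so it does not shadow the
# real import above.
def _sketch_get_object_coordinates(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Inverse threshold so dark ink becomes the white foreground
    _, thresh = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    boxes = [cv2.boundingRect(c) for c in contours]
    # Convert (x, y, w, h) to (Xmin, Ymin, Xmax, Ymax), left to right
    boxes = sorted((x, y, x + w, y + h) for (x, y, w, h) in boxes)
    # Return a 0/1 image, consistent with the *255 rescale in run()
    return boxes, thresh // 255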

def getOCRResults(image_path):
    # gr.Image(type="filepath") passes a single path string, so read it
    # directly instead of iterating (indexing a string yields characters)
    image = cv2.imread(image_path)
    results = ocrAPP.run(image)
    # Return a plain dict; gr.JSON serializes it itself. Flask's jsonify
    # cannot be used here because there is no application context. The
    # one-element list preserves the original per-image output shape.
    return {"result": [results]}
   

# Load the model once at startup so the first request is not slowed down
ocrAPP = OCR()

video_path = [['video.mp4']]  # unused: no video tab is registered below
path = [['image_0.jpg'], ['image_1.jpg']]

        
inputs_image = [
    gr.components.Image(type="filepath", label="Input Image"),
]
outputs = [
    gr.components.JSON(label="Output Json"),
]
interface_image = gr.Interface(
    fn=getOCRResults,
    inputs=inputs_image,
    outputs=outputs,
    title="Hiragana Character Recognition",
    examples=path,
    cache_examples=False,
)
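
# Optional smoke test before launching the UI, gated behind a hypothetical
# OCR_SELF_TEST environment variable so the default behavior is unchanged:
# run the pipeline once on a downloaded example and print the JSON.
if os.environ.get("OCR_SELF_TEST"):
    sample = cv2.imread("image_0.jpg")
    if sample is not None:
        print(json.dumps(ocrAPP.run(sample), ensure_ascii=False, indent=2))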

gr.TabbedInterface(
    [interface_image],
    tab_names=['Image inference']
).queue().launch()