File size: 2,821 Bytes
1f72938
 
 
 
9312707
e029c8d
 
1f72938
 
 
 
9312707
 
 
 
1f72938
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9312707
1f72938
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9312707
 
 
1f72938
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e029c8d
 
 
 
 
1f72938
 
e029c8d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from model1 import model1
from model2 import model2
import checkTool as ct
import extract_pdf as pf
import extraction_data as ed
import get_chinese_name as cn
import search_engine as se

# Name matching between the HKID card and the bank statement

def string_similarity(s1, s2):
    """Return how alike two strings are, as a percentage between 0 and 100.

    Both inputs are normalised first: space characters are removed and the
    text is lower-cased. The score is derived from the Levenshtein edit
    distance between the normalised strings::

        (1 - distance / length_of_longer_string) * 100

    and is rounded to one decimal place. Strings that are equal after
    normalisation (including two empty strings) score exactly 100.0.
    """
    left = s1.replace(' ', '').lower()
    right = s2.replace(' ', '').lower()
    if left == right:
        return 100.0

    # Only the previous row of the edit-distance table is needed to build the
    # next one. `previous[col]` is the distance between the prefix of `left`
    # consumed so far and the first `col` characters of `right`.
    previous = list(range(len(right) + 1))
    for row, left_char in enumerate(left, start=1):
        current = [row]  # turning a `row`-long prefix into "" costs `row` deletions
        for col, right_char in enumerate(right, start=1):
            delete_cost = previous[col] + 1
            insert_cost = current[col - 1] + 1
            substitute_cost = previous[col - 1] + (left_char != right_char)
            current.append(min(delete_cost, insert_cost, substitute_cost))
        previous = current

    distance = previous[-1]
    score = (1 - distance / max(len(left), len(right))) * 100
    return round(score, 1)

def get_data(img1_path, img2_path):
    """Combine HKID card details with bank statement details and compare names.

    Args:
        img1_path: Reference to the HKID card image (presumably a file path);
            it is passed unchanged to both ``model1`` and ``model2``.
        img2_path: Reference to the bank statement (presumably a file path);
            it is passed unchanged to ``ed.get_info_from_bank``.

    Returns:
        The mapping returned by ``ed.get_info_from_bank``, which must contain
        a ``"nameStatement"`` entry, extended in place with:

        * ``"similarity_score"`` - ``string_similarity`` of the statement
          name and the HKID name.
        * ``"name_on_id"`` - name taken from the combined HKID info.
        * ``"hkid"`` - HKID number taken from the combined HKID info.
        * ``"validity"`` - HKID validity value from the combined HKID info.
        * ``"issue_date"`` - issue date from the combined HKID info.
        * ``"chi_name_id"`` - currently a hard-coded placeholder (see below).

        No same-person decision is made here; callers judge the score.
    """
    # Both HKID models read the same card image; their outputs are merged.
    info1 = model1(img1_path)
    info2 = model2(img1_path)
    # The merged result is indexed positionally below:
    # [0] name, [1] validity, [2] HKID number, [3] issue date.
    cinfo = ct.combine_info(info1, info2)

    # Details extracted from the bank statement, including the holder's name.
    data = ed.get_info_from_bank(img2_path)
    name = data["nameStatement"]

    name1 = cinfo[0]
    data["similarity_score"] = string_similarity(name, name1)
    data["name_on_id"] = name1
    data["hkid"] = cinfo[2]
    data["validity"] = cinfo[1]
    data["issue_date"] = cinfo[3]

    # NOTE(review): hard-coded placeholder, not read from the card. The real
    # lookup, cn.get_chiname(img1_path)["Chinese Name"], is currently disabled.
    # TODO: restore it once that extraction is ready.
    chi_name = "allen lau"
    data["chi_name_id"] = chi_name

    return data