Spaces:
Sleeping
Sleeping
from model1 import model1 | |
from model2 import model2 | |
import checkTool as ct | |
import extract_pdf as pf | |
import extraction_data as ed | |
import get_chinese_name as cn | |
import search_engine as se | |
# Name-similarity helper used when matching the HKID card against the bank statement
def string_similarity(s1, s2):
    """Return how similar two strings are, as a percentage from 0.0 to 100.0.

    Both inputs are normalized by removing space characters and lower-casing.
    The Levenshtein edit distance between the normalized strings is then
    converted to a score::

        (1 - distance / max(len(s1), len(s2))) * 100

    rounded to one decimal place.

    Args:
        s1: First string to compare.
        s2: Second string to compare.

    Returns:
        The similarity percentage as a float. Strings that are equal after
        normalization (including two empty strings) score 100.0.
    """
    s1 = s1.replace(' ', '').lower()
    s2 = s2.replace(' ', '').lower()

    # Early exit; this also covers two empty strings, which would otherwise
    # divide by zero when the distance is scaled below.
    if s1 == s2:
        return 100.0

    # Edit distance is symmetric, so put the longer string on the outer loop.
    # The row buffers then only need len(shorter) + 1 cells.
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    # previous[j] is the distance between the processed prefix of s1 and s2[:j].
    previous = list(range(len(s2) + 1))
    for i, ch1 in enumerate(s1, start=1):
        current = [i]
        for j, ch2 in enumerate(s2, start=1):
            cost = 0 if ch1 == ch2 else 1
            current.append(min(previous[j] + 1,          # deletion
                               current[j - 1] + 1,       # insertion
                               previous[j - 1] + cost))  # substitution
        previous = current

    distance = previous[-1]
    # After the swap above, len(s1) is the longer of the two lengths.
    similarity = (1 - distance / len(s1)) * 100
    return round(similarity, 1)
def get_data(img1_path, img2_path):
    """Collect HKID and bank-statement data and score how well the names match.

    Both HKID models are run on ``img1_path`` and their outputs are merged
    with ``ct.combine_info``. The bank statement at ``img2_path`` is parsed
    with ``ed.get_info_from_bank``, and the returned dict is extended in place
    with the ID-card fields and a name-similarity score.

    No pass/fail decision is made here; callers apply their own cutoff to
    ``similarity_score`` (an earlier draft of this function used 85).

    Args:
        img1_path: Path to the HKID card image.
        img2_path: Path to the bank statement passed to
            ``ed.get_info_from_bank``.

    Returns:
        The dict returned by ``ed.get_info_from_bank`` with these keys added:
        ``similarity_score``, ``name_on_id``, ``hkid``, ``validity``,
        ``issue_date`` and ``chi_name_id``.
    """
    # Two independent models read the same ID card image; their results are
    # reconciled by combine_info.
    info1 = model1(img1_path)
    info2 = model2(img1_path)
    # NOTE(review): cinfo is indexed as (name, validity, hkid, issue_date)
    # below — confirm this layout against ct.combine_info.
    cinfo = ct.combine_info(info1, info2)

    data = ed.get_info_from_bank(img2_path)
    name_on_statement = data["nameStatement"]
    name_on_id = cinfo[0]

    data["similarity_score"] = string_similarity(name_on_statement, name_on_id)
    data["name_on_id"] = name_on_id
    data["hkid"] = cinfo[2]
    data["validity"] = cinfo[1]
    data["issue_date"] = cinfo[3]

    # TODO: hard-coded placeholder. The intended lookup appears to be
    # cn.get_chiname(img1_path)["Chinese Name"]; restore it once that call is
    # usable so this field reflects the actual card.
    data["chi_name_id"] = "allen lau"
    return data