File size: 1,813 Bytes
c310e19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def weighted_edit_distance(word1, word2, scores):
    m = len(word1)
    n = len(word2)
    dp = [[0 for __ in range(m + 1)] for __ in range(n + 1)]
    for j in range(m + 1):
        dp[0][j] = j
    for i in range(n + 1):
        dp[i][0] = i
    for i in range(1, n + 1):  ## word2
        for j in range(1, m + 1): ## word1
            delect_cost = ed_delect_cost(j-1, i-1, word1, word2, scores)  ## delect a[i]
            insert_cost = ed_insert_cost(j-1, i-1, word1, word2, scores)  ## insert b[j]
            if word1[j - 1] != word2[i - 1]:
                replace_cost = ed_replace_cost(j-1, i-1, word1, word2, scores) ## replace a[i] with b[j]
            else:
                replace_cost = 0
            dp[i][j] = min(dp[i-1][j] + insert_cost, dp[i][j-1] + delect_cost, dp[i-1][j-1] + replace_cost)

    return dp[n][m]

def ed_delect_cost(j, i, word1, word2, scores):
    ## delect a[i]
    c = char2num(word1[j])
    return scores[c][j]


def ed_insert_cost(i, j, word1, word2, scores):
    ## insert b[j]
    if i < len(word1) - 1:
        c1 = char2num(word1[i])
        c2 = char2num(word1[i + 1])
        return (scores[c1][i] + scores[c2][i+1])/2
    else:
        c1 = char2num(word1[i])
        return scores[c1][i]


def ed_replace_cost(i, j, word1, word2, scores):
    ## replace a[i] with b[j]
    c1 = char2num(word1[i])
    c2 = char2num(word2[j])
    # if word1 == "eeatpisaababarait".upper():
    #     print(scores[c2][i]/scores[c1][i])

    return max(1 - scores[c2][i]/scores[c1][i]*5, 0)

def char2num(char):
    if char in '0123456789':
        num = ord(char) - ord('0') + 1
    elif char in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
        num = ord(char.lower()) - ord('a') + 11
    else:
        print('error symbol', char)
        exit()
    return num - 1