Spaces:
Running
Running
Update overlap.py
Browse files- overlap.py +39 -0
overlap.py
CHANGED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### Utilities to get overlap between strings
|
2 |
+
|
3 |
+
def get_overlap_length(left: str, right: str):
    """
    Find the longest suffix of `left` that is also a prefix of `right`.

    Returns a tuple (overlap_length, overlap). When the two strings do not
    overlap, the result is (0, "").
    """
    good_length, overlap = 0, ""
    # Candidate lengths start at 1 because left[-0:] is the whole string rather
    # than an empty suffix, and they include the full length of the shorter
    # string so that an overlap covering that entire string is detected.
    for i in range(1, min(len(left), len(right)) + 1):
        if left[-i:] == right[:i]:
            # Keep going: a longer match found later replaces a shorter one.
            good_length = i
            overlap = left[-i:]
    return good_length, overlap
|
10 |
+
|
11 |
+
def get_overlap_list(strings):
    """
    Compute the overlap between each string and the one that follows it.

    Returns a list of (overlap_length, overlap) tuples, one per pair of
    consecutive strings, so the result has one fewer entry than the input
    (and is empty for inputs with fewer than two strings).
    """
    return [
        get_overlap_length(current, following)
        for current, following in zip(strings, strings[1:])
    ]
|
19 |
+
|
20 |
+
def unoverlap_list(strings):
    """
    Split a list of overlapping strings into overlap and non-overlap pieces.

    Returns a list of (content, is_overlap) tuples. Each non-empty overlap
    between two consecutive strings is emitted once with is_overlap=True,
    placed before the remainder of the second string; every input string
    contributes one is_overlap=False entry holding what is left after its
    left and right overlaps are trimmed off.
    """
    pair_overlaps = get_overlap_list(strings)
    last_position = len(strings) - 1
    segments = []
    for position, text in enumerate(strings):
        trim_left = 0
        if position > 0:
            # Overlap shared with the previous string: emit it before this
            # string's own content, and trim it from the left.
            trim_left, shared = pair_overlaps[position - 1]
            if shared:
                segments.append((shared, True))

        # Overlap shared with the next string is trimmed from the right; it is
        # emitted when the next string is processed.
        trim_right = pair_overlaps[position][0] if position < last_position else 0

        segments.append((text[trim_left:len(text) - trim_right], False))
    return segments
|