Spaces:
Runtime error
Runtime error
import re | |
def generate_annotated_text(text, keyw_list):
    '''
    Build the source text of an ``annotated_text(...)`` call that marks keywords.

    Every literal occurrence of each keyword in ``text`` becomes a
    ``("keyword", "")`` tuple argument; the text between occurrences is
    emitted as plain double-quoted string arguments.

    Keywords are matched literally (regex metacharacters are escaped) and
    case-sensitively. Empty keywords are ignored. When matches overlap, the
    leftmost one wins; among matches starting at the same position the
    longest one wins.

    Annotations separated only by a single space, or by nothing at all, are
    merged into one annotation whose words are joined by a single space.

    NOTE: ``text`` and the keywords are inserted verbatim, so a double quote
    or backslash inside them is not escaped in the returned string.

    :param text: str
    :param keyw_list: list of str
    :return: str

    e.g. input1 = 'I like an apple. Do you like apples too?'
         input2 = ["like", "apple"]
         output = 'annotated_text("I ", ("like", ""), " an ", ("apple", ""), ". Do you ", ("like apple", ""), "s too?")'
    '''
    def find_keyword_index(text, keyw_list):
        # Collect (keyword, start offset) for every literal occurrence.
        indices = []
        for keyword in keyw_list:
            if not keyword:
                # An empty pattern would match at every position of the text.
                continue
            # Keywords are plain words, not regular expressions: escape them
            # so characters such as "+", "." or "(" are matched literally.
            for match in re.finditer(re.escape(keyword), text):
                indices.append((keyword, match.start()))
        return indices

    indices = find_keyword_index(text, keyw_list)

    # 1. Sort the matches by start offset in ascending order; on a tie the
    #    longer keyword comes first so it is the one that gets annotated.
    sorted_indices = sorted(indices, key=lambda x: (x[1], -len(x[0])))

    # 2. Use each keyword position to split the text into plain segments and
    #    annotated keywords.
    parts = []
    last_index = 0
    for word, start_idx in sorted_indices:
        if start_idx < last_index:
            # Overlaps a keyword that was already emitted; emitting it too
            # would duplicate part of the text.
            continue
        parts.append(f'"{text[last_index:start_idx]}", ("{word}", ""), ')
        last_index = start_idx + len(word)

    # 3. Assemble the result string, ending with the remaining tail of text.
    output = 'annotated_text(' + ''.join(parts) + f'"{text[last_index:]}"' + ')'

    # 4. Merge consecutive annotations: keywords separated by one space
    #    (type 1) or by nothing (type 2) are joined into a single annotation.
    string_cont_type1 = re.compile(r'", ""\), " ", \("')
    string_cont_type2 = re.compile(r'", ""\), "", \("')
    output = string_cont_type1.sub(' ', output)
    output = string_cont_type2.sub(' ', output)
    return output