|
|
|
|
|
import re |
|
import string |
|
|
|
|
|
def remove_space_between_cn_en(text):
    """Remove spaces from ``text`` unless they separate two ASCII tokens.

    The text is split on the space character only (tabs and newlines are
    left inside tokens) and re-joined. A single space is re-inserted
    between two adjacent tokens only when the text built so far ends with
    an ASCII letter, digit or punctuation character and the next token
    starts with an ASCII letter or digit. Every other space is dropped,
    which, as the function name suggests, removes the gaps between
    non-ASCII (e.g. Chinese) text and ASCII text.

    Consequences of the split/re-join approach:

    * runs of several spaces collapse (to one space or to nothing);
    * leading spaces are dropped;
    * one trailing space is kept when ``text`` ends with a space;
    * a string containing no space at all is returned unchanged.

    Args:
        text: The string to normalise.

    Returns:
        The string with the unwanted spaces removed.

    >>> remove_space_between_cn_en("sk . asdf ,")
    'sk. asdf,'
    """
    pieces = text.split(" ")
    if len(pieces) < 2:
        # No space present, so there is nothing to rewrite.
        return text

    # string.punctuation contains "\\" followed by "]"; interpolated raw into
    # a character class that pair is read as an escaped "]" and the backslash
    # itself is lost from the class. re.escape keeps every punctuation
    # character, backslash included, as a literal member.
    ascii_tail = re.compile(f"[a-zA-Z0-9{re.escape(string.punctuation)}]$")
    ascii_head = re.compile("[a-zA-Z0-9]")

    result = ""
    for piece in pieces:
        if not piece:
            # Empty pieces come from leading, trailing or repeated spaces.
            continue
        # "$" matches at the very end or just before a final newline, so the
        # last two characters are all the tail check can ever depend on;
        # slicing avoids rescanning the whole accumulated string each time.
        if ascii_tail.search(result[-2:]) and ascii_head.match(piece):
            result += " "
        result += piece

    if text.endswith(" "):
        result += " "
    return result
|
|
|
|
|
def main():
    """Print the result of running a sample string through the cleaner."""
    sample = "sk . asdf ,"
    print(remove_space_between_cn_en(sample))
|
|
|
|
|
# Run the demo only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|