import re import math def cleaning(text): if isinstance(text, str): text = text.lower() text = re.sub(r'[^ ,.?!a-z0-9àáảãạăắằẳẵặâấầẩẫậèéẻẽẹêếềểễệđìíỉĩịòóỏõọôốồổỗộơớờởỡợùúủũụưứừửữựỳýỷỹỵ]', '', text) text = re.sub(r'[.!?]+', '.', text) text = re.sub(r'([,.!?])\1+', r'\1', text) text = re.sub(r'[,]+', ' , ', text) text = re.sub(r'[.]+', ' . ', text) text = re.sub(r'([ ])\1+', r'\1', text) return text return text