import re
import string

from textacy.preprocessing.resources import (
    RE_EMAIL,
    RE_EMOJI,
    RE_NUMBER,
    RE_PHONE_NUMBER,
    RE_SHORT_URL,
    RE_URL,
)

# Matches any single ASCII letter (a-z, A-Z). Despite the name, this does NOT
# match other non-Arabic scripts, digits, or punctuation.
NON_ARABIC_RE = re.compile(r"[%s]" % string.ascii_letters)

# Order matters: composite tokens (e-mails, URLs, phone numbers) must be
# removed before the bare number pattern, otherwise their digits are stripped
# first and the composite patterns can no longer match, leaving stray
# punctuation behind. ASCII letters are removed last so that the letter-based
# patterns above still see intact text.
_CLEANUP_PATTERNS = (
    RE_EMAIL,
    RE_URL,
    RE_SHORT_URL,
    RE_PHONE_NUMBER,
    RE_NUMBER,
    RE_EMOJI,
    NON_ARABIC_RE,
)


def clean_text(text: str) -> str:
    """Remove unwanted tokens from *text*.

    Every match of textacy's e-mail, URL, short-URL, phone-number, number
    and emoji patterns is deleted, followed by every ASCII letter
    (``NON_ARABIC_RE``). Matches are replaced with the empty string, so the
    surrounding whitespace is left untouched.

    Args:
        text: The raw input string.

    Returns:
        The input with all pattern matches removed.
    """
    for pattern in _CLEANUP_PATTERNS:
        text = pattern.sub("", text)
    return text