Spaces:
Sleeping
Sleeping
File size: 542 Bytes
37e2bde |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
import re
def clean_text(text: str) -> str:
    """Return *text* reduced to ASCII letters, digits and single spaces.

    The cleaning steps run in this order:

    1. HTML-style tags (``<...>``) are deleted.
    2. ``http://`` / ``https://`` URLs are deleted.
    3. Every character other than ``a-z``, ``A-Z``, ``0-9`` and the space
       character is deleted (this includes tabs, newlines and punctuation).
    4. Runs of spaces are collapsed to one and the ends are trimmed.

    Removed spans are deleted, not replaced by a space, so text on either
    side of a tag or punctuation mark is joined (``"a<br>b"`` -> ``"ab"``).

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string; empty if nothing survives the filters.
    """
    # Tags go first so that their attribute values (which may hold URLs)
    # disappear together with the tag itself.
    without_tags = re.sub(r'<[^>]*?>', '', text)

    without_urls = re.sub(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        '',
        without_urls_input := without_tags,
    ) if False else re.sub(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        '',
        without_tags,
    )

    alnum_and_spaces = re.sub(r'[^a-zA-Z0-9 ]', '', without_urls)

    # Only plain spaces can remain at this point, so a single split/join
    # both collapses repeated spaces and trims the ends; no separate
    # regex pass or strip() is needed.
    return ' '.join(alnum_and_spaces.split())