ToS-Summarization / keyword_extraction.py
EE21's picture
Update keyword_extraction.py
8a84637
raw
history blame
503 Bytes
import nltk
from rake_nltk import Rake
# Request the NLTK data packages this module needs at import time:
# the "punkt" tokenizer models and the "stopwords" corpus.
# nltk.download is called for each one on every import of this module.
for _nltk_resource in ("punkt", "stopwords"):
    nltk.download(_nltk_resource)
def extract_keywords(text):
    """Return the RAKE-ranked keywords found in *text*.

    A fresh ``Rake`` instance is created on every call with
    ``max_length=1`` and ``include_repeated_phrases=False``. No stopword
    or punctuation arguments are passed, so rake_nltk's own defaults
    apply for those.

    Args:
        text: The text to extract keywords from; it is handed unchanged to
            ``Rake.extract_keywords_from_text``.

    Returns:
        Whatever ``Rake.get_ranked_phrases()`` yields after processing
        *text* (per the rake_nltk docs, a list of phrase strings ordered
        from highest to lowest score).
    """
    # max_length=1 restricts candidates to single-word phrases (per the
    # rake_nltk docs); repeated phrases are excluded from the ranking.
    extractor = Rake(max_length=1, include_repeated_phrases=False)

    # Run the extraction, then hand back the ranked phrases directly.
    extractor.extract_keywords_from_text(text)
    return extractor.get_ranked_phrases()