Spaces:
Runtime error
Runtime error
from rake_nltk import Rake | |
import nltk | |
import re | |
# Download the NLTK data used by this script: the sentence tokenizer models
# and the stopword lists.
nltk.download('punkt')
# Recent NLTK releases load the sentence tokenizer from 'punkt_tab' rather
# than 'punkt'; fetching both keeps tokenization working on either version.
# NOTE(review): confirm against the NLTK version pinned for deployment.
nltk.download('punkt_tab')
nltk.download('stopwords')
# Obligation cue words and phrases, in English and German.
# Every entry is lowercase: the matcher compares these against lowercased
# keyphrases (``word in kp.lower()``), so a capitalised entry such as
# "Pflicht" could never match.
obligation_words = [
    # English words
    "must", "will", "use", "may", "provides", "is obliged to",
    "has to", "needs to", "is required to", "shall",
    "should", "ought to", "required", "obligated", "duty",
    "agrees to", "has a duty to", "is expected to", "commits to",
    # German words
    "muss", "wird", "nutzen", "darf", "stellt bereit",
    "ist verpflichtet", "ist erforderlich", "soll",
    "sollte", "erforderlich", "verpflichtet", "pflicht",
    "stimmt zu", "hat die pflicht", "wird erwartet", "verpflichtet sich",
]
def extract_sentences_with_obligations(text):
    """Return the sentences of *text* whose keyphrases mention an obligation.

    The text is split into sentences after ``.``, ``!`` or ``?`` followed by
    whitespace. Each sentence is run through RAKE, and the sentence is kept
    when any ranked keyphrase contains one of ``obligation_words`` as a
    case-insensitive substring.

    Args:
        text: The input text to scan.

    Returns:
        The matching sentences, in their original order, joined by single
        spaces. An empty string when no sentence matches.
    """
    # Rake is built with its defaults; no custom stopword list is passed.
    # NOTE(review): RAKE typically drops stopwords from keyphrases, so entries
    # such as "will", "should" or "has to" may never match - confirm.
    rake = Rake()

    # Lowercase the obligation words once, outside the loop. Keyphrases are
    # lowercased before comparison, so a capitalised entry (e.g. "Pflicht")
    # would otherwise never match.
    lowered_words = [word.lower() for word in obligation_words]

    obligation_sentences = []
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        # Extracting keywords replaces the phrases from the previous sentence.
        rake.extract_keywords_from_text(sentence)
        ranked_keyphrases = rake.get_ranked_phrases()

        # Substring match: "use" also matches a keyphrase containing "house".
        if any(
            word in keyphrase.lower()
            for keyphrase in ranked_keyphrases
            for word in lowered_words
        ):
            obligation_sentences.append(sentence)

    return ' '.join(obligation_sentences)