Spaces:

EE21
/

ToS-Summarization

Runtime error

ToS-Summarization / keyphrase_extraction.py

Update keyphrase_extraction.py

c68f9df verified 10 months ago

1.6 kB

	from rake_nltk import Rake
	import nltk
	import re

	# Download the NLTK data used at runtime: sentence-tokenizer models and the
	# stopword corpus.
	# NLTK 3.9+ looks up "punkt_tab" instead of the pickled "punkt" models, so
	# fetch both to keep sentence tokenisation working on old and new releases.
	# nltk.download() reports a failed download and returns False rather than
	# raising, so the extra request is harmless where the package is not needed.
	nltk.download('punkt')
	nltk.download('punkt_tab')
	nltk.download('stopwords')

	# Words and phrases treated as obligation markers: English first, then German.
	obligation_words = [
	    # English
	    "must",
	    "will",
	    "use",
	    "may",
	    "provides",
	    "is obliged to",
	    "has to",
	    "needs to",
	    "is required to",
	    "shall",
	    "should",
	    "ought to",
	    "required",
	    "obligated",
	    "duty",
	    "agrees to",
	    "has a duty to",
	    "is expected to",
	    "commits to",
	    # German
	    "muss",
	    "wird",
	    "nutzen",
	    "darf",
	    "stellt bereit",
	    "ist verpflichtet",
	    "ist erforderlich",
	    "soll",
	    "sollte",
	    "erforderlich",
	    "verpflichtet",
	    "Pflicht",
	    "stimmt zu",
	    "hat die Pflicht",
	    "wird erwartet",
	    "verpflichtet sich",
	]


	def extract_sentences_with_obligations(text):
	    """Return the sentences of *text* whose keyphrases mention an obligation word.

	    The text is split into sentences after ``.``, ``!`` or ``?`` followed by
	    whitespace. Each sentence is passed through RAKE, and it is kept when any
	    ranked keyphrase contains one of the module-level ``obligation_words`` as
	    a case-insensitive substring.

	    Args:
	        text: The document to scan. ``None`` and empty or whitespace-only
	            strings produce an empty result.

	    Returns:
	        The matching sentences, in their original order, joined by single
	        spaces; an empty string when nothing matches.
	    """
	    # Nothing to scan: skip building the extractor entirely.
	    if not text or not text.strip():
	        return ''

	    # Keyphrases are lower-cased before comparison, so the search terms must
	    # be lower-cased too; otherwise capitalised entries such as "Pflicht"
	    # could never match.
	    needles = [word.lower() for word in obligation_words]

	    # Default-configured extractor. Per the rake-nltk documentation this uses
	    # NLTK's stopword list for its default language, so stopwords are NOT
	    # kept inside keyphrases.
	    # NOTE(review): multi-word entries built around stopwords (e.g. "has to")
	    # therefore presumably never appear in a keyphrase - confirm whether the
	    # raw sentence should be searched instead.
	    rake = Rake()

	    obligation_sentences = []
	    for sentence in re.split(r'(?<=[.!?])\s+', text):
	        # Each call replaces the extractor's previous results, so the ranked
	        # phrases below belong to this sentence only.
	        rake.extract_keywords_from_text(sentence)
	        phrases = [kp.lower() for kp in rake.get_ranked_phrases()]

	        # Substring match: a needle may also hit inside a longer word
	        # (e.g. "use" in "user"), as in the original check.
	        if any(needle in phrase for phrase in phrases for needle in needles):
	            obligation_sentences.append(sentence)

	    # Join the kept sentences into a single string.
	    return ' '.join(obligation_sentences)