EmreYY20 committed on
Commit
d7485e8
1 Parent(s): fa8d330

add keyphrase extraction

Browse files
Files changed (3) hide show
  1. app.py +8 -2
  2. keyphrase_extraction.py +24 -0
  3. requirements.txt +0 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import PyPDF2
3
  from extractive_model import summarize_with_textrank
4
  from abstractive_model import summarize_with_bart
5
  from keyword_extraction import extract_keywords
 
6
  #from blanc import BlancHelp
7
 
8
  # Set page to wide mode
@@ -25,7 +26,7 @@ def main():
25
 
26
  # Left column: Radio buttons for summarizer choice
27
  with col1:
28
- radio_options = ['Abstractive', 'Extractive', 'Keyword Extraction']
29
  radio_selection = st.radio("Choose type of summarizer:", radio_options)
30
 
31
  # Middle column: Text input and File uploader
@@ -56,11 +57,16 @@ def main():
56
  summary = summarize_with_bart(file_content)
57
  st.session_state.summary = summary
58
 
59
- # Perform extractive summarization
60
  if radio_selection == "Keyword Extraction":
61
  summary = extract_keywords(file_content)
62
  st.session_state.summary = summary
63
 
 
 
 
 
 
64
  # Right column: Displaying text after pressing 'Summarize'
65
  with col3:
66
  st.write("Summary:")
 
3
  from extractive_model import summarize_with_textrank
4
  from abstractive_model import summarize_with_bart
5
  from keyword_extraction import extract_keywords
6
+ from keyphrase_extraction import extract_keyphrase
7
  #from blanc import BlancHelp
8
 
9
  # Set page to wide mode
 
26
 
27
  # Left column: Radio buttons for summarizer choice
28
  with col1:
29
+ radio_options = ['Abstractive', 'Extractive', 'Keyword Extraction', 'Keyphrase Extraction']
30
  radio_selection = st.radio("Choose type of summarizer:", radio_options)
31
 
32
  # Middle column: Text input and File uploader
 
57
  summary = summarize_with_bart(file_content)
58
  st.session_state.summary = summary
59
 
60
+ # Perform Keyword Extraction
61
  if radio_selection == "Keyword Extraction":
62
  summary = extract_keywords(file_content)
63
  st.session_state.summary = summary
64
 
65
+ # Perform Keyphrase Extraction
66
+ if radio_selection == "Keyphrase Extraction":
67
+ summary = extract_keyphrase(file_content)
68
+ st.session_state.summary = summary
69
+
70
  # Right column: Displaying text after pressing 'Summarize'
71
  with col3:
72
  st.write("Summary:")
keyphrase_extraction.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+
3
+ # Load the English language model
4
+ nlp = spacy.load("en_core_web_sm")
5
+
6
# Words and phrases treated as markers of an obligation. The list is
# consulted by extract_keyphrase when it selects sentences; all entries
# are lowercase.
obligation_words = [
    "must",
    "will",
    "use",
    "may",
    "provides",
    "is obliged to",
    "has to",
    "needs to",
    "is required to",
    "shall",
    "should",
    "ought to",
    "required",
    "obligated",
    "duty",
]
10
+
11
def _contains_phrase(tokens, phrase):
    """Return True if *phrase* occurs in *tokens* as a run of consecutive items."""
    width = len(phrase)
    return any(
        tuple(tokens[start:start + width]) == phrase
        for start in range(len(tokens) - width + 1)
    )


def extract_keyphrase(text):
    """Return the sentences of *text* that contain an obligation word or phrase.

    The text is parsed with the module-level spaCy pipeline ``nlp`` and split
    into sentences. A sentence is kept when its lowercased tokens include any
    single-word entry of ``obligation_words``, or any multi-word entry (for
    example "has to") as a run of consecutive tokens.

    Args:
        text: The raw document text to scan.

    Returns:
        A list holding the text of every matching sentence, in document
        order. Empty or missing input yields an empty list.
    """
    if not text:
        return []

    # Split the configured entries once: single words are looked up in a set,
    # multi-word phrases are matched as token sequences. Comparing one token
    # against a multi-word entry can never succeed, so phrases need their own
    # check.
    single_words = set()
    phrases = []
    for entry in obligation_words:
        parts = tuple(entry.split())
        if len(parts) == 1:
            single_words.add(parts[0])
        elif parts:
            phrases.append(parts)

    # Parse the input text with spaCy
    doc = nlp(text)

    obligation_sentences = []
    for sentence in doc.sents:
        # Drop whitespace-only tokens (e.g. line breaks) so they cannot split
        # a phrase that is otherwise contiguous.
        tokens = [
            token.text.lower() for token in sentence if token.text.strip()
        ]
        has_word = any(token in single_words for token in tokens)
        if has_word or any(_contains_phrase(tokens, p) for p in phrases):
            obligation_sentences.append(sentence.text)

    return obligation_sentences
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ