pritamdeka
committed on
Commit
•
eb9492e
1
Parent(s):
1df7d15
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,6 @@ import nltkmodule
|
|
4 |
from newspaper import Article
|
5 |
from newspaper import fulltext
|
6 |
import requests
|
7 |
-
|
8 |
from nltk.tokenize import word_tokenize
|
9 |
from sentence_transformers import SentenceTransformer
|
10 |
import pandas as pd
|
@@ -31,7 +30,9 @@ import en_core_sci_lg
|
|
31 |
import string
|
32 |
from nltk.stem.wordnet import WordNetLemmatizer
|
33 |
import gradio as gr
|
|
|
34 |
|
|
|
35 |
nlp = en_core_sci_lg.load()
|
36 |
sp = en_core_sci_lg.load()
|
37 |
all_stopwords = sp.Defaults.stop_words
|
@@ -102,6 +103,7 @@ def keyphrase_generator(article_link, model_1, model_2, max_num_keywords):
|
|
102 |
text_doc.append([X.text for X in doc_1.ents])
|
103 |
entity_list = [item for sublist in text_doc for item in sublist]
|
104 |
entity_list = [word for word in entity_list if not word in all_stopwords]
|
|
|
105 |
entity_list=list(dict.fromkeys(entity_list))
|
106 |
doc_embedding = model_2.encode([doc])
|
107 |
candidates=entity_list
|
|
|
4 |
from newspaper import Article
|
5 |
from newspaper import fulltext
|
6 |
import requests
|
|
|
7 |
from nltk.tokenize import word_tokenize
|
8 |
from sentence_transformers import SentenceTransformer
|
9 |
import pandas as pd
|
|
|
30 |
import string
|
31 |
from nltk.stem.wordnet import WordNetLemmatizer
|
32 |
import gradio as gr
|
33 |
+
import inflect
|
34 |
|
35 |
+
inflect_op = inflect.engine()
|
36 |
nlp = en_core_sci_lg.load()
|
37 |
sp = en_core_sci_lg.load()
|
38 |
all_stopwords = sp.Defaults.stop_words
|
|
|
103 |
text_doc.append([X.text for X in doc_1.ents])
|
104 |
entity_list = [item for sublist in text_doc for item in sublist]
|
105 |
entity_list = [word for word in entity_list if not word in all_stopwords]
|
106 |
+
entity_list = [word_entity for word_entity in entity_list if(inflect_op.singular_noun(word_entity) == False)]
|
107 |
entity_list=list(dict.fromkeys(entity_list))
|
108 |
doc_embedding = model_2.encode([doc])
|
109 |
candidates=entity_list
|