Spaces:
Paused
Paused
File size: 1,064 Bytes
0fdb130 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
from typing import TYPE_CHECKING, Iterator, List, Tuple

if TYPE_CHECKING:
    from spacy.tokens import Doc
class AspectExtractor:
    """Locate candidate aspect terms in texts as runs of noun tokens.

    Each text is run through a spaCy pipeline; every maximal run of
    consecutive tokens whose coarse part-of-speech tag is in
    ``ASPECT_POS_TAGS`` is reported as a token-index ``slice`` into the
    corresponding document.
    """

    # Coarse POS tags (``token.pos_``) that mark a token as part of an aspect.
    ASPECT_POS_TAGS = frozenset({"NOUN", "PROPN"})

    def __init__(self, spacy_model: str) -> None:
        """Load the spaCy pipeline named (or located at) *spacy_model*."""
        super().__init__()
        # Imported here rather than at module level so that importing this
        # module does not itself require spaCy to be importable.
        import spacy

        self.nlp = spacy.load(spacy_model)

    def find_groups(self, aspect_mask: List[bool]) -> Iterator[slice]:
        """Yield one ``slice`` per maximal run of truthy flags.

        Args:
            aspect_mask: One flag per token; truthy marks an aspect token.

        Yields:
            ``slice(start, stop)`` for each run, in order of appearance, with
            ``stop`` exclusive. Nothing is yielded for an empty or all-false
            mask.
        """
        run_start = None
        seen = 0  # number of flags consumed so far; closes a trailing run
        for idx, flag in enumerate(aspect_mask):
            seen = idx + 1
            if flag:
                if run_start is None:
                    run_start = idx
            elif run_start is not None:
                yield slice(run_start, idx)
                run_start = None
        # A run that reaches the end of the mask is never closed by a falsy
        # flag inside the loop, so close it here.
        if run_start is not None:
            yield slice(run_start, seen)

    def __call__(
        self, texts: List[str]
    ) -> Tuple[List["Doc"], List[List[slice]]]:
        """Parse *texts* and find the noun/proper-noun runs in each of them.

        Args:
            texts: Raw input strings, passed to ``self.nlp.pipe``.

        Returns:
            A pair ``(docs, aspects_list)``: ``docs`` holds the parsed
            documents in input order, and ``aspects_list[i]`` is the list of
            token slices found in ``docs[i]`` (empty when there are none).
        """
        docs = list(self.nlp.pipe(texts))
        aspects_list = [
            list(
                self.find_groups(
                    [token.pos_ in self.ASPECT_POS_TAGS for token in doc]
                )
            )
            for doc in docs
        ]
        return docs, aspects_list
|