File size: 1,064 Bytes
0fdb130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from typing import TYPE_CHECKING, List, Tuple


if TYPE_CHECKING:
    from spacy.tokens import Doc


class AspectExtractor:
    """Locate candidate aspect spans (runs of nouns) in texts using spaCy.

    A token is treated as part of an aspect when its coarse part-of-speech
    tag (``token.pos_``) is ``"NOUN"`` or ``"PROPN"``. Consecutive matching
    tokens are merged into a single span, reported as a ``slice`` of token
    positions within the parsed document.
    """

    def __init__(self, spacy_model: str) -> None:
        """Load the spaCy pipeline identified by ``spacy_model``.

        Args:
            spacy_model: Name or path handed to ``spacy.load``.
        """
        super().__init__()
        # Imported here rather than at module level so that importing this
        # module does not itself require spaCy to be importable.
        import spacy

        self.nlp = spacy.load(spacy_model)

    def find_groups(self, aspect_mask: List[bool]):
        """Yield a slice for every maximal run of truthy entries in the mask.

        Args:
            aspect_mask: One flag per token; truthy marks an aspect token.

        Yields:
            ``slice(start, stop)`` objects (``stop`` exclusive) in left-to-right
            order, one per contiguous run of truthy flags. Nothing is yielded
            for an empty or all-falsy mask.
        """
        start = None  # index where the currently open run began, if any
        for idx, flag in enumerate(aspect_mask):
            if flag:
                if start is None:
                    start = idx
            elif start is not None:
                # A falsy flag closes the open run; idx is its exclusive end.
                yield slice(start, idx)
                start = None
        if start is not None:
            # The mask ended inside a run. ``start`` can only be set if the
            # loop executed, so ``idx`` is bound to the last position here.
            yield slice(start, idx + 1)

    def __call__(self, texts: List[str]) -> Tuple[List["Doc"], List[List[slice]]]:
        """Parse ``texts`` and find the noun / proper-noun spans of each one.

        Args:
            texts: Raw strings to run through the loaded spaCy pipeline.

        Returns:
            A pair ``(docs, aspects_list)``. ``docs`` holds the parsed
            documents in input order; ``aspects_list[i]`` is the list of
            token-index slices found in ``docs[i]`` (possibly empty).
        """
        aspects_list = []
        docs = list(self.nlp.pipe(texts))
        for doc in docs:
            aspect_mask = [token.pos_ in ("NOUN", "PROPN") for token in doc]
            aspects_list.append(list(self.find_groups(aspect_mask)))
        return docs, aspects_list