|
from __future__ import annotations |
|
|
|
from dataclasses import dataclass |
|
from typing import List, NamedTuple, Optional |
|
|
|
from relik.reader.pytorch_modules.hf.modeling_relik import RelikReaderSample |
|
|
|
|
|
@dataclass
class Word:
    """
    A word representation that includes text, index in the sentence, character
    offsets, lemma, POS tag, dependency relation, and syntactic head.

    # Parameters

    text : `str`
        The text representation.
    index : `int`
        The word offset in the sentence.
    start_char : `int`, optional
        Presumably the character offset at which the word starts in the
        original text (not verifiable from this file alone).
    end_char : `int`, optional
        Presumably the character offset at which the word ends in the
        original text; whether it is inclusive is not visible here.
    lemma : `str`, optional
        The lemma of this word.
    pos : `str`, optional
        The coarse-grained part of speech of this word.
    dep : `str`, optional
        The dependency relation for this word.
    head : `int`, optional
        Presumably the index of the syntactic head of this word.
    """

    text: str
    index: int
    start_char: Optional[int] = None
    end_char: Optional[int] = None

    lemma: Optional[str] = None
    pos: Optional[str] = None
    dep: Optional[str] = None
    head: Optional[int] = None

    def __str__(self) -> str:
        """Return the surface text of the word."""
        return self.text

    def __repr__(self) -> str:
        """Return the same value as `__str__`.

        Defining `__repr__` explicitly replaces the field-by-field repr that
        `@dataclass` would otherwise generate, so containers of words print
        as plain text.
        """
        return str(self)
|
|
|
|
|
class EntitySpan(NamedTuple):
    """
    An immutable labelled span, stored as a `(start, end, label, text)` tuple.

    # Parameters

    start : `int`
        Start position of the span (presumably an offset into the source
        text; the unit is not visible from this file).
    end : `int`
        End position of the span; whether it is inclusive is not visible here.
    label : `str`
        The label assigned to the span.
    text : `str`
        The text associated with the span.
    """

    start: int
    end: int
    label: str
    text: str
|
|
|
|
|
@dataclass
class RelikOutput:
    """
    Container bundling a text with its labelled spans and, optionally, the
    reader windows associated with it.

    # Parameters

    text : `str`
        The text this output refers to.
    labels : `List[EntitySpan]`
        The labelled spans attached to `text`.
    windows : `List[RelikReaderSample]`, optional (default = `None`)
        Reader samples associated with this output; presumably the windows
        the text was split into for the reader. `None` when not provided.
    """

    text: str
    labels: List[EntitySpan]
    windows: Optional[List[RelikReaderSample]] = None
|
|