# Web file-view residue preserved as a comment:
#   author: riccorl
#   commit: "first commit" (626eca0)
#   size: 1.67 kB
from __future__ import annotations
from dataclasses import dataclass
from typing import List, NamedTuple, Optional
from relik.reader.pytorch_modules.hf.modeling_relik import RelikReaderSample
@dataclass
class Word:
    """
    A single word of a sentence: its surface text and position, plus optional
    character offsets and linguistic annotations.

    Both `str(word)` and `repr(word)` yield only the word's `text`.

    # Parameters

    text : `str`
        The text representation.
    index : `int`
        The word offset in the sentence.
    start_char : `int`, optional (default = `None`)
        Presumably the character offset at which the word starts in the
        source text -- confirm against the code that fills it in.
    end_char : `int`, optional (default = `None`)
        Presumably the character offset at which the word ends; whether it is
        inclusive or exclusive is not established here -- confirm.
    lemma : `str`, optional (default = `None`)
        The lemma of this word.
    pos : `str`, optional (default = `None`)
        The coarse-grained part of speech of this word.
    dep : `str`, optional (default = `None`)
        The dependency relation for this word.
    head : `int`, optional (default = `None`)
        Presumably the index of this word's syntactic head -- confirm.
    """

    # Required fields: every word has a surface form and a position.
    text: str
    index: int

    # Optional character span of the word.
    start_char: Optional[int] = None
    end_char: Optional[int] = None

    # Optional annotations filled in by preprocessing.
    lemma: Optional[str] = None
    pos: Optional[str] = None
    dep: Optional[str] = None
    head: Optional[int] = None

    def __repr__(self) -> str:
        """Mirror `__str__`, so containers of words print as plain text."""
        return self.__str__()

    def __str__(self) -> str:
        """Return the surface text of the word."""
        return self.text
class EntitySpan(NamedTuple):
    """
    An immutable `(start, end, label, text)` record describing one labelled span.

    # Parameters

    start : `int`
        Start offset of the span (unit and base not established here --
        presumably an offset into the source text; confirm with the producer).
    end : `int`
        End offset of the span (inclusive/exclusive not established here --
        confirm with the producer).
    label : `str`
        The label assigned to the span.
    text : `str`
        Text associated with the span (presumably its surface form -- confirm).
    """

    start: int
    end: int
    label: str
    text: str
@dataclass
class RelikOutput:
    """
    Result container pairing a text with the entity spans attached to it.

    # Parameters

    text : `str`
        The text this output refers to.
    labels : `List[EntitySpan]`
        The labelled spans for `text`.
    windows : `List[RelikReaderSample]`, optional (default = `None`)
        Reader samples associated with this output (presumably the windows
        the text was split into -- confirm with the caller that fills it in).
    """

    # Required payload.
    text: str
    labels: List[EntitySpan]

    # Optional; stays `None` unless the caller supplies it.
    windows: Optional[List[RelikReaderSample]] = None