3ie-intervention-outcome-entity-linking

Sleeping

App Files Files Community

3ie-intervention-outcome-entity-linking / relik /inference /data /objects.py

CarlosMalaga

Upload 201 files

2f044c1 verified 6 months ago

raw

history blame

2.07 kB

	from __future__ import annotations

	from dataclasses import dataclass
	from typing import Dict, List, NamedTuple, Optional

	from relik.reader.pytorch_modules.hf.modeling_relik import RelikReaderSample
	from relik.retriever.indexers.document import Document


	@dataclass
	class Word:
	    """
	    A word representation that includes text, index in the sentence, POS tag, lemma,
	    dependency relation, and similar information.

	    # Parameters
	    text : `str`
	        The text representation. Also what `str()` and `repr()` return.
	    i : `int`
	        The word offset in the sentence.
	    idx : `int`, optional
	        NOTE(review): presumably the character offset at which the word starts
	        in the original text - confirm against the tokenizer that builds it.
	    idx_end : `int`, optional
	        NOTE(review): presumably the character offset at which the word ends -
	        confirm (inclusive vs. exclusive is not established here).
	    lemma : `str`, optional
	        The lemma of this word.
	    pos : `str`, optional
	        The coarse-grained part of speech of this word.
	    dep : `str`, optional
	        The dependency relation for this word.
	    head : `int`, optional
	        NOTE(review): presumably the index of this word's syntactic head -
	        confirm against the preprocessing pipeline.
	    """

	    text: str
	    i: int
	    idx: Optional[int] = None
	    idx_end: Optional[int] = None
	    # preprocessing fields
	    lemma: Optional[str] = None
	    pos: Optional[str] = None
	    dep: Optional[str] = None
	    head: Optional[int] = None

	    def __str__(self) -> str:
	        """Return the raw text of the word."""
	        return self.text

	    def __repr__(self) -> str:
	        """Return the same string as `__str__` (overrides the dataclass repr)."""
	        return self.__str__()


	class Span(NamedTuple):
	    """
	    Immutable record describing one labelled span.

	    Fields, in positional order: `start`, `end`, `label`, `text`.
	    NOTE(review): whether `start`/`end` are character or word offsets, and
	    whether `end` is inclusive, is not established in this file - confirm
	    with the code that builds these tuples.
	    """

	    # Boundaries of the span.
	    start: int
	    end: int
	    # Label assigned to the span.
	    label: str
	    # Text associated with the span.
	    text: str


	class Triples(NamedTuple):
	    """
	    Immutable record holding a subject span, a label, an object span and a
	    confidence value.

	    Fields, in positional order: `subject`, `label`, `object`, `confidence`.
	    NOTE(review): the range of `confidence` is not established in this file.
	    """

	    # First span of the triple.
	    subject: Span
	    # Label attached to the triple.
	    label: str
	    # Second span of the triple (field name intentionally matches the public API).
	    object: Span
	    # Confidence value stored with the triple.
	    confidence: float

	@dataclass
	class RelikOutput:
	    """
	    Container bundling a text with its tokens, spans, triples and candidates.

	    `candidates` maps a `TaskType` to a list of `Document` objects.
	    `windows` is optional and defaults to `None`; when given it is a list of
	    `RelikReaderSample` objects.
	    """

	    # The text this output refers to.
	    text: str
	    # Tokens stored alongside the text.
	    tokens: List[str]
	    # Span records.
	    spans: List[Span]
	    # Triple records.
	    triples: List[Triples]
	    # Candidate documents grouped by task type.
	    candidates: Dict[TaskType, List[Document]]
	    # Optional list of reader samples; None when not provided.
	    windows: Optional[List[RelikReaderSample]] = None


	from enum import Enum


	class AnnotationType(Enum):
	    """
	    Enumeration with two members, `CHAR` ("char") and `WORD` ("word").

	    NOTE(review): presumably selects the unit in which annotation offsets are
	    expressed - confirm with the callers that consume it.
	    """

	    CHAR = "char"
	    WORD = "word"


	class TaskType(Enum):
	    """
	    Enumeration with three members: `SPAN` ("span"), `TRIPLET` ("triplet")
	    and `BOTH` ("both").

	    NOTE(review): presumably selects which kind of extraction is requested -
	    confirm with the callers that consume it.
	    """

	    SPAN = "span"
	    TRIPLET = "triplet"
	    BOTH = "both"