fillmorle-app / sftp /data_reader /concrete_reader.py
gossminn's picture
First version
6680682
raw
history blame
1.53 kB
import logging
from collections import defaultdict
from typing import *
import os
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data.instance import Instance
from concrete import SituationMention
from concrete.util import CommunicationReader
from .span_reader import SpanReader
from .srl_reader import SRLDatasetReader
from .concrete_srl import collect_concrete_srl
from ..utils import Span, BIOSmoothing
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@DatasetReader.register('concrete')
class ConcreteDatasetReader(SRLDatasetReader):
    """Dataset reader, registered under the name ``'concrete'``.

    Reads Concrete communications from a file (or every entry of a
    directory), extracts sentences with ``collect_concrete_srl`` and turns
    each one into an instance via ``text_to_instance``.
    """

    def __init__(
            self,
            event_only: bool = False,
            event_smoothing_factor: float = 0.,
            arg_smoothing_factor: float = 0.,
            **extra
    ):
        """Store the reader options and forward the rest to the parent.

        Args:
            event_only: Stored as ``self.event_only``; not consulted in this
                class itself (presumably used by the parent reader -- TODO
                confirm).
            event_smoothing_factor: Stored as ``self.event_smooth_factor``.
            arg_smoothing_factor: Stored as ``self.arg_smooth_factor``.
            **extra: Remaining keyword arguments, passed unchanged to
                ``SRLDatasetReader.__init__``.
        """
        super().__init__(**extra)
        self.event_only = event_only
        self.event_smooth_factor = event_smoothing_factor
        self.arg_smooth_factor = arg_smoothing_factor

    def _read(self, file_path: str) -> Iterable[Instance]:
        """Yield one instance per sentence found under ``file_path``.

        Args:
            file_path: Path to a file readable by ``CommunicationReader``,
                or a directory whose entries are read recursively.

        Yields:
            The result of ``self.text_to_instance(tokens, vr)`` for every
            ``(tokens, vr)`` pair returned by ``collect_concrete_srl``.
        """
        if os.path.isdir(file_path):
            for fn in os.listdir(file_path):
                yield from self._read(os.path.join(file_path, fn))
            # A directory is fully handled by the recursion above; without
            # this return the directory path itself would be handed to
            # CommunicationReader below.
            return

        all_files = CommunicationReader(file_path)
        # CommunicationReader yields (communication, filename) pairs; the
        # filename is not needed here.
        for comm, _ in all_files:
            sentences = collect_concrete_srl(comm)
            for tokens, vr in sentences:
                yield self.text_to_instance(tokens, vr)

        # n_span_removed is not defined in this class; presumably it is
        # maintained by a parent reader -- TODO confirm. Report it, then reset
        # it so the next read starts counting from zero.
        logger.warning(f'{self.n_span_removed} spans were removed')
        self.n_span_removed = 0