CarlosMalaga's picture
Upload 201 files
2f044c1 verified
raw
history blame
1.72 kB
from typing import List, Union
class BaseSentenceSplitter:
    """
    A `BaseSentenceSplitter` splits strings into sentences.

    This base class only defines the interface: :meth:`split_sentences` raises
    :class:`NotImplementedError` and is meant to be overridden, while
    :meth:`__call__` and :meth:`split_sentences_batch` delegate to it.
    """

    def __call__(self, *args, **kwargs):
        """
        Calls :meth:`split_sentences`, forwarding every argument unchanged.
        """
        return self.split_sentences(*args, **kwargs)

    def split_sentences(
        self, text: str, max_len: int = 0, *args, **kwargs
    ) -> List[str]:
        """
        Splits a `text` :class:`str` paragraph into a list of :class:`str`, where each is a sentence.

        Args:
            text (:obj:`str`):
                Text to split.
            max_len (:obj:`int`, optional, defaults to :obj:`0`):
                Length-related option; this base class does not use it, so its
                exact meaning is defined by the overriding implementation.

        Returns:
            :obj:`List[str]`: The sentences found in ``text``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def split_sentences_batch(
        self, texts: List[str], *args, **kwargs
    ) -> List[List[str]]:
        """
        Default implementation is to just iterate over the texts and call `split_sentences`.

        Args:
            texts (:obj:`List[str]`):
                Texts to split.
            *args, **kwargs:
                Extra arguments forwarded to every :meth:`split_sentences` call.

        Returns:
            :obj:`List[List[str]]`: One list of sentences per input text, in input order.
        """
        # Forward the extra arguments so that options such as ``max_len`` apply
        # to each text instead of being silently dropped.
        return [self.split_sentences(text, *args, **kwargs) for text in texts]

    @staticmethod
    def check_is_batched(
        texts: Union[str, List[str], List[List[str]]], is_split_into_words: bool
    ) -> bool:
        """
        Check if input is batched or a single sample.

        Args:
            texts (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
                Text to check.
            is_split_into_words (:obj:`bool`):
                If :obj:`False`, any list or tuple counts as a batch. If :obj:`True`,
                a list or tuple is a single sample (a sequence of words) unless its
                first element is itself a list or tuple.

        Returns:
            :obj:`bool`: ``True`` if ``texts`` is batched, ``False`` otherwise.
        """
        if not isinstance(texts, (list, tuple)):
            # A bare string (or any non-sequence input) is always a single sample.
            return False
        if not is_split_into_words:
            return True
        # Pre-tokenized input: only a non-empty sequence of sequences is a batch.
        return bool(texts) and isinstance(texts[0], (list, tuple))