CarlosMalaga's picture
Upload 201 files
2f044c1 verified
import json
import numpy as np
from typing import Iterable
class NpEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    Intended to be passed as ``cls=NpEncoder`` to ``json.dump``/``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``.

        NumPy integers are converted to ``int``, NumPy floats to ``float`` and
        arrays to (nested) lists. Every other object is handed to the base
        encoder's ``default``.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # The remaining NumPy scalar families map one-to-one onto builtins.
        for numpy_kind, to_builtin in ((np.integer, int), (np.floating, float)):
            if isinstance(obj, numpy_kind):
                return to_builtin(obj)
        return super().default(obj)
class RelikReaderSample:
    """Attribute-style view over the keyword arguments it was built from.

    The keyword arguments are stored in the ``_d`` dictionary. Reading a name
    that is neither a regular attribute nor a key of ``_d`` yields ``None``
    instead of raising ``AttributeError`` (dunder names excepted).
    """

    def __init__(self, **kwargs):
        # Go through the base class: our own __setattr__ consults ``_d``,
        # which does not exist yet at this point.
        super().__setattr__("_d", kwargs)

    def __getattr__(self, item):
        """Fallback lookup, invoked only when normal attribute access fails.

        Returns:
            The value stored under ``item`` in ``_d``, or ``None`` if absent.

        Raises:
            AttributeError: For dunder names and for ``_d`` itself.
        """
        if item == "_d":
            # ``_d`` is missing (e.g. instance created without __init__);
            # looking it up through ``self._d`` would recurse forever.
            raise AttributeError(item)
        if item.startswith("__") and item.endswith("__"):
            # this is likely some python library-specific variable (such as __deepcopy__ for copy)
            # better follow standard behavior here
            raise AttributeError(item)
        return self._d.get(item)

    def __setattr__(self, key, value):
        """Update ``_d`` when ``key`` is already one of its keys.

        Any other name is stored as a regular instance attribute and is
        therefore not part of ``_d``.
        """
        if key in self._d:
            self._d[key] = value
        else:
            super().__setattr__(key, value)

    def to_jsons(self) -> str:
        """Serialize the sample to a JSON string.

        When ``_d`` has a ``predicted_window_labels`` key, that key and
        ``span_title_probabilities`` are left out of the copied fields and
        ``predicted_window_labels`` is rebuilt from the three-item entries of
        ``self.predicted_window_labels_chars``. Otherwise ``_d`` is dumped
        unchanged.

        Returns:
            The JSON document as a string, in both cases.
        """
        if "predicted_window_labels" not in self._d:
            return json.dumps(self._d, cls=NpEncoder)

        excluded = ("predicted_window_labels", "span_title_probabilities")
        new_obj = {k: v for k, v in self._d.items() if k not in excluded}
        new_obj["predicted_window_labels"] = [
            [ss, se, pred_title]
            for (ss, se, pred_title) in self.predicted_window_labels_chars
        ]
        # Serialize here as well so the return type matches the annotation
        # and the other branch.
        return json.dumps(new_obj, cls=NpEncoder)
def load_relik_reader_samples(path: str) -> Iterable[RelikReaderSample]:
    """Lazily read a JSON Lines file and yield one sample per record.

    Args:
        path: Location of a file holding one JSON object per line.

    Yields:
        A ``RelikReaderSample`` built from the key/value pairs of each line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform default.
    with open(path, encoding="utf-8") as f:
        for line in f:
            record = line.strip()
            if not record:
                # Tolerate blank lines (e.g. extra newlines at end of file).
                continue
            yield RelikReaderSample(**json.loads(record))