"""Frozen dataclass wrappers around the ``chunk_pb2`` Document and Chunk messages."""
from __future__ import annotations

import dataclasses
import hashlib
import uuid
from typing import Union

import proto.chunk_pb2 as chunk_pb2
from domain.domain_protocol import DomainProtocol


@dataclasses.dataclass(frozen=True)
class DocumentD(DomainProtocol[chunk_pb2.Document]):
    """Immutable domain object mirroring a ``chunk_pb2.Document`` message."""

    file_path: str
    authors: str
    publish_date: str

    @property
    def id(self) -> str:
        """Return the SHA-256 hex digest of this document's serialized proto.

        The digest is computed from ``to_proto()``, i.e. from ``file_path``,
        ``authors`` and ``publish_date`` only.
        """
        # Ask protobuf for deterministic serialization: the digest is used as
        # an identifier, so it must not depend on serializer ordering choices.
        payload = self.to_proto().SerializeToString(deterministic=True)
        return hashlib.sha256(payload).hexdigest()

    @classmethod
    def _from_proto(cls, proto: chunk_pb2.Document) -> DocumentD:
        """Build a ``DocumentD`` by copying the three fields from ``proto``."""
        return cls(
            file_path=proto.file_path,
            authors=proto.authors,
            publish_date=proto.publish_date,
        )

    def to_proto(self) -> chunk_pb2.Document:
        """Return a new ``chunk_pb2.Document`` populated from this object."""
        return chunk_pb2.Document(
            file_path=self.file_path,
            authors=self.authors,
            publish_date=self.publish_date,
        )


@dataclasses.dataclass(frozen=True)
class ChunkD(DomainProtocol[chunk_pb2.Chunk]):
    """Immutable domain object mirroring a ``chunk_pb2.Chunk`` message.

    ``parent_reference`` must be a ``DocumentD`` when ``chunk_type`` is
    ``CHUNK_TYPE_PAGE`` and a ``uuid.UUID`` (serialized as
    ``parent_chunk_id``) for every other chunk type; ``__post_init__``
    enforces this.
    """

    chunk_text: str
    chunk_type: chunk_pb2.ChunkType
    chunk_index: int
    parent_reference: Union[uuid.UUID, DocumentD]
    # A fresh random UUID is generated when the caller does not supply one.
    chunk_id: uuid.UUID = dataclasses.field(default_factory=uuid.uuid4)

    @property
    def id(self) -> str:
        """Return ``chunk_id`` as a string."""
        return str(self.chunk_id)

    def __post_init__(self) -> None:
        """Validate that ``parent_reference`` matches ``chunk_type``.

        Raises:
            ValueError: If a page chunk's parent is not a ``DocumentD``, or
                a non-page chunk's parent is not a ``uuid.UUID``.
        """
        is_page = self.chunk_type == chunk_pb2.ChunkType.CHUNK_TYPE_PAGE
        expected_type = DocumentD if is_page else uuid.UUID
        if not isinstance(self.parent_reference, expected_type):
            expected_name = "DocumentD" if is_page else "uuid.UUID"
            raise ValueError(
                f"Chunk (id: {self.chunk_id}) with type {self.chunk_type} "
                f"must have a {expected_name} parent_reference."
            )

    @classmethod
    def _from_proto(cls, proto: chunk_pb2.Chunk) -> ChunkD:
        """Build a ``ChunkD`` from a ``chunk_pb2.Chunk`` message.

        ``parent_chunk_id`` is checked before ``document``; whichever is set
        becomes ``parent_reference``.

        Raises:
            ValueError: If neither ``parent_chunk_id`` nor ``document`` is
                set, if an id string is not a valid UUID, or if the
                resulting parent does not match the chunk type (see
                ``__post_init__``).
        """
        parent_reference: Union[uuid.UUID, DocumentD]
        if proto.HasField("parent_chunk_id"):
            parent_reference = uuid.UUID(proto.parent_chunk_id)
        elif proto.HasField("document"):
            parent_reference = DocumentD._from_proto(proto.document)
        else:
            # Name the fields that were actually checked above.
            raise ValueError(
                f"Chunk proto (id: {proto.chunk_id}) has no "
                f"'parent_chunk_id' or 'document' field."
            )

        return cls(
            chunk_text=proto.chunk_text,
            chunk_type=proto.chunk_type,
            chunk_index=proto.chunk_index,
            parent_reference=parent_reference,
            chunk_id=uuid.UUID(proto.chunk_id),
        )

    def to_proto(self) -> chunk_pb2.Chunk:
        """Return a new ``chunk_pb2.Chunk`` populated from this object.

        A ``uuid.UUID`` parent is written to ``parent_chunk_id``; a
        ``DocumentD`` parent is copied into ``document``.

        Raises:
            ValueError: If ``parent_reference`` is neither a ``uuid.UUID``
                nor a ``DocumentD``.
        """
        chunk_proto = chunk_pb2.Chunk(
            chunk_id=str(self.chunk_id),
            chunk_text=self.chunk_text,
            chunk_type=self.chunk_type,
            chunk_index=self.chunk_index,
        )

        parent = self.parent_reference
        if isinstance(parent, uuid.UUID):
            chunk_proto.parent_chunk_id = str(parent)
        elif isinstance(parent, DocumentD):
            # Message-typed fields cannot be assigned directly; copy into them.
            chunk_proto.document.CopyFrom(parent.to_proto())
        else:
            # Defensive: __post_init__ already rejects other parent types.
            raise ValueError(
                f"Chunk (id: {self.chunk_id}) parent_reference is of unknown type: {type(parent)}"
            )
        return chunk_proto