import json
from abc import ABC
from typing import List

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class Person:
    """A dialogue participant, identified by name with an optional age."""

    def __init__(self, name, age):
        self.name = name
        self.age = age


class Dialogue:
    """
    In-memory model of a dialogue read from a text file.

    A dialogue is an ordered list of ``Turn`` objects, each pairing a
    ``Person`` (the speaker) with the message they said.
    """

    def __init__(self, file_path: str):
        self.file_path = file_path
        self.turns: List["Turn"] = []

    def add_turn(self, turn):
        """
        Append a turn to the end of the dialogue.

        :param turn: the ``Turn`` to record
        :return: None
        """
        self.turns.append(turn)

    def parse_dialogue(self):
        """
        Read ``self.file_path`` (UTF-8) and append one ``Turn`` per
        speaker/message pair found in it.

        Blank lines are skipped. The remaining lines are consumed in pairs:
        the first line of a pair names the speaker (the text before its first
        colon; anything after the colon is ignored) and the second line is
        the message. One ``Person`` is created per distinct speaker name and
        shared by all of that speaker's turns. A speaker line at the end of
        the file with no message after it produces no turn.

        If your file uses a different layout or carries other metadata, the
        parsing logic needs to be adjusted accordingly.

        :return: None
        :raises ValueError: if a line expected to name a speaker has no colon
        """
        participants = {}
        speaker_name = None
        with open(self.file_path, encoding='utf-8') as file:
            for line_number, raw_line in enumerate(file, start=1):
                line = raw_line.strip()
                if not line:
                    continue

                if speaker_name is None:
                    name, separator, _ = line.partition(':')
                    if not separator:
                        # Same exception type the tuple-unpacking of
                        # split(':', 1) used to raise, but with enough
                        # context to locate the malformed line.
                        raise ValueError(
                            f"{self.file_path}:{line_number}: expected a "
                            f"'speaker:' line but got {line!r}"
                        )
                    speaker_name = name
                    continue

                # This non-blank line is the message for the pending speaker.
                speaker = participants.get(speaker_name)
                if speaker is None:
                    speaker = Person(speaker_name, None)
                    participants[speaker_name] = speaker
                self.add_turn(Turn(speaker, line))

                # Reset so the next non-blank line is read as a speaker line.
                speaker_name = None

    def display(self):
        """Print every turn to stdout as ``speaker: message``."""
        for turn in self.turns:
            print(f"{turn.speaker.name}: {turn.message}")

    def export_to_file(self, file_path):
        """
        Write every turn to ``file_path`` (UTF-8), one ``speaker: message``
        line per turn. An existing file is overwritten.

        :param file_path: destination path
        :return: None
        """
        with open(file_path, 'w', encoding='utf-8') as file:
            file.writelines(
                f"{turn.speaker.name}: {turn.message}\n" for turn in self.turns
            )

    def to_dict(self):
        """
        Return the dialogue as a plain dict.

        :return: ``{"turns": [{"speaker": <name>, "message": <text>}, ...]}``
                 in turn order
        """
        return {
            "turns": [
                {"speaker": turn.speaker.name, "message": turn.message}
                for turn in self.turns
            ]
        }

    def to_json(self):
        """
        Return ``to_dict()`` serialized as indented JSON.

        Non-ASCII characters are written as-is rather than escaped.

        :return: JSON string
        """
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    def participants_to_export(self):
        """
        Return the distinct speaker names joined with ``', '``.

        Names are listed in order of first appearance, so the result is the
        same on every run (a plain ``set`` would iterate in an order that
        varies with string hash randomization).

        :return: comma-separated speaker names; ``''`` when there are no turns
        """
        unique_names = dict.fromkeys(turn.speaker.name for turn in self.turns)
        return ', '.join(unique_names)


class Turn:
    """A single utterance: who spoke (``speaker``) and what was said."""

    def __init__(self, speaker, message):
        self.speaker = speaker
        self.message = message


class DialogueLoader(BaseLoader, ABC):
    """Load a dialogue file as one ``Document`` per turn."""

    def __init__(self, file_path: str):
        """
        Parse the dialogue file immediately and keep the result.

        :param file_path: path of the dialogue file to read
        """
        self.file_path = file_path
        dialogue = Dialogue(file_path=file_path)
        dialogue.parse_dialogue()
        self.dialogue = dialogue

    def load(self) -> List[Document]:
        """
        Convert the parsed dialogue into documents.

        Each turn becomes a ``Document`` whose ``page_content`` is the
        message and whose ``metadata["source"]`` records the dialogue file,
        the turn's speaker and the full participant list.

        :return: one ``Document`` per turn, in turn order
        """
        participants = self.dialogue.participants_to_export()
        documents = []
        for turn in self.dialogue.turns:
            # A fresh dict is built per turn, so documents never share
            # metadata objects.
            metadata = {"source": f"Dialogue File:{self.dialogue.file_path},"
                                  f"speaker:{turn.speaker.name},"
                                  f"participant:{participants}"}
            documents.append(
                Document(page_content=turn.message, metadata=metadata)
            )
        return documents