File size: 4,455 Bytes
af9251e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import json
from abc import ABC
from typing import List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class Person:
    """A dialogue participant described by a name and an age."""

    def __init__(self, name, age):
        """
        Store the participant's attributes exactly as given.

        :param name: value kept on ``self.name``.
        :param age: value kept on ``self.age``.
        """
        self.name, self.age = name, age


class Dialogue:
    """
    In-memory model of a dialogue transcript.

    A ``Dialogue`` remembers the path of the transcript file and the ordered
    list of turns parsed from it. The export helpers only rely on each turn
    exposing ``speaker.name`` and ``message``.
    """

    def __init__(self, file_path: str):
        """
        :param file_path: path of the transcript file read by ``parse_dialogue``.
        """
        self.file_path = file_path
        self.turns = []

    def add_turn(self, turn):
        """
        Append one turn to the end of the dialogue.

        :param turn: object exposing ``speaker`` and ``message`` attributes.
        :return: None
        """
        self.turns.append(turn)

    def parse_dialogue(self):
        """
        Read ``self.file_path`` (UTF-8) and append one turn per speaker/message pair.

        Blank lines are ignored. The remaining lines are consumed in pairs:
        the first must contain a colon and everything before the first colon
        is the speaker name (text after the colon is discarded); the second
        is the message, stored with surrounding whitespace stripped. One
        ``Person`` is created per distinct speaker name and shared by all of
        that speaker's turns. A trailing speaker line with no message after
        it produces no turn.

        :return: None
        :raises ValueError: if a line expected to name a speaker has no colon.
        :raises OSError: if the file cannot be opened.
        """
        participants = {}
        speaker_name = None

        with open(self.file_path, encoding='utf-8') as file:
            # Iterate the file lazily; numbering starts at 1 so error
            # messages match what an editor shows.
            for line_number, raw_line in enumerate(file, start=1):
                line = raw_line.strip()
                if not line:
                    continue

                if speaker_name is None:
                    name, separator, _ = line.partition(':')
                    if not separator:
                        # Same exception type as the former tuple-unpack
                        # failure, but pointing at the offending line.
                        raise ValueError(
                            f"{self.file_path}: line {line_number}: expected "
                            f"'<speaker>:' but found {line!r}"
                        )
                    speaker_name = name
                    continue

                # A speaker is pending, so this line is that speaker's message.
                if speaker_name not in participants:
                    participants[speaker_name] = Person(speaker_name, None)
                self.add_turn(Turn(participants[speaker_name], line))

                # Expect a speaker line next.
                speaker_name = None

    def display(self):
        """Print every turn to stdout as ``<speaker name>: <message>``."""
        for turn in self.turns:
            print(f"{turn.speaker.name}: {turn.message}")

    def export_to_file(self, file_path):
        """
        Write every turn to ``file_path`` (UTF-8), one per line, as
        ``<speaker name>: <message>``. An existing file is overwritten.

        Note that this one-line-per-turn layout differs from the
        two-line-per-turn layout ``parse_dialogue`` reads.

        :param file_path: destination path.
        :return: None
        """
        with open(file_path, 'w', encoding='utf-8') as file:
            for turn in self.turns:
                file.write(f"{turn.speaker.name}: {turn.message}\n")

    def to_dict(self):
        """
        :return: ``{"turns": [{"speaker": <name>, "message": <message>}, ...]}``
                 in turn order.
        """
        return {
            "turns": [
                {"speaker": turn.speaker.name, "message": turn.message}
                for turn in self.turns
            ]
        }

    def to_json(self):
        """
        :return: the ``to_dict`` structure serialised as indented JSON with
                 non-ASCII characters kept as-is.
        """
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    def participants_to_export(self):
        """
        List the distinct speaker names of the dialogue.

        :return: the names joined with ``', '``, each appearing once, in order
                 of first appearance; an empty string when there are no turns.
        """
        # dict.fromkeys de-duplicates while keeping insertion order, so the
        # result is stable across runs (iterating a set of strings is not).
        unique_names = dict.fromkeys(turn.speaker.name for turn in self.turns)
        return ', '.join(unique_names)


class Turn:
    """One utterance in a dialogue: who spoke and what was said."""

    def __init__(self, speaker, message):
        """
        Store the turn's attributes exactly as given.

        :param speaker: value kept on ``self.speaker``.
        :param message: value kept on ``self.message``.
        """
        self.speaker, self.message = speaker, message


class DialogueLoader(BaseLoader, ABC):
    """Document loader that emits one ``Document`` per dialogue turn."""

    def __init__(self, file_path: str):
        """
        Parse the dialogue file immediately and keep the result.

        The file is read here, so errors raised while opening or parsing it
        surface at construction time.

        :param file_path: path of the dialogue file to parse.
        """
        self.file_path = file_path
        parsed = Dialogue(file_path=file_path)
        parsed.parse_dialogue()
        self.dialogue = parsed

    def load(self) -> List[Document]:
        """
        Convert the parsed dialogue into documents.

        Each turn's message becomes the page content. The ``source`` metadata
        entry is a single string combining the dialogue file path, the turn's
        speaker name and the list of all participants.

        :return: one ``Document`` per turn, in turn order.
        """
        origin = self.dialogue.file_path
        everyone = self.dialogue.participants_to_export()

        return [
            Document(
                page_content=turn.message,
                metadata={
                    "source": f"Dialogue File:{origin},"
                              f"speaker:{turn.speaker.name},"
                              f"participant:{everyone}"
                },
            )
            for turn in self.dialogue.turns
        ]