navalnica committed on
Commit
1e32511
·
1 Parent(s): 659f5b7

improve split prompt

Browse files
data/samples_to_split.py CHANGED
@@ -25,6 +25,37 @@ bed, and lay down stiffly—was instantly asleep.
25
  """
26
 
27
  GATSBY_2 = """\
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  “If you’ll get up.”
29
 
30
  “I will. Good night, Mr. Carraway. See you anon.”
@@ -50,4 +81,18 @@ of weekends out here this summer. I think the home influence will be
50
  very good for her.”
51
 
52
  Daisy and Tom looked at each other for a moment in silence.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  """
 
25
  """
26
 
27
  GATSBY_2 = """\
28
+ Inside, the crimson room bloomed with light. Tom and Miss Baker sat at
29
+ either end of the long couch and she read aloud to him from the
30
+ Saturday Evening Post—the words, murmurous and uninflected, running
31
+ together in a soothing tune. The lamplight, bright on his boots and
32
+ dull on the autumn-leaf yellow of her hair, glinted along the paper as
33
+ she turned a page with a flutter of slender muscles in her arms.
34
+
35
+ When we came in she held us silent for a moment with a lifted hand.
36
+
37
+ “To be continued,” she said, tossing the magazine on the table, “in
38
+ our very next issue.”
39
+
40
+ Her body asserted itself with a restless movement of her knee, and she
41
+ stood up.
42
+
43
+ “Ten o’clock,” she remarked, apparently finding the time on the
44
+ ceiling. “Time for this good girl to go to bed.”
45
+
46
+ “Jordan’s going to play in the tournament tomorrow,” explained Daisy,
47
+ “over at Westchester.”
48
+
49
+ “Oh—you’re Jordan Baker.”
50
+
51
+ I knew now why her face was familiar—its pleasing contemptuous
52
+ expression had looked out at me from many rotogravure pictures of the
53
+ sporting life at Asheville and Hot Springs and Palm Beach. I had heard
54
+ some story of her too, a critical, unpleasant story, but what it was I
55
+ had forgotten long ago.
56
+
57
+ “Good night,” she said softly. “Wake me at eight, won’t you.”
58
+
59
  “If you’ll get up.”
60
 
61
  “I will. Good night, Mr. Carraway. See you anon.”
 
81
  very good for her.”
82
 
83
  Daisy and Tom looked at each other for a moment in silence.
84
+
85
+ “Is she from New York?” I asked quickly.
86
+
87
+ “From Louisville. Our white girlhood was passed together there. Our
88
+ beautiful white—”
89
+
90
+ “Did you give Nick a little heart to heart talk on the veranda?”
91
+ demanded Tom suddenly.
92
+
93
+ “Did I?” She looked at me. “I can’t seem to remember, but I think we
94
+ talked about the Nordic race. Yes, I’m sure we did. It sort of crept
95
+ up on us and first thing you know—”
96
+
97
+ “Don’t believe everything you hear, Nick,” he advised me.
98
  """
pg.ipynb CHANGED
@@ -50,86 +50,125 @@
50
  "outputs": [],
51
  "source": [
52
  "import data.samples_to_split as samples\n",
53
- "from utils import GPTModels, create_split_text_chain"
 
54
  ]
55
  },
56
  {
57
  "cell_type": "code",
58
- "execution_count": 5,
59
  "metadata": {},
60
- "outputs": [
61
- {
62
- "name": "stdout",
63
- "output_type": "stream",
64
- "text": [
65
- "characters: ['narrator', 'Mr. Carraway', 'Daisy', 'Miss Baker', 'Tom', 'Nick']\n",
66
- "[narrator] “If you’ll get up.”\n",
67
- "[Mr. Carraway] “I will. Good night, Mr. Carraway. See you anon.”\n",
68
- "[Daisy] “Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”\n",
69
- "[Miss Baker] “Good night,” called Miss Baker from the stairs. “I haven’t heard a word.”\n",
70
- "[Tom] “She’s a nice girl,” said Tom after a moment. “They oughtn’t to let her run around the country this way.”\n",
71
- "[Daisy] “Who oughtn’t to?” inquired Daisy coldly.\n",
72
- "[narrator] “Her family.”\n",
73
- "[narrator] “Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”\n",
74
- "[narrator] Daisy and Tom looked at each other for a moment in silence.\n"
75
- ]
76
- }
77
- ],
78
  "source": [
79
- "chain = create_split_text_chain(llm_model=GPTModels.GPT_4_TURBO_2024_04_09)\n",
 
80
  "with get_openai_callback() as cb:\n",
81
- " res = chain.invoke({'text': samples.GATSBY_2})\n",
82
- "print(res.to_pretty_text())"
83
  ]
84
  },
85
  {
86
  "cell_type": "code",
87
- "execution_count": 6,
88
  "metadata": {},
89
  "outputs": [
90
  {
91
  "data": {
92
  "text/plain": [
93
- "Tokens Used: 1579\n",
94
- "\tPrompt Tokens: 1253\n",
95
- "\tCompletion Tokens: 326\n",
96
- "Successful Requests: 1\n",
97
- "Total Cost (USD): $0.02231"
98
  ]
99
  },
100
- "execution_count": 6,
101
  "metadata": {},
102
  "output_type": "execute_result"
103
  }
104
  ],
105
  "source": [
106
- "cb"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  ]
108
  },
109
  {
110
  "cell_type": "code",
111
- "execution_count": 7,
112
  "metadata": {},
113
  "outputs": [
114
  {
115
  "name": "stdout",
116
  "output_type": "stream",
117
  "text": [
118
- "characters: ['narrator', 'Mr. Carraway', 'Daisy', 'Miss Baker', 'Tom', 'Nick']\n",
119
- "[narrator] “If you’ll get up.”\n",
120
- "[Mr. Carraway] “I will. Good night, Mr. Carraway. See you anon.”\n",
121
- "[Daisy] “Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”\n",
122
- "[Miss Baker] “Good night,” called Miss Baker from the stairs. “I haven’t heard a word.”\n",
123
- "[Tom] “She’s a nice girl,” said Tom after a moment. “They oughtn’t to let her run around the country this way.”\n",
124
- "[Daisy] “Who oughtn’t to?” inquired Daisy coldly.\n",
125
- "[narrator] “Her family.”\n",
126
- "[narrator] “Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”\n",
127
- "[narrator] Daisy and Tom looked at each other for a moment in silence.\n"
128
  ]
129
  }
130
  ],
131
  "source": [
132
- "print(res.to_pretty_text())"
133
  ]
134
  },
135
  {
 
50
  "outputs": [],
51
  "source": [
52
  "import data.samples_to_split as samples\n",
53
+ "from src.utils import GPTModels\n",
54
+ "from src.text_split_chain import create_split_text_chain_v2"
55
  ]
56
  },
57
  {
58
  "cell_type": "code",
59
+ "execution_count": 4,
60
  "metadata": {},
61
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  "source": [
63
+ "chain = create_split_text_chain_v2(llm_model=GPTModels.GPT_4o)\n",
64
+ "# chain = create_split_text_chain_v2(llm_model=GPTModels.GPT_4_TURBO_2024_04_09)\n",
65
  "with get_openai_callback() as cb:\n",
66
+ " res = chain.invoke({'text': samples.GATSBY_2})"
 
67
  ]
68
  },
69
  {
70
  "cell_type": "code",
71
+ "execution_count": 5,
72
  "metadata": {},
73
  "outputs": [
74
  {
75
  "data": {
76
  "text/plain": [
77
+ "SplitTextOutputV2(text_raw='<narrator>Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.</narrator>\\n\\n<narrator>When we came in she held us silent for a moment with a lifted hand.</narrator>\\n\\n<Jordan>“To be continued,”</Jordan> <narrator>she said, tossing the magazine on the table,</narrator> <Jordan>“in our very next issue.”</Jordan>\\n\\n<narrator>Her body asserted itself with a restless movement of her knee, and she stood up.</narrator>\\n\\n<Jordan>“Ten o’clock,”</Jordan> <narrator>she remarked, apparently finding the time on the ceiling.</narrator> <Jordan>“Time for this good girl to go to bed.”</Jordan>\\n\\n<Daisy>“Jordan’s going to play in the tournament tomorrow,”</Daisy> <narrator>explained Daisy,</narrator> <Daisy>“over at Westchester.”</Daisy>\\n\\n<narrator>“Oh—you’re Jordan Baker.”</narrator>\\n\\n<narrator>I knew now why her face was familiar—its pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.</narrator>\\n\\n<Jordan>“Good night,”</Jordan> <narrator>she said softly.</narrator> <Jordan>“Wake me at eight, won’t you.”</Jordan>\\n\\n<c1>“If you’ll get up.”</c1>\\n\\n<Jordan>“I will. Good night, Mr. Carraway. See you anon.”</Jordan>\\n\\n<Daisy>“Of course you will,”</Daisy> <narrator>confirmed Daisy.</narrator> <Daisy>“In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. 
You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”</Daisy>\\n\\n<Jordan>“Good night,”</Jordan> <narrator>called Miss Baker from the stairs.</narrator> <Jordan>“I haven’t heard a word.”</Jordan>\\n\\n<Tom>“She’s a nice girl,”</Tom> <narrator>said Tom after a moment.</narrator> <Tom>“They oughtn’t to let her run around the country this way.”</Tom>\\n\\n<Daisy>“Who oughtn’t to?”</Daisy> <narrator>inquired Daisy coldly.</narrator>\\n\\n<Tom>“Her family.”</Tom>\\n\\n<Daisy>“Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”</Daisy>\\n\\n<narrator>Daisy and Tom looked at each other for a moment in silence.</narrator>\\n\\n<narrator>“Is she from New York?”</narrator> <narrator>I asked quickly.</narrator>\\n\\n<Daisy>“From Louisville. Our white girlhood was passed together there. Our beautiful white—”</Daisy>\\n\\n<Tom>“Did you give Nick a little heart to heart talk on the veranda?”</Tom> <narrator>demanded Tom suddenly.</narrator>\\n\\n<Daisy>“Did I?”</Daisy> <narrator>She looked at me.</narrator> <Daisy>“I can’t seem to remember, but I think we talked about the Nordic race. Yes, I’m sure we did. It sort of crept up on us and first thing you know—”</Daisy>\\n\\n<Tom>“Don’t believe everything you hear, Nick,”</Tom> <narrator>he advised me.</narrator>')"
 
 
 
 
78
  ]
79
  },
80
+ "execution_count": 5,
81
  "metadata": {},
82
  "output_type": "execute_result"
83
  }
84
  ],
85
  "source": [
86
+ "res"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": 6,
92
+ "metadata": {},
93
+ "outputs": [
94
+ {
95
+ "name": "stdout",
96
+ "output_type": "stream",
97
+ "text": [
98
+ "characters: ['c1', 'Daisy', 'Jordan', 'Tom', 'narrator']\n",
99
+ "--------------------\n",
100
+ "[narrator] Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.\n",
101
+ "[narrator] When we came in she held us silent for a moment with a lifted hand.\n",
102
+ "[Jordan] “To be continued,”\n",
103
+ "[narrator] she said, tossing the magazine on the table,\n",
104
+ "[Jordan] “in our very next issue.”\n",
105
+ "[narrator] Her body asserted itself with a restless movement of her knee, and she stood up.\n",
106
+ "[Jordan] “Ten o’clock,”\n",
107
+ "[narrator] she remarked, apparently finding the time on the ceiling.\n",
108
+ "[Jordan] “Time for this good girl to go to bed.”\n",
109
+ "[Daisy] “Jordan’s going to play in the tournament tomorrow,”\n",
110
+ "[narrator] explained Daisy,\n",
111
+ "[Daisy] “over at Westchester.”\n",
112
+ "[narrator] “Oh—you’re Jordan Baker.”\n",
113
+ "[narrator] I knew now why her face was familiar—its pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.\n",
114
+ "[Jordan] “Good night,”\n",
115
+ "[narrator] she said softly.\n",
116
+ "[Jordan] “Wake me at eight, won’t you.”\n",
117
+ "[c1] “If you’ll get up.”\n",
118
+ "[Jordan] “I will. Good night, Mr. Carraway. See you anon.”\n",
119
+ "[Daisy] “Of course you will,”\n",
120
+ "[narrator] confirmed Daisy.\n",
121
+ "[Daisy] “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”\n",
122
+ "[Jordan] “Good night,”\n",
123
+ "[narrator] called Miss Baker from the stairs.\n",
124
+ "[Jordan] “I haven’t heard a word.”\n",
125
+ "[Tom] “She’s a nice girl,”\n",
126
+ "[narrator] said Tom after a moment.\n",
127
+ "[Tom] “They oughtn’t to let her run around the country this way.”\n",
128
+ "[Daisy] “Who oughtn’t to?”\n",
129
+ "[narrator] inquired Daisy coldly.\n",
130
+ "[Tom] “Her family.”\n",
131
+ "[Daisy] “Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”\n",
132
+ "[narrator] Daisy and Tom looked at each other for a moment in silence.\n",
133
+ "[narrator] “Is she from New York?”\n",
134
+ "[narrator] I asked quickly.\n",
135
+ "[Daisy] “From Louisville. Our white girlhood was passed together there. Our beautiful white—”\n",
136
+ "[Tom] “Did you give Nick a little heart to heart talk on the veranda?”\n",
137
+ "[narrator] demanded Tom suddenly.\n",
138
+ "[Daisy] “Did I?”\n",
139
+ "[narrator] She looked at me.\n",
140
+ "[Daisy] “I can’t seem to remember, but I think we talked about the Nordic race. Yes, I’m sure we did. It sort of crept up on us and first thing you know—”\n",
141
+ "[Tom] “Don’t believe everything you hear, Nick,”\n",
142
+ "[narrator] he advised me.\n"
143
+ ]
144
+ }
145
+ ],
146
+ "source": [
147
+ "annotated_text = res.to_character_annotated_text()\n",
148
+ "print(annotated_text.to_pretty_text())"
149
  ]
150
  },
151
  {
152
  "cell_type": "code",
153
+ "execution_count": 9,
154
  "metadata": {},
155
  "outputs": [
156
  {
157
  "name": "stdout",
158
  "output_type": "stream",
159
  "text": [
160
+ "LLM usage:\n",
161
+ "\n",
162
+ "Tokens Used: 1817\n",
163
+ "\tPrompt Tokens: 877\n",
164
+ "\tCompletion Tokens: 940\n",
165
+ "Successful Requests: 1\n",
166
+ "Total Cost (USD): $0.0115925\n"
 
 
 
167
  ]
168
  }
169
  ],
170
  "source": [
171
+ "print(f'LLM usage:\\n\\n{cb}')"
172
  ]
173
  },
174
  {
readme.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### TODO
2
+
3
+ - [ ] prepare text for TTS
4
+ - [x] prepare prompt to split text into character phrases
5
+ - [ ] split large text in batches, process each batch separately, concat batches
6
+ - [ ] try to identify unknown characters
7
+ - [ ] select voices for TTS
8
+ - [ ] map characters to available voices
9
+ - [ ] use LLM to recognize characters for a given text and provide descriptions
10
+ detailed enough to select appropriate voice
11
+ - [ ] run TTS to create narration
12
+ - [ ] add effects. mix them with created narration
prompts.py → src/prompts.py RENAMED
@@ -1,4 +1,4 @@
1
- class SplitTextPrompt:
2
  SYSTEM = """\
3
  You are a helpful assistant proficient in literature and language.
4
  Imagine you are helping to prepare the provided text for narration to create the audio book.
@@ -37,7 +37,9 @@ Format your answer as a following JSON:
37
 
38
  Ensure the order of the parts in the JSON output matches the original order of the text.
39
 
40
- Example of text split by characters, already in the target format.
 
 
41
  {{
42
  "characters": ["Mr. Gatz", "narrator"],
43
  "parts":
@@ -53,6 +55,54 @@ Example of text split by characters, already in the target format.
53
  {{"character": "Mr. Gatz", "text": "He fumbled at the embroidered coverlet, trying to take it from the bed, and lay down stiffly—was instantly asleep."}},
54
  ]
55
  }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  """
57
 
58
  USER = """\
 
1
+ class SplitTextPromptV1:
2
  SYSTEM = """\
3
  You are a helpful assistant proficient in literature and language.
4
  Imagine you are helping to prepare the provided text for narration to create the audio book.
 
37
 
38
  Ensure the order of the parts in the JSON output matches the original order of the text.
39
 
40
+ Examples of text split by characters, already in the target format.
41
+
42
+ Example 1.
43
  {{
44
  "characters": ["Mr. Gatz", "narrator"],
45
  "parts":
 
55
  {{"character": "Mr. Gatz", "text": "He fumbled at the embroidered coverlet, trying to take it from the bed, and lay down stiffly—was instantly asleep."}},
56
  ]
57
  }}
58
+
59
+ Example 2.
60
+ {{
61
+ 'characters': [
62
+ 'narrator',
63
+ 'Mr. Carraway',
64
+ 'Daisy',
65
+ 'Miss Baker',
66
+ 'Tom',
67
+ 'Nick'
68
+ ],
69
+ 'parts': [
70
+ {{'character': 'narrator', 'text': '“If you’ll get up.”'}},
71
+ {{'character': 'Mr. Carraway', 'text': '“I will. Good night, Mr. Carraway. See you anon.”'}},
72
+ {{'character': 'Daisy', 'text': '“Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”'}},
73
+ {{'character': 'Miss Baker', 'text': '“Good night,” called Miss Baker from the stairs. “I haven’t heard a word.”'}},
74
+ {{'character': 'Tom', 'text': '“She’s a nice girl,” said Tom after a moment. “They oughtn’t to let her run around the country this way.”'}},
75
+ {{'character': 'Daisy', 'text': '“Who oughtn’t to?” inquired Daisy coldly.'}},
76
+ {{'character': 'narrator', 'text': '“Her family.”'}},
77
+ {{'character': 'narrator', 'text': '“Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”'}},
78
+ {{'character': 'narrator', 'text': 'Daisy and Tom looked at each other for a moment in silence.'}}
79
+ ]
80
+ }}
81
+ """
82
+
83
+ USER = """\
84
+ Here is the book sample:
85
+ ---
86
+ {text}"""
87
+
88
+
89
+ class SplitTextPromptV2:
90
+ SYSTEM = """\
91
+ you are provided with the book sample.
92
+ please rewrite it and insert xml tags indicating character to whom current phrase belongs.
93
+ for example: <narrator>I looked at her</narrator><Jill>What are you looking at?</Jill>
94
+
95
+ Notes:
96
+ - sometimes narrator is one of characters taking part in the action.
97
+ in this case use narrator's name (if available) instead of "narrator"
98
+ - if it's impossible to identify character name from the text provided, use codes "c1", "c2", etc,
99
+ where "c" prefix means character and number is used to enumerate unknown characters
100
+ - all quotes of direct speech must be attributed to characters, for example:
101
+ <Tom>“She’s a nice girl,”</Tom><narrator>said Tom after a moment.</narrator>
102
+ mind that sometimes narrator could also be a character.
103
+ - use ALL available context to determine the character.
104
+ sometimes the character name becomes clear from the following phrases
105
+ - DO NOT include in your response anything except for the original text with character xml tags!!!
106
  """
107
 
108
  USER = """\
src/text_split_chain.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.prompts import (
5
+ ChatPromptTemplate,
6
+ HumanMessagePromptTemplate,
7
+ SystemMessagePromptTemplate,
8
+ )
9
+ from pydantic import BaseModel
10
+
11
+ from src.prompts import SplitTextPromptV1, SplitTextPromptV2
12
+ from src.utils import GPTModels, get_chat_llm
13
+
14
+
15
class CharacterPhrase(BaseModel):
    """A single span of text attributed to one speaker."""

    # speaker label, e.g. a character name, "narrator", or a "c1"-style code
    character: str
    # the text of the span itself
    text: str
18
+
19
+
20
class CharacterAnnotatedText(BaseModel):
    """Text split into consecutive phrases, each attributed to a character.

    The list of distinct characters is derived from ``phrases`` when the
    object is constructed and is exposed through the read-only
    ``characters`` property.
    """

    phrases: list[CharacterPhrase]
    _characters: list[str]

    def __init__(self, **data):
        super().__init__(**data)
        # dict.fromkeys de-duplicates while keeping first-appearance order.
        # A plain set() yields an order that can differ between interpreter
        # runs (string hash randomization), which made the printed
        # character list unstable.
        self._characters = list(
            dict.fromkeys(phrase.character for phrase in self.phrases)
        )

    @property
    def characters(self) -> list[str]:
        """Distinct character labels, in order of first appearance."""
        return self._characters

    def to_pretty_text(self) -> str:
        """Return a printable dump of the annotation.

        Layout: a ``characters: [...]`` line, a 20-dash rule, then one
        ``[character] text`` line per phrase, joined with newlines.
        """
        header = [f"characters: {self.characters}", "-" * 20]
        body = (f"[{phrase.character}] {phrase.text}" for phrase in self.phrases)
        return "\n".join([*header, *body])
39
+
40
+
41
class SplitTextOutputV1(BaseModel):
    """Schema of the structured answer requested by the V1 split chain."""

    characters: list[str]
    parts: list[CharacterPhrase]

    def to_character_annotated_text(self):
        """Convert to CharacterAnnotatedText.

        Only ``parts`` is forwarded; the ``characters`` field is not
        passed along.
        """
        annotated = CharacterAnnotatedText(phrases=self.parts)
        return annotated
47
+
48
+
49
def create_split_text_chain_v1(llm_model: GPTModels):
    """Build the V1 chain: chat prompt piped into a structured-output model.

    The chat model is created with temperature 0.0 and asked to answer in
    the shape of SplitTextOutputV1. The prompt is assembled from
    SplitTextPromptV1.SYSTEM and SplitTextPromptV1.USER.

    Args:
        llm_model: model identifier forwarded to get_chat_llm.

    Returns:
        The composed ``prompt | llm`` runnable.
    """
    structured_llm = get_chat_llm(
        llm_model=llm_model, temperature=0.0
    ).with_structured_output(SplitTextOutputV1)

    messages = [
        SystemMessagePromptTemplate.from_template(SplitTextPromptV1.SYSTEM),
        HumanMessagePromptTemplate.from_template(SplitTextPromptV1.USER),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(messages)

    return chat_prompt | structured_llm
62
+
63
+
64
class SplitTextOutputV2(BaseModel):
    """Raw XML-tagged LLM answer together with the phrases parsed from it.

    ``text_raw`` holds the model's answer verbatim; the phrases are parsed
    once at construction time and exposed through ``phrases``.
    """

    text_raw: str
    _phrases: list[CharacterPhrase]

    @staticmethod
    def _parse_phrases_from_xml_tags(text: str) -> list[CharacterPhrase]:
        """Extract ``<name>...</name>`` spans from ``text`` in document order.

        The closing tag must repeat the opening tag's name (regex
        backreference). Beyond that we rely on the LLM to format the
        response correctly: text outside a matched pair is skipped
        without raising.
        """
        # DOTALL lets "." cross line breaks. Without it, a tagged span that
        # contains a newline (e.g. hard-wrapped book text echoed back by the
        # model) does not match and the whole phrase is silently lost.
        pattern = re.compile(r"(?:<([^<>]+)>)(.*?)(?:</\1>)", re.DOTALL)
        return [
            CharacterPhrase(character=name, text=body)
            for name, body in pattern.findall(text)
        ]

    def __init__(self, **data):
        super().__init__(**data)
        self._phrases = self._parse_phrases_from_xml_tags(self.text_raw)

    @property
    def phrases(self) -> list[CharacterPhrase]:
        """Phrases parsed from ``text_raw``."""
        return self._phrases

    def to_character_annotated_text(self):
        """Wrap the parsed phrases in a CharacterAnnotatedText."""
        return CharacterAnnotatedText(phrases=self.phrases)
89
+
90
+
91
def create_split_text_chain_v2(llm_model: GPTModels):
    """Build the V2 chain: prompt -> chat model -> string -> SplitTextOutputV2.

    The chat model is created with temperature 0.0. Its answer is reduced
    to a plain string by StrOutputParser and then wrapped in
    SplitTextOutputV2 as ``text_raw``. The prompt is assembled from
    SplitTextPromptV2.SYSTEM and SplitTextPromptV2.USER.

    Args:
        llm_model: model identifier forwarded to get_chat_llm.

    Returns:
        The composed runnable.
    """
    chat_llm = get_chat_llm(llm_model=llm_model, temperature=0.0)

    messages = [
        SystemMessagePromptTemplate.from_template(SplitTextPromptV2.SYSTEM),
        HumanMessagePromptTemplate.from_template(SplitTextPromptV2.USER),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(messages)

    return (
        chat_prompt
        | chat_llm
        | StrOutputParser()
        | (lambda raw: SplitTextOutputV2(text_raw=raw))
    )
src/utils.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import StrEnum
2
+
3
+ from httpx import Timeout
4
+ from langchain_openai import ChatOpenAI
5
+
6
+
7
class GPTModels(StrEnum):
    """Model-name strings; get_chat_llm passes a member as ``model`` to ChatOpenAI."""

    GPT_4o = "gpt-4o"
    GPT_4o_MINI = "gpt-4o-mini"
    GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
11
+
12
+
13
def get_chat_llm(llm_model: GPTModels, temperature=0.0):
    """Create a ChatOpenAI client for the given model.

    Args:
        llm_model: model identifier passed as ``model``.
        temperature: sampling temperature passed through unchanged.

    Returns:
        A ChatOpenAI instance configured with ``Timeout(60, connect=4)``.
    """
    request_timeout = Timeout(60, connect=4)
    return ChatOpenAI(
        model=llm_model,
        temperature=temperature,
        timeout=request_timeout,
    )
utils.py DELETED
@@ -1,49 +0,0 @@
1
- from enum import StrEnum
2
-
3
- from httpx import Timeout
4
- from langchain_core.prompts import (
5
- ChatPromptTemplate,
6
- HumanMessagePromptTemplate,
7
- SystemMessagePromptTemplate,
8
- )
9
- from langchain_openai import ChatOpenAI
10
- from pydantic import BaseModel
11
-
12
- from prompts import SplitTextPrompt
13
-
14
-
15
- class GPTModels(StrEnum):
16
- GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
17
- GPT_4o_MINI = "gpt-4o-mini"
18
-
19
-
20
- class TextPart(BaseModel):
21
- character: str
22
- text: str
23
-
24
-
25
- class SplitTextOutput(BaseModel):
26
- characters: list[str]
27
- parts: list[TextPart]
28
-
29
- def to_pretty_text(self):
30
- lines = []
31
- lines.append(f"characters: {self.characters}")
32
- lines.extend(f"[{part.character}] {part.text}" for part in self.parts)
33
- res = "\n".join(lines)
34
- return res
35
-
36
-
37
- def create_split_text_chain(llm_model: GPTModels):
38
- llm = ChatOpenAI(model=llm_model, temperature=0.0, timeout=Timeout(60, connect=4))
39
- llm = llm.with_structured_output(SplitTextOutput)
40
-
41
- prompt = ChatPromptTemplate.from_messages(
42
- [
43
- SystemMessagePromptTemplate.from_template(SplitTextPrompt.SYSTEM),
44
- HumanMessagePromptTemplate.from_template(SplitTextPrompt.USER),
45
- ]
46
- )
47
-
48
- chain = prompt | llm
49
- return chain