Upload 78 files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- pipecat/__init__.py +0 -0
- pipecat/frames/__init__.py +0 -0
- pipecat/frames/frames.proto +43 -0
- pipecat/frames/frames.py +340 -0
- pipecat/frames/protobufs/frames_pb2.py +32 -0
- pipecat/pipeline/__init__.py +0 -0
- pipecat/pipeline/base_pipeline.py +21 -0
- pipecat/pipeline/merge_pipeline.py +24 -0
- pipecat/pipeline/parallel_pipeline.py +154 -0
- pipecat/pipeline/parallel_task.py +119 -0
- pipecat/pipeline/pipeline.py +95 -0
- pipecat/pipeline/runner.py +58 -0
- pipecat/pipeline/task.py +142 -0
- pipecat/processors/__init__.py +0 -0
- pipecat/processors/aggregators/__init__.py +0 -0
- pipecat/processors/aggregators/gated.py +74 -0
- pipecat/processors/aggregators/llm_response.py +266 -0
- pipecat/processors/aggregators/openai_llm_context.py +114 -0
- pipecat/processors/aggregators/sentence.py +54 -0
- pipecat/processors/aggregators/user_response.py +156 -0
- pipecat/processors/aggregators/vision_image_frame.py +47 -0
- pipecat/processors/async_frame_processor.py +63 -0
- pipecat/processors/filters/__init__.py +0 -0
- pipecat/processors/filters/frame_filter.py +36 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/wake_check_filter.py +86 -0
- pipecat/processors/frame_processor.py +162 -0
- pipecat/processors/frameworks/__init__.py +0 -0
- pipecat/processors/frameworks/langchain.py +80 -0
- pipecat/processors/logger.py +27 -0
- pipecat/processors/text_transformer.py +38 -0
- pipecat/serializers/__init__.py +0 -0
- pipecat/serializers/base_serializer.py +20 -0
- pipecat/serializers/protobuf.py +92 -0
- pipecat/serializers/twilio.py +52 -0
- pipecat/services/__init__.py +0 -0
- pipecat/services/ai_services.py +300 -0
- pipecat/services/anthropic.py +145 -0
- pipecat/services/azure.py +233 -0
- pipecat/services/cartesia.py +65 -0
- pipecat/services/deepgram.py +149 -0
- pipecat/services/elevenlabs.py +66 -0
- pipecat/services/fal.py +83 -0
- pipecat/services/fireworks.py +25 -0
- pipecat/services/google.py +129 -0
- pipecat/services/moondream.py +92 -0
- pipecat/services/ollama.py +13 -0
- pipecat/services/openai.py +338 -0
- pipecat/services/openpipe.py +71 -0
- pipecat/services/playht.py +83 -0
pipecat/__init__.py
ADDED
File without changes
pipecat/frames/__init__.py
ADDED
File without changes
pipecat/frames/frames.proto
ADDED
@@ -0,0 +1,43 @@
//
// Copyright (c) 2024, Daily
//
// SPDX-License-Identifier: BSD 2-Clause License
//

// Generate frames_pb2.py with:
//
// python -m grpc_tools.protoc --proto_path=./ --python_out=./protobufs frames.proto

syntax = "proto3";

package pipecat;

message TextFrame {
  uint64 id = 1;
  string name = 2;
  string text = 3;
}

message AudioRawFrame {
  uint64 id = 1;
  string name = 2;
  bytes audio = 3;
  uint32 sample_rate = 4;
  uint32 num_channels = 5;
}

message TranscriptionFrame {
  uint64 id = 1;
  string name = 2;
  string text = 3;
  string user_id = 4;
  string timestamp = 5;
}

message Frame {
  oneof frame {
    TextFrame text = 1;
    AudioRawFrame audio = 2;
    TranscriptionFrame transcription = 3;
  }
}
pipecat/frames/frames.py
ADDED
@@ -0,0 +1,340 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import Any, List, Mapping, Tuple

from dataclasses import dataclass, field

from pipecat.utils.utils import obj_count, obj_id


@dataclass
class Frame:
    id: int = field(init=False)
    name: str = field(init=False)

    def __post_init__(self):
        self.id: int = obj_id()
        self.name: str = f"{self.__class__.__name__}#{obj_count(self)}"

    def __str__(self):
        return self.name


@dataclass
class DataFrame(Frame):
    pass


@dataclass
class AudioRawFrame(DataFrame):
    """A chunk of audio. Will be played by the transport if the transport's
    microphone has been enabled.

    """
    audio: bytes
    sample_rate: int
    num_channels: int

    def __post_init__(self):
        super().__post_init__()
        self.num_frames = int(len(self.audio) / (self.num_channels * 2))

    def __str__(self):
        return f"{self.name}(size: {len(self.audio)}, frames: {self.num_frames}, sample_rate: {self.sample_rate}, channels: {self.num_channels})"


@dataclass
class ImageRawFrame(DataFrame):
    """An image. Will be shown by the transport if the transport's camera is
    enabled.

    """
    image: bytes
    size: Tuple[int, int]
    format: str | None

    def __str__(self):
        return f"{self.name}(size: {self.size}, format: {self.format})"


@dataclass
class URLImageRawFrame(ImageRawFrame):
    """An image with an associated URL. Will be shown by the transport if the
    transport's camera is enabled.

    """
    url: str | None

    def __str__(self):
        return f"{self.name}(url: {self.url}, size: {self.size}, format: {self.format})"


@dataclass
class VisionImageRawFrame(ImageRawFrame):
    """An image with associated text asking for a description of it. Will be
    shown by the transport if the transport's camera is enabled.

    """
    text: str | None

    def __str__(self):
        return f"{self.name}(text: {self.text}, size: {self.size}, format: {self.format})"


@dataclass
class UserImageRawFrame(ImageRawFrame):
    """An image associated with a user. Will be shown by the transport if the
    transport's camera is enabled.

    """
    user_id: str

    def __str__(self):
        return f"{self.name}(user: {self.user_id}, size: {self.size}, format: {self.format})"


@dataclass
class SpriteFrame(Frame):
    """An animated sprite. Will be shown by the transport if the transport's
    camera is enabled. Will play at the framerate specified in the transport's
    `fps` constructor parameter.

    """
    images: List[ImageRawFrame]

    def __str__(self):
        return f"{self.name}(size: {len(self.images)})"


@dataclass
class TextFrame(DataFrame):
    """A chunk of text. Emitted by LLM services, consumed by TTS services, and
    can be used to send text through pipelines.

    """
    text: str

    def __str__(self):
        return f"{self.name}(text: {self.text})"


@dataclass
class TranscriptionFrame(TextFrame):
    """A text frame with transcription-specific data. Will be placed in the
    transport's receive queue when a participant speaks.

    """
    user_id: str
    timestamp: str

    def __str__(self):
        return f"{self.name}(user: {self.user_id}, text: {self.text}, timestamp: {self.timestamp})"


@dataclass
class InterimTranscriptionFrame(TextFrame):
    """A text frame with interim transcription-specific data. Will be placed in
    the transport's receive queue when a participant speaks."""
    user_id: str
    timestamp: str

    def __str__(self):
        return f"{self.name}(user: {self.user_id}, text: {self.text}, timestamp: {self.timestamp})"


@dataclass
class LLMMessagesFrame(DataFrame):
    """A frame containing a list of LLM messages. Used to signal that an LLM
    service should run a chat completion and emit an LLMStartFrame, TextFrames
    and an LLMEndFrame. Note that the messages property on this class is
    mutable, and will be updated by various ResponseAggregator frame
    processors.

    """
    messages: List[dict]


@dataclass
class TransportMessageFrame(DataFrame):
    message: Any

    def __str__(self):
        return f"{self.name}(message: {self.message})"

#
# App frames. Application user-defined frames.
#


@dataclass
class AppFrame(Frame):
    pass

#
# System frames
#


@dataclass
class SystemFrame(Frame):
    pass


@dataclass
class StartFrame(SystemFrame):
    """This is the first frame that should be pushed down a pipeline."""
    allow_interruptions: bool = False
    enable_metrics: bool = False
    report_only_initial_ttfb: bool = False


@dataclass
class CancelFrame(SystemFrame):
    """Indicates that a pipeline needs to stop right away."""
    pass


@dataclass
class ErrorFrame(SystemFrame):
    """This is used to notify upstream that an error has occurred downstream in
    the pipeline."""
    error: str | None

    def __str__(self):
        return f"{self.name}(error: {self.error})"


@dataclass
class StopTaskFrame(SystemFrame):
    """Indicates that a pipeline task should be stopped. This should inform the
    pipeline processors that they should stop pushing frames but that they
    should be kept in a running state.

    """
    pass


@dataclass
class StartInterruptionFrame(SystemFrame):
    """Emitted by VAD to indicate that a user has started speaking (i.e. an
    interruption). This is similar to UserStartedSpeakingFrame except that it
    should be pushed concurrently with other frames (so the order is not
    guaranteed).

    """
    pass


@dataclass
class StopInterruptionFrame(SystemFrame):
    """Emitted by VAD to indicate that a user has stopped speaking (i.e. no more
    interruptions). This is similar to UserStoppedSpeakingFrame except that it
    should be pushed concurrently with other frames (so the order is not
    guaranteed).

    """
    pass


@dataclass
class MetricsFrame(SystemFrame):
    """Emitted by processors that can compute metrics like latencies.
    """
    ttfb: List[Mapping[str, Any]] | None = None
    processing: List[Mapping[str, Any]] | None = None

#
# Control frames
#


@dataclass
class ControlFrame(Frame):
    pass


@dataclass
class EndFrame(ControlFrame):
    """Indicates that a pipeline has ended and frame processors and pipelines
    should be shut down. If the transport receives this frame, it will stop
    sending frames to its output channel(s) and close all its threads. Note
    that this is a control frame, which means it will be received in the order
    it was sent (unlike system frames).

    """
    pass


@dataclass
class LLMFullResponseStartFrame(ControlFrame):
    """Used to indicate the beginning of a full LLM response. Followed by an
    LLMResponseStartFrame, TextFrame and LLMResponseEndFrame for each sentence
    until an LLMFullResponseEndFrame."""
    pass


@dataclass
class LLMFullResponseEndFrame(ControlFrame):
    """Indicates the end of a full LLM response."""
    pass


@dataclass
class LLMResponseStartFrame(ControlFrame):
    """Used to indicate the beginning of an LLM response. Following TextFrames
    are part of the LLM response until an LLMResponseEndFrame."""
    pass


@dataclass
class LLMResponseEndFrame(ControlFrame):
    """Indicates the end of an LLM response."""
    pass


@dataclass
class UserStartedSpeakingFrame(ControlFrame):
    """Emitted by VAD to indicate that a user has started speaking. This can be
    used for interruptions or other times when detecting that someone is
    speaking is more important than knowing what they're saying (as you will
    get with a TranscriptionFrame).

    """
    pass


@dataclass
class UserStoppedSpeakingFrame(ControlFrame):
    """Emitted by the VAD to indicate that a user stopped speaking."""
    pass


@dataclass
class TTSStartedFrame(ControlFrame):
    """Used to indicate the beginning of a TTS response. Following
    AudioRawFrames are part of the TTS response until a TTSEndFrame. These
    frames can be used for aggregating audio frames in a transport to optimize
    the size of frames sent to the session, without needing to control this in
    the TTS service.

    """
    pass


@dataclass
class TTSStoppedFrame(ControlFrame):
    """Indicates the end of a TTS response."""
    pass


@dataclass
class UserImageRequestFrame(ControlFrame):
    """A frame used to request an image from the given user."""
    user_id: str

    def __str__(self):
        return f"{self.name}, user: {self.user_id}"
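Since `Frame.__post_init__` assigns `id` and `name` automatically, frames are constructed from their payload fields only. A minimal sketch of what that looks like (the sample values are illustrative, not taken from this upload):

```python
from pipecat.frames.frames import AudioRawFrame, TextFrame

# Payload-only constructors; id and name are filled in by __post_init__.
text = TextFrame(text="Hello there!")
audio = AudioRawFrame(audio=b"\x00\x00" * 160, sample_rate=16000, num_channels=1)

print(text)   # e.g. "TextFrame#1(text: Hello there!)"
print(audio)  # e.g. "AudioRawFrame#1(size: 320, frames: 160, sample_rate: 16000, channels: 1)"
```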
pipecat/frames/protobufs/frames_pb2.py
ADDED
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: frames.proto
# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0c\x66rames.proto\x12\x07pipecat\"3\n\tTextFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\"c\n\rAudioRawFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\r\n\x05\x61udio\x18\x03 \x01(\x0c\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12\x14\n\x0cnum_channels\x18\x05 \x01(\r\"`\n\x12TranscriptionFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x0f\n\x07user_id\x18\x04 \x01(\t\x12\x11\n\ttimestamp\x18\x05 \x01(\t\"\x93\x01\n\x05\x46rame\x12\"\n\x04text\x18\x01 \x01(\x0b\x32\x12.pipecat.TextFrameH\x00\x12\'\n\x05\x61udio\x18\x02 \x01(\x0b\x32\x16.pipecat.AudioRawFrameH\x00\x12\x34\n\rtranscription\x18\x03 \x01(\x0b\x32\x1b.pipecat.TranscriptionFrameH\x00\x42\x07\n\x05\x66rameb\x06proto3')

_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'frames_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
  DESCRIPTOR._options = None
  _globals['_TEXTFRAME']._serialized_start=25
  _globals['_TEXTFRAME']._serialized_end=76
  _globals['_AUDIORAWFRAME']._serialized_start=78
  _globals['_AUDIORAWFRAME']._serialized_end=177
  _globals['_TRANSCRIPTIONFRAME']._serialized_start=179
  _globals['_TRANSCRIPTIONFRAME']._serialized_end=275
  _globals['_FRAME']._serialized_start=278
  _globals['_FRAME']._serialized_end=425
# @@protoc_insertion_point(module_scope)
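As a quick illustration of the generated module (standard protobuf Python API; the field values are made up), a `TextFrame` can be wrapped in the top-level `Frame` oneof and round-tripped:

```python
from pipecat.frames.protobufs import frames_pb2

# Wrap a TextFrame in the top-level Frame oneof and serialize it.
proto = frames_pb2.Frame(text=frames_pb2.TextFrame(id=1, name="TextFrame#1", text="hello"))
data = proto.SerializeToString()

# Parse it back and check which member of the oneof is set.
decoded = frames_pb2.Frame()
decoded.ParseFromString(data)
assert decoded.WhichOneof("frame") == "text"
print(decoded.text.text)  # "hello"
```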
pipecat/pipeline/__init__.py
ADDED
File without changes
pipecat/pipeline/base_pipeline.py
ADDED
@@ -0,0 +1,21 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from abc import abstractmethod

from typing import List

from pipecat.processors.frame_processor import FrameProcessor


class BasePipeline(FrameProcessor):

    def __init__(self):
        super().__init__()

    @abstractmethod
    def processors_with_metrics(self) -> List[FrameProcessor]:
        pass
pipecat/pipeline/merge_pipeline.py
ADDED
@@ -0,0 +1,24 @@
from typing import List
from pipecat.pipeline.frames import EndFrame, EndPipeFrame
from pipecat.pipeline.pipeline import Pipeline


class SequentialMergePipeline(Pipeline):
    """This class merges the sink queues from a list of pipelines. Frames from
    each pipeline's sink are merged in the order of pipelines in the list."""

    def __init__(self, pipelines: List[Pipeline]):
        super().__init__([])
        self.pipelines = pipelines

    async def run_pipeline(self):
        for idx, pipeline in enumerate(self.pipelines):
            while True:
                frame = await pipeline.sink.get()
                if isinstance(frame, (EndFrame, EndPipeFrame)):
                    break
                await self.sink.put(frame)

        await self.sink.put(EndFrame())
pipecat/pipeline/parallel_pipeline.py
ADDED
@@ -0,0 +1,154 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio

from itertools import chain
from typing import List

from pipecat.pipeline.base_pipeline import BasePipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.frames.frames import CancelFrame, EndFrame, Frame, StartFrame

from loguru import logger


class Source(FrameProcessor):

    def __init__(self, upstream_queue: asyncio.Queue):
        super().__init__()
        self._up_queue = upstream_queue

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        match direction:
            case FrameDirection.UPSTREAM:
                await self._up_queue.put(frame)
            case FrameDirection.DOWNSTREAM:
                await self.push_frame(frame, direction)


class Sink(FrameProcessor):

    def __init__(self, downstream_queue: asyncio.Queue):
        super().__init__()
        self._down_queue = downstream_queue

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        match direction:
            case FrameDirection.UPSTREAM:
                await self.push_frame(frame, direction)
            case FrameDirection.DOWNSTREAM:
                await self._down_queue.put(frame)


class ParallelPipeline(BasePipeline):
    def __init__(self, *args):
        super().__init__()

        if len(args) == 0:
            raise Exception("ParallelPipeline needs at least one argument")

        self._sources = []
        self._sinks = []

        self._up_queue = asyncio.Queue()
        self._down_queue = asyncio.Queue()
        self._up_task: asyncio.Task | None = None
        self._down_task: asyncio.Task | None = None

        self._pipelines = []

        logger.debug(f"Creating {self} pipelines")
        for processors in args:
            if not isinstance(processors, list):
                raise TypeError(f"ParallelPipeline argument {processors} is not a list")

            # We will add a source before the pipeline and a sink after.
            source = Source(self._up_queue)
            sink = Sink(self._down_queue)
            self._sources.append(source)
            self._sinks.append(sink)

            # Create pipeline
            pipeline = Pipeline(processors)
            source.link(pipeline)
            pipeline.link(sink)
            self._pipelines.append(pipeline)

        logger.debug(f"Finished creating {self} pipelines")

    #
    # BasePipeline
    #

    def processors_with_metrics(self) -> List[FrameProcessor]:
        return list(chain.from_iterable(p.processors_with_metrics() for p in self._pipelines))

    #
    # Frame processor
    #

    async def cleanup(self):
        await asyncio.gather(*[p.cleanup() for p in self._pipelines])

    async def _start_tasks(self):
        loop = self.get_event_loop()
        self._up_task = loop.create_task(self._process_up_queue())
        self._down_task = loop.create_task(self._process_down_queue())

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if isinstance(frame, StartFrame):
            await self._start_tasks()

        if direction == FrameDirection.UPSTREAM:
            # If we get an upstream frame we process it in each sink.
            await asyncio.gather(*[s.process_frame(frame, direction) for s in self._sinks])
        elif direction == FrameDirection.DOWNSTREAM:
            # If we get a downstream frame we process it in each source.
            # TODO(aleix): We are creating task for each frame. For real-time
            # video/audio this might be too slow. We should use an already
            # created task instead.
            await asyncio.gather(*[s.process_frame(frame, direction) for s in self._sources])

        # If we get an EndFrame we stop our queue processing tasks and wait on
        # all the pipelines to finish.
        if isinstance(frame, CancelFrame) or isinstance(frame, EndFrame):
            # Use None to indicate when queues should be done processing.
            await self._up_queue.put(None)
            await self._down_queue.put(None)
            if self._up_task:
                await self._up_task
            if self._down_task:
                await self._down_task

    async def _process_up_queue(self):
        running = True
        seen_ids = set()
        while running:
            frame = await self._up_queue.get()
            if frame and frame.id not in seen_ids:
                await self.push_frame(frame, FrameDirection.UPSTREAM)
                seen_ids.add(frame.id)
            running = frame is not None
            self._up_queue.task_done()

    async def _process_down_queue(self):
        running = True
        seen_ids = set()
        while running:
            frame = await self._down_queue.get()
            if frame and frame.id not in seen_ids:
                await self.push_frame(frame, FrameDirection.DOWNSTREAM)
                seen_ids.add(frame.id)
            running = frame is not None
            self._down_queue.task_done()
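`ParallelPipeline` takes one list of processors per positional argument, wraps each list in its own internal `Pipeline` between a shared `Source` and `Sink`, and de-duplicates frames by `id` when merging the branches back together. A minimal usage sketch; `Passthrough` is a made-up stand-in processor, not part of this upload:

```python
from pipecat.frames.frames import Frame
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class Passthrough(FrameProcessor):
    """Illustrative stand-in processor that just forwards every frame."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)
        await self.push_frame(frame, direction)


# Each positional argument is a list of processors; each list becomes its own
# internal Pipeline, fed from a shared Source and drained into a shared Sink.
parallel = ParallelPipeline([Passthrough()], [Passthrough()])

# The parallel block is itself a processor, so it can sit inside an outer Pipeline.
pipeline = Pipeline([Passthrough(), parallel, Passthrough()])
```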
pipecat/pipeline/parallel_task.py
ADDED
@@ -0,0 +1,119 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio

from itertools import chain
from typing import List

from pipecat.pipeline.base_pipeline import BasePipeline
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.frames.frames import Frame

from loguru import logger


class Source(FrameProcessor):

    def __init__(self, upstream_queue: asyncio.Queue):
        super().__init__()
        self._up_queue = upstream_queue

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        match direction:
            case FrameDirection.UPSTREAM:
                await self._up_queue.put(frame)
            case FrameDirection.DOWNSTREAM:
                await self.push_frame(frame, direction)


class Sink(FrameProcessor):

    def __init__(self, downstream_queue: asyncio.Queue):
        super().__init__()
        self._down_queue = downstream_queue

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        match direction:
            case FrameDirection.UPSTREAM:
                await self.push_frame(frame, direction)
            case FrameDirection.DOWNSTREAM:
                await self._down_queue.put(frame)


class ParallelTask(BasePipeline):
    def __init__(self, *args):
        super().__init__()

        if len(args) == 0:
            raise Exception("ParallelTask needs at least one argument")

        self._sinks = []
        self._pipelines = []

        self._up_queue = asyncio.Queue()
        self._down_queue = asyncio.Queue()

        logger.debug(f"Creating {self} pipelines")
        for processors in args:
            if not isinstance(processors, list):
                raise TypeError(f"ParallelTask argument {processors} is not a list")

            # We add a source at the beginning of the pipeline and a sink at the end.
            source = Source(self._up_queue)
            sink = Sink(self._down_queue)
            processors: List[FrameProcessor] = [source] + processors
            processors.append(sink)

            # Keep track of sinks. We access the source through the pipeline.
            self._sinks.append(sink)

            # Create pipeline
            pipeline = Pipeline(processors)
            self._pipelines.append(pipeline)
        logger.debug(f"Finished creating {self} pipelines")

    #
    # BasePipeline
    #

    def processors_with_metrics(self) -> List[FrameProcessor]:
        return list(chain.from_iterable(p.processors_with_metrics() for p in self._pipelines))

    #
    # Frame processor
    #

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if direction == FrameDirection.UPSTREAM:
            # If we get an upstream frame we process it in each sink.
            await asyncio.gather(*[s.process_frame(frame, direction) for s in self._sinks])
        elif direction == FrameDirection.DOWNSTREAM:
            # If we get a downstream frame we process it in each source (using the pipeline).
            await asyncio.gather(*[p.process_frame(frame, direction) for p in self._pipelines])

        seen_ids = set()
        while not self._up_queue.empty():
            frame = await self._up_queue.get()
            if frame and frame.id not in seen_ids:
                await self.push_frame(frame, FrameDirection.UPSTREAM)
                seen_ids.add(frame.id)
            self._up_queue.task_done()

        seen_ids = set()
        while not self._down_queue.empty():
            frame = await self._down_queue.get()
            if frame and frame.id not in seen_ids:
                await self.push_frame(frame, FrameDirection.DOWNSTREAM)
                seen_ids.add(frame.id)
            self._down_queue.task_done()
pipecat/pipeline/pipeline.py
ADDED
@@ -0,0 +1,95 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import Callable, Coroutine, List

from pipecat.frames.frames import Frame
from pipecat.pipeline.base_pipeline import BasePipeline
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class PipelineSource(FrameProcessor):

    def __init__(self, upstream_push_frame: Callable[[Frame, FrameDirection], Coroutine]):
        super().__init__()
        self._upstream_push_frame = upstream_push_frame

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        match direction:
            case FrameDirection.UPSTREAM:
                await self._upstream_push_frame(frame, direction)
            case FrameDirection.DOWNSTREAM:
                await self.push_frame(frame, direction)


class PipelineSink(FrameProcessor):

    def __init__(self, downstream_push_frame: Callable[[Frame, FrameDirection], Coroutine]):
        super().__init__()
        self._downstream_push_frame = downstream_push_frame

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        match direction:
            case FrameDirection.UPSTREAM:
                await self.push_frame(frame, direction)
            case FrameDirection.DOWNSTREAM:
                await self._downstream_push_frame(frame, direction)


class Pipeline(BasePipeline):

    def __init__(self, processors: List[FrameProcessor]):
        super().__init__()

        # Add a source and a sink queue so we can forward frames upstream and
        # downstream outside of the pipeline.
        self._source = PipelineSource(self.push_frame)
        self._sink = PipelineSink(self.push_frame)
        self._processors: List[FrameProcessor] = [self._source] + processors + [self._sink]

        self._link_processors()

    #
    # BasePipeline
    #

    def processors_with_metrics(self):
        services = []
        for p in self._processors:
            if isinstance(p, BasePipeline):
                services += p.processors_with_metrics()
            elif p.can_generate_metrics():
                services.append(p)
        return services

    #
    # Frame processor
    #

    async def cleanup(self):
        await self._cleanup_processors()

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if direction == FrameDirection.DOWNSTREAM:
            await self._source.process_frame(frame, FrameDirection.DOWNSTREAM)
        elif direction == FrameDirection.UPSTREAM:
            await self._sink.process_frame(frame, FrameDirection.UPSTREAM)

    async def _cleanup_processors(self):
        for p in self._processors:
            await p.cleanup()

    def _link_processors(self):
        prev = self._processors[0]
        for curr in self._processors[1:]:
            prev.link(curr)
            prev = curr
pipecat/pipeline/runner.py
ADDED
@@ -0,0 +1,58 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio
import signal

from pipecat.pipeline.task import PipelineTask
from pipecat.utils.utils import obj_count, obj_id

from loguru import logger


class PipelineRunner:

    def __init__(self, *, name: str | None = None, handle_sigint: bool = True):
        self.id: int = obj_id()
        self.name: str = name or f"{self.__class__.__name__}#{obj_count(self)}"

        self._tasks = {}

        if handle_sigint:
            self._setup_sigint()

    async def run(self, task: PipelineTask):
        logger.debug(f"Runner {self} started running {task}")
        self._tasks[task.name] = task
        await task.run()
        del self._tasks[task.name]
        logger.debug(f"Runner {self} finished running {task}")

    async def stop_when_done(self):
        logger.debug(f"Runner {self} scheduled to stop when all tasks are done")
        await asyncio.gather(*[t.stop_when_done() for t in self._tasks.values()])

    async def cancel(self):
        logger.debug(f"Canceling runner {self}")
        await asyncio.gather(*[t.cancel() for t in self._tasks.values()])

    def _setup_sigint(self):
        loop = asyncio.get_running_loop()
        loop.add_signal_handler(
            signal.SIGINT,
            lambda *args: asyncio.create_task(self._sig_handler())
        )
        loop.add_signal_handler(
            signal.SIGTERM,
            lambda *args: asyncio.create_task(self._sig_handler())
        )

    async def _sig_handler(self):
        logger.warning(f"Interruption detected. Canceling runner {self}")
        await self.cancel()

    def __str__(self):
        return self.name
pipecat/pipeline/task.py
ADDED
@@ -0,0 +1,142 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio

from typing import AsyncIterable, Iterable

from pydantic import BaseModel

from pipecat.frames.frames import CancelFrame, EndFrame, ErrorFrame, Frame, MetricsFrame, StartFrame, StopTaskFrame
from pipecat.pipeline.base_pipeline import BasePipeline
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.utils.utils import obj_count, obj_id

from loguru import logger


class PipelineParams(BaseModel):
    allow_interruptions: bool = False
    enable_metrics: bool = False
    report_only_initial_ttfb: bool = False


class Source(FrameProcessor):

    def __init__(self, up_queue: asyncio.Queue):
        super().__init__()
        self._up_queue = up_queue

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        match direction:
            case FrameDirection.UPSTREAM:
                await self._up_queue.put(frame)
            case FrameDirection.DOWNSTREAM:
                await self.push_frame(frame, direction)


class PipelineTask:

    def __init__(self, pipeline: BasePipeline, params: PipelineParams = PipelineParams()):
        self.id: int = obj_id()
        self.name: str = f"{self.__class__.__name__}#{obj_count(self)}"

        self._pipeline = pipeline
        self._params = params
        self._finished = False

        self._down_queue = asyncio.Queue()
        self._up_queue = asyncio.Queue()

        self._source = Source(self._up_queue)
        self._source.link(pipeline)

    def has_finished(self):
        return self._finished

    async def stop_when_done(self):
        logger.debug(f"Task {self} scheduled to stop when done")
        await self.queue_frame(EndFrame())

    async def cancel(self):
        logger.debug(f"Canceling pipeline task {self}")
        # Make sure everything is cleaned up downstream. This is sent
        # out-of-band from the main streaming task which is what we want since
        # we want to cancel right away.
        await self._source.process_frame(CancelFrame(), FrameDirection.DOWNSTREAM)
        self._process_down_task.cancel()
        self._process_up_task.cancel()
        await self._process_down_task
        await self._process_up_task

    async def run(self):
        self._process_up_task = asyncio.create_task(self._process_up_queue())
        self._process_down_task = asyncio.create_task(self._process_down_queue())
        await asyncio.gather(self._process_up_task, self._process_down_task)
        self._finished = True

    async def queue_frame(self, frame: Frame):
        await self._down_queue.put(frame)

    async def queue_frames(self, frames: Iterable[Frame] | AsyncIterable[Frame]):
        if isinstance(frames, AsyncIterable):
            async for frame in frames:
                await self.queue_frame(frame)
        elif isinstance(frames, Iterable):
            for frame in frames:
                await self.queue_frame(frame)
        else:
            raise Exception("Frames must be an iterable or async iterable")

    def _initial_metrics_frame(self) -> MetricsFrame:
        processors = self._pipeline.processors_with_metrics()
        ttfb = [{"name": p.name, "time": 0.0} for p in processors]
        processing = [{"name": p.name, "time": 0.0} for p in processors]
        return MetricsFrame(ttfb=ttfb, processing=processing)

    async def _process_down_queue(self):
        start_frame = StartFrame(
            allow_interruptions=self._params.allow_interruptions,
            enable_metrics=self._params.enable_metrics,
            report_only_initial_ttfb=self._params.report_only_initial_ttfb
        )
        await self._source.process_frame(start_frame, FrameDirection.DOWNSTREAM)
        await self._source.process_frame(self._initial_metrics_frame(), FrameDirection.DOWNSTREAM)

        running = True
        should_cleanup = True
        while running:
            try:
                frame = await self._down_queue.get()
                await self._source.process_frame(frame, FrameDirection.DOWNSTREAM)
                running = not (isinstance(frame, StopTaskFrame) or isinstance(frame, EndFrame))
                should_cleanup = not isinstance(frame, StopTaskFrame)
                self._down_queue.task_done()
            except asyncio.CancelledError:
                break
        # Cleanup only if we need to.
        if should_cleanup:
            await self._source.cleanup()
            await self._pipeline.cleanup()
        # We just enqueue None to terminate the task gracefully.
        self._process_up_task.cancel()
        await self._process_up_task

    async def _process_up_queue(self):
        while True:
            try:
                frame = await self._up_queue.get()
                if isinstance(frame, ErrorFrame):
                    logger.error(f"Error running app: {frame.error}")
                    await self.queue_frame(CancelFrame())
                self._up_queue.task_done()
            except asyncio.CancelledError:
                break

    def __str__(self):
        return self.name
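Together, `PipelineTask` and `PipelineRunner` form the entry point: the task pushes a `StartFrame` and an initial `MetricsFrame`, then feeds queued frames downstream until an `EndFrame` or `StopTaskFrame` arrives, while the runner awaits tasks and installs SIGINT/SIGTERM handlers. A minimal wiring sketch; the `Echo` processor is an illustrative stand-in, not part of this upload:

```python
import asyncio

from pipecat.frames.frames import EndFrame, Frame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class Echo(FrameProcessor):
    """Illustrative processor: forwards every frame it receives."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)
        await self.push_frame(frame, direction)


async def main():
    pipeline = Pipeline([Echo()])
    task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
    runner = PipelineRunner()

    # Queue some work plus an EndFrame so the task finishes on its own.
    await task.queue_frames([TextFrame("hello"), EndFrame()])
    await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())
```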
pipecat/processors/__init__.py
ADDED
File without changes
pipecat/processors/aggregators/__init__.py
ADDED
File without changes
pipecat/processors/aggregators/gated.py
ADDED
@@ -0,0 +1,74 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import List

from pipecat.frames.frames import Frame, SystemFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor

from loguru import logger


class GatedAggregator(FrameProcessor):
    """Accumulate frames, with custom functions to start and stop accumulation.
    Yields the gate-opening frame before any accumulated frames, then ensuing
    frames until, and not including, the gate-closing frame.

    >>> from pipecat.pipeline.frames import ImageFrame

    >>> async def print_frames(aggregator, frame):
    ...     async for frame in aggregator.process_frame(frame):
    ...         if isinstance(frame, TextFrame):
    ...             print(frame.text)
    ...         else:
    ...             print(frame.__class__.__name__)

    >>> aggregator = GatedAggregator(
    ...     gate_close_fn=lambda x: isinstance(x, LLMResponseStartFrame),
    ...     gate_open_fn=lambda x: isinstance(x, ImageFrame),
    ...     start_open=False)
    >>> asyncio.run(print_frames(aggregator, TextFrame("Hello")))
    >>> asyncio.run(print_frames(aggregator, TextFrame("Hello again.")))
    >>> asyncio.run(print_frames(aggregator, ImageFrame(image=bytes([]), size=(0, 0))))
    ImageFrame
    Hello
    Hello again.
    >>> asyncio.run(print_frames(aggregator, TextFrame("Goodbye.")))
    Goodbye.
    """

    def __init__(self, gate_open_fn, gate_close_fn, start_open):
        super().__init__()
        self._gate_open_fn = gate_open_fn
        self._gate_close_fn = gate_close_fn
        self._gate_open = start_open
        self._accumulator: List[Frame] = []

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        # We must not block system frames.
        if isinstance(frame, SystemFrame):
            await self.push_frame(frame, direction)
            return

        old_state = self._gate_open
        if self._gate_open:
            self._gate_open = not self._gate_close_fn(frame)
        else:
            self._gate_open = self._gate_open_fn(frame)

        if old_state != self._gate_open:
            state = "open" if self._gate_open else "closed"
            logger.debug(f"Gate is now {state} because of {frame}")

        if self._gate_open:
            await self.push_frame(frame, direction)
            for frame in self._accumulator:
                await self.push_frame(frame, direction)
            self._accumulator = []
        else:
            self._accumulator.append(frame)
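Note that the doctest above reflects an older generator-style `process_frame`; with the push-based `FrameProcessor` API in this upload, the aggregator is simply configured with open/close predicates and placed in a pipeline. A small sketch (the gating frame types are only an example):

```python
from pipecat.frames.frames import ImageRawFrame, LLMFullResponseStartFrame
from pipecat.processors.aggregators.gated import GatedAggregator

# Hold frames back until an image shows up; close the gate again when a new
# LLM response starts. System frames always pass through unchanged.
gated = GatedAggregator(
    gate_open_fn=lambda frame: isinstance(frame, ImageRawFrame),
    gate_close_fn=lambda frame: isinstance(frame, LLMFullResponseStartFrame),
    start_open=False,
)
```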
pipecat/processors/aggregators/llm_response.py
ADDED
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from typing import List
|
8 |
+
|
9 |
+
from pipecat.services.openai import OpenAILLMContextFrame, OpenAILLMContext
|
10 |
+
|
11 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
12 |
+
from pipecat.frames.frames import (
|
13 |
+
Frame,
|
14 |
+
InterimTranscriptionFrame,
|
15 |
+
LLMFullResponseEndFrame,
|
16 |
+
LLMFullResponseStartFrame,
|
17 |
+
LLMResponseEndFrame,
|
18 |
+
LLMResponseStartFrame,
|
19 |
+
LLMMessagesFrame,
|
20 |
+
StartInterruptionFrame,
|
21 |
+
TranscriptionFrame,
|
22 |
+
TextFrame,
|
23 |
+
UserStartedSpeakingFrame,
|
24 |
+
UserStoppedSpeakingFrame)
|
25 |
+
|
26 |
+
|
27 |
+
class LLMResponseAggregator(FrameProcessor):
|
28 |
+
|
29 |
+
def __init__(
|
30 |
+
self,
|
31 |
+
*,
|
32 |
+
messages: List[dict],
|
33 |
+
role: str,
|
34 |
+
start_frame,
|
35 |
+
end_frame,
|
36 |
+
accumulator_frame: TextFrame,
|
37 |
+
interim_accumulator_frame: TextFrame | None = None,
|
38 |
+
handle_interruptions: bool = False
|
39 |
+
):
|
40 |
+
super().__init__()
|
41 |
+
|
42 |
+
self._messages = messages
|
43 |
+
self._role = role
|
44 |
+
self._start_frame = start_frame
|
45 |
+
self._end_frame = end_frame
|
46 |
+
self._accumulator_frame = accumulator_frame
|
47 |
+
self._interim_accumulator_frame = interim_accumulator_frame
|
48 |
+
self._handle_interruptions = handle_interruptions
|
49 |
+
|
50 |
+
# Reset our accumulator state.
|
51 |
+
self._reset()
|
52 |
+
|
53 |
+
@property
|
54 |
+
def messages(self):
|
55 |
+
return self._messages
|
56 |
+
|
57 |
+
@property
|
58 |
+
def role(self):
|
59 |
+
return self._role
|
60 |
+
|
61 |
+
#
|
62 |
+
# Frame processor
|
63 |
+
#
|
64 |
+
|
65 |
+
# Use cases implemented:
|
66 |
+
#
|
67 |
+
# S: Start, E: End, T: Transcription, I: Interim, X: Text
|
68 |
+
#
|
69 |
+
# S E -> None
|
70 |
+
# S T E -> X
|
71 |
+
# S I T E -> X
|
72 |
+
# S I E T -> X
|
73 |
+
# S I E I T -> X
|
74 |
+
# S E T -> X
|
75 |
+
# S E I T -> X
|
76 |
+
#
|
77 |
+
# The following case would not be supported:
|
78 |
+
#
|
79 |
+
# S I E T1 I T2 -> X
|
80 |
+
#
|
81 |
+
# and T2 would be dropped.
|
82 |
+
|
83 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
84 |
+
await super().process_frame(frame, direction)
|
85 |
+
|
86 |
+
send_aggregation = False
|
87 |
+
|
88 |
+
if isinstance(frame, self._start_frame):
|
89 |
+
self._aggregation = ""
|
90 |
+
self._aggregating = True
|
91 |
+
self._seen_start_frame = True
|
92 |
+
self._seen_end_frame = False
|
93 |
+
self._seen_interim_results = False
|
94 |
+
await self.push_frame(frame, direction)
|
95 |
+
elif isinstance(frame, self._end_frame):
|
96 |
+
self._seen_end_frame = True
|
97 |
+
self._seen_start_frame = False
|
98 |
+
|
99 |
+
# We might have received the end frame but we might still be
|
100 |
+
# aggregating (i.e. we have seen interim results but not the final
|
101 |
+
# text).
|
102 |
+
self._aggregating = self._seen_interim_results or len(self._aggregation) == 0
|
103 |
+
|
104 |
+
# Send the aggregation if we are not aggregating anymore (i.e. no
|
105 |
+
# more interim results received).
|
106 |
+
send_aggregation = not self._aggregating
|
107 |
+
await self.push_frame(frame, direction)
|
108 |
+
elif isinstance(frame, self._accumulator_frame):
|
109 |
+
if self._aggregating:
|
110 |
+
self._aggregation += f" {frame.text}"
|
111 |
+
# We have recevied a complete sentence, so if we have seen the
|
112 |
+
# end frame and we were still aggregating, it means we should
|
113 |
+
# send the aggregation.
|
114 |
+
send_aggregation = self._seen_end_frame
|
115 |
+
|
116 |
+
# We just got our final result, so let's reset interim results.
|
117 |
+
self._seen_interim_results = False
|
118 |
+
elif self._interim_accumulator_frame and isinstance(frame, self._interim_accumulator_frame):
|
119 |
+
self._seen_interim_results = True
|
120 |
+
elif self._handle_interruptions and isinstance(frame, StartInterruptionFrame):
|
121 |
+
await self._push_aggregation()
|
122 |
+
# Reset anyways
|
123 |
+
self._reset()
|
124 |
+
await self.push_frame(frame, direction)
|
125 |
+
else:
|
126 |
+
await self.push_frame(frame, direction)
|
127 |
+
|
128 |
+
if send_aggregation:
|
129 |
+
await self._push_aggregation()
|
130 |
+
|
131 |
+
async def _push_aggregation(self):
|
132 |
+
if len(self._aggregation) > 0:
|
133 |
+
self._messages.append({"role": self._role, "content": self._aggregation})
|
134 |
+
|
135 |
+
# Reset the aggregation. Reset it before pushing it down, otherwise
|
136 |
+
# if the tasks gets cancelled we won't be able to clear things up.
|
137 |
+
self._aggregation = ""
|
138 |
+
|
139 |
+
frame = LLMMessagesFrame(self._messages)
|
140 |
+
await self.push_frame(frame)
|
141 |
+
|
142 |
+
def _reset(self):
|
143 |
+
self._aggregation = ""
|
144 |
+
self._aggregating = False
|
145 |
+
self._seen_start_frame = False
|
146 |
+
self._seen_end_frame = False
|
147 |
+
self._seen_interim_results = False
|
148 |
+
|
149 |
+
|
150 |
+
class LLMAssistantResponseAggregator(LLMResponseAggregator):
|
151 |
+
def __init__(self, messages: List[dict] = []):
|
152 |
+
super().__init__(
|
153 |
+
messages=messages,
|
154 |
+
role="assistant",
|
155 |
+
start_frame=LLMFullResponseStartFrame,
|
156 |
+
end_frame=LLMFullResponseEndFrame,
|
157 |
+
accumulator_frame=TextFrame,
|
158 |
+
handle_interruptions=True
|
159 |
+
)
|
160 |
+
|
161 |
+
|
162 |
+
class LLMUserResponseAggregator(LLMResponseAggregator):
|
163 |
+
def __init__(self, messages: List[dict] = []):
|
164 |
+
super().__init__(
|
165 |
+
messages=messages,
|
166 |
+
role="user",
|
167 |
+
start_frame=UserStartedSpeakingFrame,
|
168 |
+
end_frame=UserStoppedSpeakingFrame,
|
169 |
+
accumulator_frame=TranscriptionFrame,
|
170 |
+
interim_accumulator_frame=InterimTranscriptionFrame
|
171 |
+
)
|
172 |
+
|
173 |
+
|
174 |
+
class LLMFullResponseAggregator(FrameProcessor):
|
175 |
+
"""This class aggregates Text frames until it receives a
|
176 |
+
LLMResponseEndFrame, then emits the concatenated text as
|
177 |
+
a single text frame.
|
178 |
+
|
179 |
+
given the following frames:
|
180 |
+
|
181 |
+
TextFrame("Hello,")
|
182 |
+
TextFrame(" world.")
|
183 |
+
TextFrame(" I am")
|
184 |
+
TextFrame(" an LLM.")
|
185 |
+
LLMResponseEndFrame()]
|
186 |
+
|
187 |
+
this processor will yield nothing for the first 4 frames, then
|
188 |
+
|
189 |
+
TextFrame("Hello, world. I am an LLM.")
|
190 |
+
LLMResponseEndFrame()
|
191 |
+
|
192 |
+
when passed the last frame.
|
193 |
+
|
194 |
+
>>> async def print_frames(aggregator, frame):
|
195 |
+
... async for frame in aggregator.process_frame(frame):
|
196 |
+
... if isinstance(frame, TextFrame):
|
197 |
+
... print(frame.text)
|
198 |
+
... else:
|
199 |
+
... print(frame.__class__.__name__)
|
200 |
+
|
201 |
+
>>> aggregator = LLMFullResponseAggregator()
|
202 |
+
>>> asyncio.run(print_frames(aggregator, TextFrame("Hello,")))
|
203 |
+
>>> asyncio.run(print_frames(aggregator, TextFrame(" world.")))
|
204 |
+
>>> asyncio.run(print_frames(aggregator, TextFrame(" I am")))
|
205 |
+
>>> asyncio.run(print_frames(aggregator, TextFrame(" an LLM.")))
|
206 |
+
>>> asyncio.run(print_frames(aggregator, LLMResponseEndFrame()))
|
207 |
+
Hello, world. I am an LLM.
|
208 |
+
LLMResponseEndFrame
|
209 |
+
"""
|
210 |
+
|
211 |
+
def __init__(self):
|
212 |
+
super().__init__()
|
213 |
+
self._aggregation = ""
|
214 |
+
|
215 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
216 |
+
await super().process_frame(frame, direction)
|
217 |
+
|
218 |
+
if isinstance(frame, TextFrame):
|
219 |
+
self._aggregation += frame.text
|
220 |
+
elif isinstance(frame, LLMFullResponseEndFrame):
|
221 |
+
await self.push_frame(TextFrame(self._aggregation))
|
222 |
+
await self.push_frame(frame)
|
223 |
+
self._aggregation = ""
|
224 |
+
else:
|
225 |
+
await self.push_frame(frame, direction)
|
226 |
+
|
227 |
+
|
228 |
+
class LLMContextAggregator(LLMResponseAggregator):
|
229 |
+
def __init__(self, *, context: OpenAILLMContext, **kwargs):
|
230 |
+
|
231 |
+
self._context = context
|
232 |
+
super().__init__(**kwargs)
|
233 |
+
|
234 |
+
async def _push_aggregation(self):
|
235 |
+
if len(self._aggregation) > 0:
|
236 |
+
self._context.add_message({"role": self._role, "content": self._aggregation})
|
237 |
+
frame = OpenAILLMContextFrame(self._context)
|
238 |
+
await self.push_frame(frame)
|
239 |
+
|
240 |
+
# Reset our accumulator state.
|
241 |
+
self._reset()
|
242 |
+
|
243 |
+
|
244 |
+
class LLMAssistantContextAggregator(LLMContextAggregator):
|
245 |
+
def __init__(self, context: OpenAILLMContext):
|
246 |
+
super().__init__(
|
247 |
+
messages=[],
|
248 |
+
context=context,
|
249 |
+
role="assistant",
|
250 |
+
start_frame=LLMResponseStartFrame,
|
251 |
+
end_frame=LLMResponseEndFrame,
|
252 |
+
accumulator_frame=TextFrame
|
253 |
+
)
|
254 |
+
|
255 |
+
|
256 |
+
class LLMUserContextAggregator(LLMContextAggregator):
|
257 |
+
def __init__(self, context: OpenAILLMContext):
|
258 |
+
super().__init__(
|
259 |
+
messages=[],
|
260 |
+
context=context,
|
261 |
+
role="user",
|
262 |
+
start_frame=UserStartedSpeakingFrame,
|
263 |
+
end_frame=UserStoppedSpeakingFrame,
|
264 |
+
accumulator_frame=TranscriptionFrame,
|
265 |
+
interim_accumulator_frame=InterimTranscriptionFrame
|
266 |
+
)
|
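A minimal usage sketch (hypothetical, with placeholder message content; not taken from this commit): both aggregators are given the same messages list, so user turns and assistant replies accumulate into one shared conversation history. Because FrameProcessor grabs the running event loop, they are constructed inside an async context.

import asyncio

from pipecat.processors.aggregators.llm_response import (
    LLMAssistantResponseAggregator,
    LLMUserResponseAggregator,
)


async def main():
    messages = [{"role": "system", "content": "You are a helpful voice assistant."}]
    # User transcriptions are appended to `messages` and pushed downstream for the LLM service.
    user_aggregator = LLMUserResponseAggregator(messages)
    # Assistant TextFrames are appended to the same history once the full response ends.
    assistant_aggregator = LLMAssistantResponseAggregator(messages)
    print(user_aggregator, assistant_aggregator)

asyncio.run(main())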
pipecat/processors/aggregators/openai_llm_context.py
ADDED
@@ -0,0 +1,114 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from dataclasses import dataclass
|
8 |
+
import io
|
9 |
+
import json
|
10 |
+
|
11 |
+
from typing import List
|
12 |
+
|
13 |
+
from PIL import Image
|
14 |
+
|
15 |
+
from pipecat.frames.frames import Frame, VisionImageRawFrame
|
16 |
+
|
17 |
+
from openai._types import NOT_GIVEN, NotGiven
|
18 |
+
|
19 |
+
from openai.types.chat import (
|
20 |
+
ChatCompletionToolParam,
|
21 |
+
ChatCompletionToolChoiceOptionParam,
|
22 |
+
ChatCompletionMessageParam
|
23 |
+
)
|
24 |
+
|
25 |
+
# JSON custom encoder to handle bytes arrays so that we can log contexts
|
26 |
+
# with images to the console.
|
27 |
+
|
28 |
+
|
29 |
+
class CustomEncoder(json.JSONEncoder):
|
30 |
+
def default(self, obj):
|
31 |
+
if isinstance(obj, io.BytesIO):
|
32 |
+
# Convert the first 8 bytes to an ASCII hex string
|
33 |
+
return (f"{obj.getbuffer()[0:8].hex()}...")
|
34 |
+
return super().default(obj)
|
35 |
+
|
36 |
+
|
37 |
+
class OpenAILLMContext:
|
38 |
+
|
39 |
+
def __init__(
|
40 |
+
self,
|
41 |
+
messages: List[ChatCompletionMessageParam] | None = None,
|
42 |
+
tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
|
43 |
+
tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN
|
44 |
+
):
|
45 |
+
self.messages: List[ChatCompletionMessageParam] = messages if messages else [
|
46 |
+
]
|
47 |
+
self.tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = tool_choice
|
48 |
+
self.tools: List[ChatCompletionToolParam] | NotGiven = tools
|
49 |
+
|
50 |
+
@staticmethod
|
51 |
+
def from_messages(messages: List[dict]) -> "OpenAILLMContext":
|
52 |
+
context = OpenAILLMContext()
|
53 |
+
for message in messages:
|
54 |
+
context.add_message({
|
55 |
+
"content": message["content"],
|
56 |
+
"role": message["role"],
|
57 |
+
"name": message["name"] if "name" in message else message["role"]
|
58 |
+
})
|
59 |
+
return context
|
60 |
+
|
61 |
+
@staticmethod
|
62 |
+
def from_image_frame(frame: VisionImageRawFrame) -> "OpenAILLMContext":
|
63 |
+
"""
|
64 |
+
For images, we are deviating from the OpenAI messages shape. OpenAI
|
65 |
+
expects images to be base64 encoded, but other vision models may not.
|
66 |
+
So we'll store the image as bytes and do the base64 encoding as needed
|
67 |
+
in the LLM service.
|
68 |
+
"""
|
69 |
+
context = OpenAILLMContext()
|
70 |
+
buffer = io.BytesIO()
|
71 |
+
Image.frombytes(
|
72 |
+
frame.format,
|
73 |
+
frame.size,
|
74 |
+
frame.image
|
75 |
+
).save(
|
76 |
+
buffer,
|
77 |
+
format="JPEG")
|
78 |
+
context.add_message({
|
79 |
+
"content": frame.text,
|
80 |
+
"role": "user",
|
81 |
+
"data": buffer,
|
82 |
+
"mime_type": "image/jpeg"
|
83 |
+
})
|
84 |
+
return context
|
85 |
+
|
86 |
+
def add_message(self, message: ChatCompletionMessageParam):
|
87 |
+
self.messages.append(message)
|
88 |
+
|
89 |
+
def get_messages(self) -> List[ChatCompletionMessageParam]:
|
90 |
+
return self.messages
|
91 |
+
|
92 |
+
def get_messages_json(self) -> str:
|
93 |
+
return json.dumps(self.messages, cls=CustomEncoder)
|
94 |
+
|
95 |
+
def set_tool_choice(
|
96 |
+
self, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven
|
97 |
+
):
|
98 |
+
self.tool_choice = tool_choice
|
99 |
+
|
100 |
+
def set_tools(self, tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN):
|
101 |
+
if tools != NOT_GIVEN and len(tools) == 0:
|
102 |
+
tools = NOT_GIVEN
|
103 |
+
|
104 |
+
self.tools = tools
|
105 |
+
|
106 |
+
|
107 |
+
@dataclass
|
108 |
+
class OpenAILLMContextFrame(Frame):
|
109 |
+
"""Like an LLMMessagesFrame, but with extra context specific to the OpenAI
|
110 |
+
API. The context in this message is also mutable, and will be changed by the
|
111 |
+
OpenAIContextAggregator frame processor.
|
112 |
+
|
113 |
+
"""
|
114 |
+
context: OpenAILLMContext
|
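A short, hypothetical usage sketch (the message text is made up): building a context from plain dicts and dumping it with the CustomEncoder-backed JSON helper.

from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext

context = OpenAILLMContext.from_messages([
    {"role": "system", "content": "Answer in one sentence."},
    {"role": "user", "content": "What does a frame processor do?"},
])
context.add_message({"role": "assistant", "content": "It transforms frames in a pipeline."})

# get_messages_json() relies on CustomEncoder, so image byte buffers would be
# logged as a short hex preview instead of raw bytes.
print(context.get_messages_json())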
pipecat/processors/aggregators/sentence.py
ADDED
@@ -0,0 +1,54 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
import re
|
8 |
+
|
9 |
+
from pipecat.frames.frames import EndFrame, Frame, InterimTranscriptionFrame, TextFrame
|
10 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
11 |
+
|
12 |
+
|
13 |
+
class SentenceAggregator(FrameProcessor):
|
14 |
+
"""This frame processor aggregates text frames into complete sentences.
|
15 |
+
|
16 |
+
Frame input/output:
|
17 |
+
TextFrame("Hello,") -> None
|
18 |
+
TextFrame(" world.") -> TextFrame("Hello world.")
|
19 |
+
|
20 |
+
Doctest:
|
21 |
+
>>> async def print_frames(aggregator, frame):
|
22 |
+
... async for frame in aggregator.process_frame(frame):
|
23 |
+
... print(frame.text)
|
24 |
+
|
25 |
+
>>> aggregator = SentenceAggregator()
|
26 |
+
>>> asyncio.run(print_frames(aggregator, TextFrame("Hello,")))
|
27 |
+
>>> asyncio.run(print_frames(aggregator, TextFrame(" world.")))
|
28 |
+
Hello, world.
|
29 |
+
"""
|
30 |
+
|
31 |
+
def __init__(self):
|
32 |
+
super().__init__()
|
33 |
+
self._aggregation = ""
|
34 |
+
|
35 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
36 |
+
await super().process_frame(frame, direction)
|
37 |
+
|
38 |
+
# We ignore interim transcriptions at this point.
|
39 |
+
if isinstance(frame, InterimTranscriptionFrame):
|
40 |
+
return
|
41 |
+
|
42 |
+
if isinstance(frame, TextFrame):
|
43 |
+
m = re.search("(.*[?.!])(.*)", frame.text)
|
44 |
+
if m:
|
45 |
+
await self.push_frame(TextFrame(self._aggregation + m.group(1)))
|
46 |
+
self._aggregation = m.group(2)
|
47 |
+
else:
|
48 |
+
self._aggregation += frame.text
|
49 |
+
elif isinstance(frame, EndFrame):
|
50 |
+
if self._aggregation:
|
51 |
+
await self.push_frame(TextFrame(self._aggregation))
|
52 |
+
await self.push_frame(frame)
|
53 |
+
else:
|
54 |
+
await self.push_frame(frame, direction)
|
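For reference, a tiny standalone sketch (not from the commit) of how the regex above splits an incoming TextFrame: everything up to the last sentence-ending punctuation is flushed, and the remainder becomes the new aggregation.

import re

m = re.search("(.*[?.!])(.*)", "I am an LLM. And I")
assert m is not None
print(repr(m.group(1)))  # 'I am an LLM.'  -> flushed as a complete sentence
print(repr(m.group(2)))  # ' And I'        -> kept for the next TextFrame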
pipecat/processors/aggregators/user_response.py
ADDED
@@ -0,0 +1,156 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
8 |
+
from pipecat.frames.frames import (
|
9 |
+
Frame,
|
10 |
+
InterimTranscriptionFrame,
|
11 |
+
StartInterruptionFrame,
|
12 |
+
TextFrame,
|
13 |
+
TranscriptionFrame,
|
14 |
+
UserStartedSpeakingFrame,
|
15 |
+
UserStoppedSpeakingFrame)
|
16 |
+
|
17 |
+
|
18 |
+
class ResponseAggregator(FrameProcessor):
|
19 |
+
"""This frame processor aggregates frames between a start and an end frame
|
20 |
+
into complete text frame sentences.
|
21 |
+
|
22 |
+
For example, frame input/output:
|
23 |
+
UserStartedSpeakingFrame() -> None
|
24 |
+
TranscriptionFrame("Hello,") -> None
|
25 |
+
TranscriptionFrame(" world.") -> None
|
26 |
+
UserStoppedSpeakingFrame() -> TextFrame("Hello, world.")
|
27 |
+
|
28 |
+
Doctest:
|
29 |
+
>>> async def print_frames(aggregator, frame):
|
30 |
+
... async for frame in aggregator.process_frame(frame):
|
31 |
+
... if isinstance(frame, TextFrame):
|
32 |
+
... print(frame.text)
|
33 |
+
|
34 |
+
>>> aggregator = ResponseAggregator(start_frame = UserStartedSpeakingFrame,
|
35 |
+
... end_frame=UserStoppedSpeakingFrame,
|
36 |
+
... accumulator_frame=TranscriptionFrame,
|
37 |
+
... pass_through=False)
|
38 |
+
>>> asyncio.run(print_frames(aggregator, UserStartedSpeakingFrame()))
|
39 |
+
>>> asyncio.run(print_frames(aggregator, TranscriptionFrame("Hello,", 1, 1)))
|
40 |
+
>>> asyncio.run(print_frames(aggregator, TranscriptionFrame("world.", 1, 2)))
|
41 |
+
>>> asyncio.run(print_frames(aggregator, UserStoppedSpeakingFrame()))
|
42 |
+
Hello, world.
|
43 |
+
|
44 |
+
"""
|
45 |
+
|
46 |
+
def __init__(
|
47 |
+
self,
|
48 |
+
*,
|
49 |
+
start_frame,
|
50 |
+
end_frame,
|
51 |
+
accumulator_frame: TextFrame,
|
52 |
+
interim_accumulator_frame: TextFrame | None = None
|
53 |
+
):
|
54 |
+
super().__init__()
|
55 |
+
|
56 |
+
self._start_frame = start_frame
|
57 |
+
self._end_frame = end_frame
|
58 |
+
self._accumulator_frame = accumulator_frame
|
59 |
+
self._interim_accumulator_frame = interim_accumulator_frame
|
60 |
+
|
61 |
+
# Reset our accumulator state.
|
62 |
+
self._reset()
|
63 |
+
|
64 |
+
#
|
65 |
+
# Frame processor
|
66 |
+
#
|
67 |
+
|
68 |
+
# Use cases implemented:
|
69 |
+
#
|
70 |
+
# S: Start, E: End, T: Transcription, I: Interim, X: Text
|
71 |
+
#
|
72 |
+
# S E -> None
|
73 |
+
# S T E -> X
|
74 |
+
# S I T E -> X
|
75 |
+
# S I E T -> X
|
76 |
+
# S I E I T -> X
|
77 |
+
# S E T -> X
|
78 |
+
# S E I T -> X
|
79 |
+
#
|
80 |
+
# The following case would not be supported:
|
81 |
+
#
|
82 |
+
# S I E T1 I T2 -> X
|
83 |
+
#
|
84 |
+
# and T2 would be dropped.
|
85 |
+
|
86 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
87 |
+
await super().process_frame(frame, direction)
|
88 |
+
|
89 |
+
send_aggregation = False
|
90 |
+
|
91 |
+
if isinstance(frame, self._start_frame):
|
92 |
+
self._aggregating = True
|
93 |
+
self._seen_start_frame = True
|
94 |
+
self._seen_end_frame = False
|
95 |
+
self._seen_interim_results = False
|
96 |
+
await self.push_frame(frame, direction)
|
97 |
+
elif isinstance(frame, self._end_frame):
|
98 |
+
self._seen_end_frame = True
|
99 |
+
self._seen_start_frame = False
|
100 |
+
|
101 |
+
# We might have received the end frame but we might still be
|
102 |
+
# aggregating (i.e. we have seen interim results but not the final
|
103 |
+
# text).
|
104 |
+
self._aggregating = self._seen_interim_results or len(self._aggregation) == 0
|
105 |
+
|
106 |
+
# Send the aggregation if we are not aggregating anymore (i.e. no
|
107 |
+
# more interim results received).
|
108 |
+
send_aggregation = not self._aggregating
|
109 |
+
await self.push_frame(frame, direction)
|
110 |
+
elif isinstance(frame, self._accumulator_frame):
|
111 |
+
if self._aggregating:
|
112 |
+
self._aggregation += f" {frame.text}"
|
113 |
+
# We have received a complete sentence, so if we have seen the
|
114 |
+
# end frame and we were still aggregating, it means we should
|
115 |
+
# send the aggregation.
|
116 |
+
send_aggregation = self._seen_end_frame
|
117 |
+
|
118 |
+
# We just got our final result, so let's reset interim results.
|
119 |
+
self._seen_interim_results = False
|
120 |
+
elif self._interim_accumulator_frame and isinstance(frame, self._interim_accumulator_frame):
|
121 |
+
self._seen_interim_results = True
|
122 |
+
else:
|
123 |
+
await self.push_frame(frame, direction)
|
124 |
+
|
125 |
+
if send_aggregation:
|
126 |
+
await self._push_aggregation()
|
127 |
+
|
128 |
+
async def _push_aggregation(self):
|
129 |
+
if len(self._aggregation) > 0:
|
130 |
+
frame = TextFrame(self._aggregation.strip())
|
131 |
+
|
132 |
+
# Reset the aggregation. Reset it before pushing it down, otherwise
|
133 |
+
# if the tasks gets cancelled we won't be able to clear things up.
|
134 |
+
self._aggregation = ""
|
135 |
+
|
136 |
+
await self.push_frame(frame)
|
137 |
+
|
138 |
+
# Reset our accumulator state.
|
139 |
+
self._reset()
|
140 |
+
|
141 |
+
def _reset(self):
|
142 |
+
self._aggregation = ""
|
143 |
+
self._aggregating = False
|
144 |
+
self._seen_start_frame = False
|
145 |
+
self._seen_end_frame = False
|
146 |
+
self._seen_interim_results = False
|
147 |
+
|
148 |
+
|
149 |
+
class UserResponseAggregator(ResponseAggregator):
|
150 |
+
def __init__(self):
|
151 |
+
super().__init__(
|
152 |
+
start_frame=UserStartedSpeakingFrame,
|
153 |
+
end_frame=UserStoppedSpeakingFrame,
|
154 |
+
accumulator_frame=TranscriptionFrame,
|
155 |
+
interim_accumulator_frame=InterimTranscriptionFrame,
|
156 |
+
)
|
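A hypothetical driver (frame field values are made up) that mirrors the `S T E` case from the table above, with two transcriptions; a FrameLogger is linked downstream just to show when the aggregated TextFrame comes out.

import asyncio

from pipecat.frames.frames import (
    TranscriptionFrame, UserStartedSpeakingFrame, UserStoppedSpeakingFrame)
from pipecat.processors.aggregators.user_response import UserResponseAggregator
from pipecat.processors.frame_processor import FrameDirection
from pipecat.processors.logger import FrameLogger


async def main():
    aggregator = UserResponseAggregator()
    aggregator.link(FrameLogger("out"))  # downstream logger prints whatever it receives

    down = FrameDirection.DOWNSTREAM
    await aggregator.process_frame(UserStartedSpeakingFrame(), down)
    await aggregator.process_frame(TranscriptionFrame("Hello,", "user-1", "t0"), down)
    await aggregator.process_frame(TranscriptionFrame("world.", "user-1", "t1"), down)
    # Only after the end frame (and with no pending interim results) is
    # TextFrame("Hello, world.") pushed downstream.
    await aggregator.process_frame(UserStoppedSpeakingFrame(), down)

asyncio.run(main())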
pipecat/processors/aggregators/vision_image_frame.py
ADDED
@@ -0,0 +1,47 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from pipecat.frames.frames import Frame, ImageRawFrame, TextFrame, VisionImageRawFrame
|
8 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
9 |
+
|
10 |
+
|
11 |
+
class VisionImageFrameAggregator(FrameProcessor):
|
12 |
+
"""This aggregator waits for a consecutive TextFrame and an
|
13 |
+
ImageFrame. After the ImageFrame arrives it will output a VisionImageFrame.
|
14 |
+
|
15 |
+
>>> from pipecat.pipeline.frames import ImageFrame
|
16 |
+
|
17 |
+
>>> async def print_frames(aggregator, frame):
|
18 |
+
... async for frame in aggregator.process_frame(frame):
|
19 |
+
... print(frame)
|
20 |
+
|
21 |
+
>>> aggregator = VisionImageFrameAggregator()
|
22 |
+
>>> asyncio.run(print_frames(aggregator, TextFrame("What do you see?")))
|
23 |
+
>>> asyncio.run(print_frames(aggregator, ImageFrame(image=bytes([]), size=(0, 0))))
|
24 |
+
VisionImageFrame, text: What do you see?, image size: 0x0, buffer size: 0 B
|
25 |
+
|
26 |
+
"""
|
27 |
+
|
28 |
+
def __init__(self):
|
29 |
+
super().__init__()
|
30 |
+
self._describe_text = None
|
31 |
+
|
32 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
33 |
+
await super().process_frame(frame, direction)
|
34 |
+
|
35 |
+
if isinstance(frame, TextFrame):
|
36 |
+
self._describe_text = frame.text
|
37 |
+
elif isinstance(frame, ImageRawFrame):
|
38 |
+
if self._describe_text:
|
39 |
+
frame = VisionImageRawFrame(
|
40 |
+
text=self._describe_text,
|
41 |
+
image=frame.image,
|
42 |
+
size=frame.size,
|
43 |
+
format=frame.format)
|
44 |
+
await self.push_frame(frame)
|
45 |
+
self._describe_text = None
|
46 |
+
else:
|
47 |
+
await self.push_frame(frame, direction)
|
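A small hypothetical sketch (empty image bytes, made-up question; frame field names follow the frames used in this file) of the pairing behaviour: the text is held until a raw image arrives, then both leave as one VisionImageRawFrame.

import asyncio

from pipecat.frames.frames import ImageRawFrame, TextFrame
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
from pipecat.processors.frame_processor import FrameDirection
from pipecat.processors.logger import FrameLogger


async def main():
    aggregator = VisionImageFrameAggregator()
    aggregator.link(FrameLogger("vision"))

    down = FrameDirection.DOWNSTREAM
    await aggregator.process_frame(TextFrame("What do you see?"), down)  # held
    image = ImageRawFrame(image=bytes([]), size=(0, 0), format="RGB")
    await aggregator.process_frame(image, down)  # pushes a VisionImageRawFrame

asyncio.run(main())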
pipecat/processors/async_frame_processor.py
ADDED
@@ -0,0 +1,63 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
import asyncio
|
8 |
+
|
9 |
+
from pipecat.frames.frames import EndFrame, Frame, StartInterruptionFrame
|
10 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
11 |
+
|
12 |
+
|
13 |
+
class AsyncFrameProcessor(FrameProcessor):
|
14 |
+
|
15 |
+
def __init__(
|
16 |
+
self,
|
17 |
+
*,
|
18 |
+
name: str | None = None,
|
19 |
+
loop: asyncio.AbstractEventLoop | None = None,
|
20 |
+
**kwargs):
|
21 |
+
super().__init__(name=name, loop=loop, **kwargs)
|
22 |
+
|
23 |
+
self._create_push_task()
|
24 |
+
|
25 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
26 |
+
await super().process_frame(frame, direction)
|
27 |
+
|
28 |
+
if isinstance(frame, StartInterruptionFrame):
|
29 |
+
await self._handle_interruptions(frame)
|
30 |
+
|
31 |
+
async def queue_frame(
|
32 |
+
self,
|
33 |
+
frame: Frame,
|
34 |
+
direction: FrameDirection = FrameDirection.DOWNSTREAM):
|
35 |
+
await self._push_queue.put((frame, direction))
|
36 |
+
|
37 |
+
async def cleanup(self):
|
38 |
+
self._push_frame_task.cancel()
|
39 |
+
await self._push_frame_task
|
40 |
+
|
41 |
+
async def _handle_interruptions(self, frame: Frame):
|
42 |
+
# Cancel the task. This will stop pushing frames downstream.
|
43 |
+
self._push_frame_task.cancel()
|
44 |
+
await self._push_frame_task
|
45 |
+
# Push an out-of-band frame (i.e. not using the ordered push
|
46 |
+
# frame task).
|
47 |
+
await self.push_frame(frame)
|
48 |
+
# Create a new queue and task.
|
49 |
+
self._create_push_task()
|
50 |
+
|
51 |
+
def _create_push_task(self):
|
52 |
+
self._push_queue = asyncio.Queue()
|
53 |
+
self._push_frame_task = self.get_event_loop().create_task(self._push_frame_task_handler())
|
54 |
+
|
55 |
+
async def _push_frame_task_handler(self):
|
56 |
+
running = True
|
57 |
+
while running:
|
58 |
+
try:
|
59 |
+
(frame, direction) = await self._push_queue.get()
|
60 |
+
await self.push_frame(frame, direction)
|
61 |
+
running = not isinstance(frame, EndFrame)
|
62 |
+
except asyncio.CancelledError:
|
63 |
+
break
|
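A hypothetical minimal subclass showing the intended pattern: frames go through the internal queue, so a StartInterruptionFrame (handled by the parent class) can cancel whatever is still waiting to be pushed.

from pipecat.frames.frames import Frame, StartInterruptionFrame
from pipecat.processors.async_frame_processor import AsyncFrameProcessor
from pipecat.processors.frame_processor import FrameDirection


class QueueingPassthrough(AsyncFrameProcessor):
    """Hypothetical example processor: forwards every frame via the push queue."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        # The parent handles StartInterruptionFrame by cancelling the push task,
        # pushing the interruption out-of-band and recreating the queue.
        await super().process_frame(frame, direction)

        if not isinstance(frame, StartInterruptionFrame):
            await self.queue_frame(frame, direction)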
pipecat/processors/filters/__init__.py
ADDED
File without changes
|
pipecat/processors/filters/frame_filter.py
ADDED
@@ -0,0 +1,36 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from typing import List
|
8 |
+
|
9 |
+
from pipecat.frames.frames import AppFrame, ControlFrame, Frame, SystemFrame
|
10 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
11 |
+
|
12 |
+
|
13 |
+
class FrameFilter(FrameProcessor):
|
14 |
+
|
15 |
+
def __init__(self, types: List[type]):
|
16 |
+
super().__init__()
|
17 |
+
self._types = types
|
18 |
+
|
19 |
+
#
|
20 |
+
# Frame processor
|
21 |
+
#
|
22 |
+
|
23 |
+
def _should_passthrough_frame(self, frame):
|
24 |
+
for t in self._types:
|
25 |
+
if isinstance(frame, t):
|
26 |
+
return True
|
27 |
+
|
28 |
+
return (isinstance(frame, AppFrame)
|
29 |
+
or isinstance(frame, ControlFrame)
|
30 |
+
or isinstance(frame, SystemFrame))
|
31 |
+
|
32 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
33 |
+
await super().process_frame(frame, direction)
|
34 |
+
|
35 |
+
if self._should_passthrough_frame(frame):
|
36 |
+
await self.push_frame(frame, direction)
|
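A hypothetical usage sketch: only the listed frame types (plus app, control and system frames) survive; anything else, such as raw audio, is dropped.

import asyncio

from pipecat.frames.frames import AudioRawFrame, TextFrame
from pipecat.processors.filters.frame_filter import FrameFilter
from pipecat.processors.frame_processor import FrameDirection
from pipecat.processors.logger import FrameLogger


async def main():
    text_only = FrameFilter([TextFrame])
    text_only.link(FrameLogger("filtered"))

    down = FrameDirection.DOWNSTREAM
    await text_only.process_frame(TextFrame("kept"), down)
    await text_only.process_frame(
        AudioRawFrame(audio=b"\x00" * 320, sample_rate=16000, num_channels=1), down)  # dropped

asyncio.run(main())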
pipecat/processors/filters/function_filter.py
ADDED
@@ -0,0 +1,30 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from typing import Awaitable, Callable
|
8 |
+
|
9 |
+
from pipecat.frames.frames import Frame, SystemFrame
|
10 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
11 |
+
|
12 |
+
|
13 |
+
class FunctionFilter(FrameProcessor):
|
14 |
+
|
15 |
+
def __init__(self, filter: Callable[[Frame], Awaitable[bool]]):
|
16 |
+
super().__init__()
|
17 |
+
self._filter = filter
|
18 |
+
|
19 |
+
#
|
20 |
+
# Frame processor
|
21 |
+
#
|
22 |
+
|
23 |
+
def _should_passthrough_frame(self, frame):
|
24 |
+
return isinstance(frame, SystemFrame)
|
25 |
+
|
26 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
27 |
+
passthrough = self._should_passthrough_frame(frame)
|
28 |
+
allowed = await self._filter(frame)
|
29 |
+
if passthrough or allowed:
|
30 |
+
await self.push_frame(frame, direction)
|
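A hypothetical predicate-based filter: the callable is awaited for every frame, and system frames always pass regardless of what it returns.

import asyncio

from pipecat.frames.frames import Frame, TextFrame
from pipecat.processors.filters.function_filter import FunctionFilter


async def only_short_text(frame: Frame) -> bool:
    # Made-up rule: let through only short TextFrames.
    return isinstance(frame, TextFrame) and len(frame.text) < 80


async def main():
    short_text_filter = FunctionFilter(only_short_text)
    print(short_text_filter)

asyncio.run(main())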
pipecat/processors/filters/wake_check_filter.py
ADDED
@@ -0,0 +1,86 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
import re
|
8 |
+
import time
|
9 |
+
|
10 |
+
from enum import Enum
|
11 |
+
|
12 |
+
from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
|
13 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
14 |
+
|
15 |
+
from loguru import logger
|
16 |
+
|
17 |
+
|
18 |
+
class WakeCheckFilter(FrameProcessor):
|
19 |
+
"""
|
20 |
+
This filter looks for wake phrases in the transcription frames and only passes through frames
|
21 |
+
after a wake phrase has been detected. It also has a keepalive timeout to allow for a brief
|
22 |
+
period of continued conversation after a wake phrase has been detected.
|
23 |
+
"""
|
24 |
+
class WakeState(Enum):
|
25 |
+
IDLE = 1
|
26 |
+
AWAKE = 2
|
27 |
+
|
28 |
+
class ParticipantState:
|
29 |
+
def __init__(self, participant_id: str):
|
30 |
+
self.participant_id = participant_id
|
31 |
+
self.state = WakeCheckFilter.WakeState.IDLE
|
32 |
+
self.wake_timer = 0.0
|
33 |
+
self.accumulator = ""
|
34 |
+
|
35 |
+
def __init__(self, wake_phrases: list[str], keepalive_timeout: float = 3):
|
36 |
+
super().__init__()
|
37 |
+
self._participant_states = {}
|
38 |
+
self._keepalive_timeout = keepalive_timeout
|
39 |
+
self._wake_patterns = []
|
40 |
+
for name in wake_phrases:
|
41 |
+
pattern = re.compile(r'\b' + r'\s*'.join(re.escape(word)
|
42 |
+
for word in name.split()) + r'\b', re.IGNORECASE)
|
43 |
+
self._wake_patterns.append(pattern)
|
44 |
+
|
45 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
46 |
+
await super().process_frame(frame, direction)
|
47 |
+
|
48 |
+
try:
|
49 |
+
if isinstance(frame, TranscriptionFrame):
|
50 |
+
p = self._participant_states.get(frame.user_id)
|
51 |
+
if p is None:
|
52 |
+
p = WakeCheckFilter.ParticipantState(frame.user_id)
|
53 |
+
self._participant_states[frame.user_id] = p
|
54 |
+
|
55 |
+
# If we have been AWAKE within the last keepalive_timeout seconds, pass
|
56 |
+
# the frame through
|
57 |
+
if p.state == WakeCheckFilter.WakeState.AWAKE:
|
58 |
+
if time.time() - p.wake_timer < self._keepalive_timeout:
|
59 |
+
logger.debug(
|
60 |
+
f"Wake phrase keepalive timeout has not expired. Pushing {frame}")
|
61 |
+
p.wake_timer = time.time()
|
62 |
+
await self.push_frame(frame)
|
63 |
+
return
|
64 |
+
else:
|
65 |
+
p.state = WakeCheckFilter.WakeState.IDLE
|
66 |
+
|
67 |
+
p.accumulator += frame.text
|
68 |
+
for pattern in self._wake_patterns:
|
69 |
+
match = pattern.search(p.accumulator)
|
70 |
+
if match:
|
71 |
+
logger.debug(f"Wake phrase triggered: {match.group()}")
|
72 |
+
# Found the wake word. Discard from the accumulator up to the start of the match
|
73 |
+
# and modify the frame in place.
|
74 |
+
p.state = WakeCheckFilter.WakeState.AWAKE
|
75 |
+
p.wake_timer = time.time()
|
76 |
+
frame.text = p.accumulator[match.start():]
|
77 |
+
p.accumulator = ""
|
78 |
+
await self.push_frame(frame)
|
79 |
+
else:
|
80 |
+
pass
|
81 |
+
else:
|
82 |
+
await self.push_frame(frame, direction)
|
83 |
+
except Exception as e:
|
84 |
+
error_msg = f"Error in wake word filter: {e}"
|
85 |
+
logger.exception(error_msg)
|
86 |
+
await self.push_error(ErrorFrame(error_msg))
|
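For reference, a standalone sketch (wake phrase and transcript are made up) of the pattern built above: word boundaries on both ends, optional whitespace between the words, case-insensitive.

import re

wake_phrase = "Hey Pipecat"
pattern = re.compile(
    r'\b' + r'\s*'.join(re.escape(word) for word in wake_phrase.split()) + r'\b',
    re.IGNORECASE)

print(bool(pattern.search("ok hey   pipecat, what's the weather?")))  # True
print(bool(pattern.search("no wake phrase here")))                    # False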
pipecat/processors/frame_processor.py
ADDED
@@ -0,0 +1,162 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
import asyncio
|
8 |
+
import time
|
9 |
+
|
10 |
+
from enum import Enum
|
11 |
+
|
12 |
+
from pipecat.frames.frames import ErrorFrame, Frame, MetricsFrame, StartFrame, StartInterruptionFrame, UserStoppedSpeakingFrame
|
13 |
+
from pipecat.utils.utils import obj_count, obj_id
|
14 |
+
|
15 |
+
from loguru import logger
|
16 |
+
|
17 |
+
|
18 |
+
class FrameDirection(Enum):
|
19 |
+
DOWNSTREAM = 1
|
20 |
+
UPSTREAM = 2
|
21 |
+
|
22 |
+
|
23 |
+
class FrameProcessorMetrics:
|
24 |
+
def __init__(self, name: str):
|
25 |
+
self._name = name
|
26 |
+
self._start_ttfb_time = 0
|
27 |
+
self._start_processing_time = 0
|
28 |
+
self._should_report_ttfb = True
|
29 |
+
|
30 |
+
async def start_ttfb_metrics(self, report_only_initial_ttfb):
|
31 |
+
if self._should_report_ttfb:
|
32 |
+
self._start_ttfb_time = time.time()
|
33 |
+
self._should_report_ttfb = not report_only_initial_ttfb
|
34 |
+
|
35 |
+
async def stop_ttfb_metrics(self):
|
36 |
+
if self._start_ttfb_time == 0:
|
37 |
+
return None
|
38 |
+
|
39 |
+
value = time.time() - self._start_ttfb_time
|
40 |
+
logger.debug(f"{self._name} TTFB: {value}")
|
41 |
+
ttfb = {
|
42 |
+
"processor": self._name,
|
43 |
+
"value": value
|
44 |
+
}
|
45 |
+
self._start_ttfb_time = 0
|
46 |
+
return MetricsFrame(ttfb=[ttfb])
|
47 |
+
|
48 |
+
async def start_processing_metrics(self):
|
49 |
+
self._start_processing_time = time.time()
|
50 |
+
|
51 |
+
async def stop_processing_metrics(self):
|
52 |
+
if self._start_processing_time == 0:
|
53 |
+
return None
|
54 |
+
|
55 |
+
value = time.time() - self._start_processing_time
|
56 |
+
logger.debug(f"{self._name} processing time: {value}")
|
57 |
+
processing = {
|
58 |
+
"processor": self._name,
|
59 |
+
"value": value
|
60 |
+
}
|
61 |
+
self._start_processing_time = 0
|
62 |
+
return MetricsFrame(processing=[processing])
|
63 |
+
|
64 |
+
|
65 |
+
class FrameProcessor:
|
66 |
+
|
67 |
+
def __init__(
|
68 |
+
self,
|
69 |
+
*,
|
70 |
+
name: str | None = None,
|
71 |
+
loop: asyncio.AbstractEventLoop | None = None,
|
72 |
+
**kwargs):
|
73 |
+
self.id: int = obj_id()
|
74 |
+
self.name = name or f"{self.__class__.__name__}#{obj_count(self)}"
|
75 |
+
self._prev: "FrameProcessor" | None = None
|
76 |
+
self._next: "FrameProcessor" | None = None
|
77 |
+
self._loop: asyncio.AbstractEventLoop = loop or asyncio.get_running_loop()
|
78 |
+
|
79 |
+
# Properties
|
80 |
+
self._allow_interruptions = False
|
81 |
+
self._enable_metrics = False
|
82 |
+
self._report_only_initial_ttfb = False
|
83 |
+
|
84 |
+
# Metrics
|
85 |
+
self._metrics = FrameProcessorMetrics(name=self.name)
|
86 |
+
|
87 |
+
@property
|
88 |
+
def interruptions_allowed(self):
|
89 |
+
return self._allow_interruptions
|
90 |
+
|
91 |
+
@property
|
92 |
+
def metrics_enabled(self):
|
93 |
+
return self._enable_metrics
|
94 |
+
|
95 |
+
@property
|
96 |
+
def report_only_initial_ttfb(self):
|
97 |
+
return self._report_only_initial_ttfb
|
98 |
+
|
99 |
+
def can_generate_metrics(self) -> bool:
|
100 |
+
return False
|
101 |
+
|
102 |
+
async def start_ttfb_metrics(self):
|
103 |
+
if self.can_generate_metrics() and self.metrics_enabled:
|
104 |
+
await self._metrics.start_ttfb_metrics(self._report_only_initial_ttfb)
|
105 |
+
|
106 |
+
async def stop_ttfb_metrics(self):
|
107 |
+
if self.can_generate_metrics() and self.metrics_enabled:
|
108 |
+
frame = await self._metrics.stop_ttfb_metrics()
|
109 |
+
if frame:
|
110 |
+
await self.push_frame(frame)
|
111 |
+
|
112 |
+
async def start_processing_metrics(self):
|
113 |
+
if self.can_generate_metrics() and self.metrics_enabled:
|
114 |
+
await self._metrics.start_processing_metrics()
|
115 |
+
|
116 |
+
async def stop_processing_metrics(self):
|
117 |
+
if self.can_generate_metrics() and self.metrics_enabled:
|
118 |
+
frame = await self._metrics.stop_processing_metrics()
|
119 |
+
if frame:
|
120 |
+
await self.push_frame(frame)
|
121 |
+
|
122 |
+
async def stop_all_metrics(self):
|
123 |
+
await self.stop_ttfb_metrics()
|
124 |
+
await self.stop_processing_metrics()
|
125 |
+
|
126 |
+
async def cleanup(self):
|
127 |
+
pass
|
128 |
+
|
129 |
+
def link(self, processor: 'FrameProcessor'):
|
130 |
+
self._next = processor
|
131 |
+
processor._prev = self
|
132 |
+
logger.debug(f"Linking {self} -> {self._next}")
|
133 |
+
|
134 |
+
def get_event_loop(self) -> asyncio.AbstractEventLoop:
|
135 |
+
return self._loop
|
136 |
+
|
137 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
138 |
+
if isinstance(frame, StartFrame):
|
139 |
+
self._allow_interruptions = frame.allow_interruptions
|
140 |
+
self._enable_metrics = frame.enable_metrics
|
141 |
+
self._report_only_initial_ttfb = frame.report_only_initial_ttfb
|
142 |
+
elif isinstance(frame, StartInterruptionFrame):
|
143 |
+
await self.stop_all_metrics()
|
144 |
+
elif isinstance(frame, UserStoppedSpeakingFrame):
|
145 |
+
self._should_report_ttfb = True
|
146 |
+
|
147 |
+
async def push_error(self, error: ErrorFrame):
|
148 |
+
await self.push_frame(error, FrameDirection.UPSTREAM)
|
149 |
+
|
150 |
+
async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM):
|
151 |
+
try:
|
152 |
+
if direction == FrameDirection.DOWNSTREAM and self._next:
|
153 |
+
logger.trace(f"Pushing {frame} from {self} to {self._next}")
|
154 |
+
await self._next.process_frame(frame, direction)
|
155 |
+
elif direction == FrameDirection.UPSTREAM and self._prev:
|
156 |
+
logger.trace(f"Pushing {frame} upstream from {self} to {self._prev}")
|
157 |
+
await self._prev.process_frame(frame, direction)
|
158 |
+
except Exception as e:
|
159 |
+
logger.exception(f"Uncaught exception in {self}: {e}")
|
160 |
+
|
161 |
+
def __str__(self):
|
162 |
+
return self.name
|
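A hypothetical two-processor sketch of the linking model: push_frame() walks exactly one hop, downstream to _next or upstream to _prev, and awaits that processor's process_frame() directly.

import asyncio

from pipecat.frames.frames import ErrorFrame, TextFrame
from pipecat.processors.frame_processor import FrameDirection
from pipecat.processors.logger import FrameLogger


async def main():
    a = FrameLogger("A")
    b = FrameLogger("B")
    a.link(b)  # a -> b

    await a.push_frame(TextFrame("hi"))  # delivered downstream to b
    await b.push_frame(ErrorFrame("oops"), FrameDirection.UPSTREAM)  # delivered upstream to a

asyncio.run(main())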
pipecat/processors/frameworks/__init__.py
ADDED
File without changes
|
pipecat/processors/frameworks/langchain.py
ADDED
@@ -0,0 +1,80 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from typing import Union
|
8 |
+
|
9 |
+
from pipecat.frames.frames import (
|
10 |
+
Frame,
|
11 |
+
LLMFullResponseEndFrame,
|
12 |
+
LLMFullResponseStartFrame,
|
13 |
+
LLMMessagesFrame,
|
14 |
+
LLMResponseEndFrame,
|
15 |
+
LLMResponseStartFrame,
|
16 |
+
TextFrame)
|
17 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
18 |
+
|
19 |
+
from loguru import logger
|
20 |
+
|
21 |
+
try:
|
22 |
+
from langchain_core.messages import AIMessageChunk
|
23 |
+
from langchain_core.runnables import Runnable
|
24 |
+
except ModuleNotFoundError as e:
|
25 |
+
logger.exception(
|
26 |
+
"In order to use Langchain, you need to `pip install pipecat-ai[langchain]`. "
|
27 |
+
)
|
28 |
+
raise Exception(f"Missing module: {e}")
|
29 |
+
|
30 |
+
|
31 |
+
class LangchainProcessor(FrameProcessor):
|
32 |
+
def __init__(self, chain: Runnable, transcript_key: str = "input"):
|
33 |
+
super().__init__()
|
34 |
+
self._chain = chain
|
35 |
+
self._transcript_key = transcript_key
|
36 |
+
self._participant_id: str | None = None
|
37 |
+
|
38 |
+
def set_participant_id(self, participant_id: str):
|
39 |
+
self._participant_id = participant_id
|
40 |
+
|
41 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
42 |
+
await super().process_frame(frame, direction)
|
43 |
+
|
44 |
+
if isinstance(frame, LLMMessagesFrame):
|
45 |
+
# Messages are accumulated by the `LLMUserResponseAggregator` in a list of messages.
|
46 |
+
# The last one by the human is the one we want to send to the LLM.
|
47 |
+
logger.debug(f"Got transcription frame {frame}")
|
48 |
+
text: str = frame.messages[-1]["content"]
|
49 |
+
|
50 |
+
await self._ainvoke(text.strip())
|
51 |
+
else:
|
52 |
+
await self.push_frame(frame, direction)
|
53 |
+
|
54 |
+
@staticmethod
|
55 |
+
def __get_token_value(text: Union[str, AIMessageChunk]) -> str:
|
56 |
+
match text:
|
57 |
+
case str():
|
58 |
+
return text
|
59 |
+
case AIMessageChunk():
|
60 |
+
return text.content
|
61 |
+
case _:
|
62 |
+
return ""
|
63 |
+
|
64 |
+
async def _ainvoke(self, text: str):
|
65 |
+
logger.debug(f"Invoking chain with {text}")
|
66 |
+
await self.push_frame(LLMFullResponseStartFrame())
|
67 |
+
try:
|
68 |
+
async for token in self._chain.astream(
|
69 |
+
{self._transcript_key: text},
|
70 |
+
config={"configurable": {"session_id": self._participant_id}},
|
71 |
+
):
|
72 |
+
await self.push_frame(LLMResponseStartFrame())
|
73 |
+
await self.push_frame(TextFrame(self.__get_token_value(token)))
|
74 |
+
await self.push_frame(LLMResponseEndFrame())
|
75 |
+
except GeneratorExit:
|
76 |
+
logger.warning(f"{self} generator was closed prematurely")
|
77 |
+
except Exception as e:
|
78 |
+
logger.exception(f"{self} an unknown error occurred: {e}")
|
79 |
+
finally:
|
80 |
+
await self.push_frame(LLMFullResponseEndFrame())
|
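A hypothetical construction sketch (a RunnableLambda stands in for a real prompt | model chain, so only langchain-core is needed): any Runnable whose astream() yields strings or AIMessageChunks should work here.

import asyncio

from langchain_core.runnables import RunnableLambda

from pipecat.processors.frameworks.langchain import LangchainProcessor


async def main():
    # Placeholder chain: uppercases the transcript instead of calling a model.
    chain = RunnableLambda(lambda d: d["input"].upper())
    processor = LangchainProcessor(chain, transcript_key="input")
    print(processor)

asyncio.run(main())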
pipecat/processors/logger.py
ADDED
@@ -0,0 +1,27 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from pipecat.frames.frames import Frame
|
8 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
9 |
+
from loguru import logger
|
10 |
+
from typing import Optional
|
11 |
+
logger = logger.opt(ansi=True)
|
12 |
+
|
13 |
+
|
14 |
+
class FrameLogger(FrameProcessor):
|
15 |
+
def __init__(self, prefix="Frame", color: Optional[str] = None):
|
16 |
+
super().__init__()
|
17 |
+
self._prefix = prefix
|
18 |
+
self._color = color
|
19 |
+
|
20 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
21 |
+
dir = "<" if direction is FrameDirection.UPSTREAM else ">"
|
22 |
+
msg = f"{dir} {self._prefix}: {frame}"
|
23 |
+
if self._color:
|
24 |
+
msg = f"<{self._color}>{msg}</>"
|
25 |
+
logger.debug(msg)
|
26 |
+
|
27 |
+
await self.push_frame(frame, direction)
|
pipecat/processors/text_transformer.py
ADDED
@@ -0,0 +1,38 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from typing import Coroutine
|
8 |
+
|
9 |
+
from pipecat.frames.frames import Frame, TextFrame
|
10 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
11 |
+
|
12 |
+
|
13 |
+
class StatelessTextTransformer(FrameProcessor):
|
14 |
+
"""This processor calls the given function on any text in a text frame.
|
15 |
+
|
16 |
+
>>> async def print_frames(aggregator, frame):
|
17 |
+
... async for frame in aggregator.process_frame(frame):
|
18 |
+
... print(frame.text)
|
19 |
+
|
20 |
+
>>> aggregator = StatelessTextTransformer(lambda x: x.upper())
|
21 |
+
>>> asyncio.run(print_frames(aggregator, TextFrame("Hello")))
|
22 |
+
HELLO
|
23 |
+
"""
|
24 |
+
|
25 |
+
def __init__(self, transform_fn):
|
26 |
+
super().__init__()
|
27 |
+
self._transform_fn = transform_fn
|
28 |
+
|
29 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
30 |
+
await super().process_frame(frame, direction)
|
31 |
+
|
32 |
+
if isinstance(frame, TextFrame):
|
33 |
+
result = self._transform_fn(frame.text)
|
34 |
+
if isinstance(result, Coroutine):
|
35 |
+
result = await result
|
36 |
+
await self.push_frame(result)
|
37 |
+
else:
|
38 |
+
await self.push_frame(frame, direction)
|
pipecat/serializers/__init__.py
ADDED
File without changes
|
pipecat/serializers/base_serializer.py
ADDED
@@ -0,0 +1,20 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
from abc import ABC, abstractmethod
|
8 |
+
|
9 |
+
from pipecat.frames.frames import Frame
|
10 |
+
|
11 |
+
|
12 |
+
class FrameSerializer(ABC):
|
13 |
+
|
14 |
+
@abstractmethod
|
15 |
+
def serialize(self, frame: Frame) -> str | bytes | None:
|
16 |
+
pass
|
17 |
+
|
18 |
+
@abstractmethod
|
19 |
+
def deserialize(self, data: str | bytes) -> Frame | None:
|
20 |
+
pass
|
pipecat/serializers/protobuf.py
ADDED
@@ -0,0 +1,92 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
import dataclasses
|
8 |
+
|
9 |
+
import pipecat.frames.protobufs.frames_pb2 as frame_protos
|
10 |
+
|
11 |
+
from pipecat.frames.frames import AudioRawFrame, Frame, TextFrame, TranscriptionFrame
|
12 |
+
from pipecat.serializers.base_serializer import FrameSerializer
|
13 |
+
|
14 |
+
from loguru import logger
|
15 |
+
|
16 |
+
|
17 |
+
class ProtobufFrameSerializer(FrameSerializer):
|
18 |
+
SERIALIZABLE_TYPES = {
|
19 |
+
TextFrame: "text",
|
20 |
+
AudioRawFrame: "audio",
|
21 |
+
TranscriptionFrame: "transcription"
|
22 |
+
}
|
23 |
+
|
24 |
+
SERIALIZABLE_FIELDS = {v: k for k, v in SERIALIZABLE_TYPES.items()}
|
25 |
+
|
26 |
+
def __init__(self):
|
27 |
+
pass
|
28 |
+
|
29 |
+
def serialize(self, frame: Frame) -> str | bytes | None:
|
30 |
+
proto_frame = frame_protos.Frame()
|
31 |
+
if type(frame) not in self.SERIALIZABLE_TYPES:
|
32 |
+
raise ValueError(
|
33 |
+
f"Frame type {type(frame)} is not serializable. You may need to add it to ProtobufFrameSerializer.SERIALIZABLE_FIELDS.")
|
34 |
+
|
35 |
+
# ignoring linter errors; we check that type(frame) is in this dict above
|
36 |
+
proto_optional_name = self.SERIALIZABLE_TYPES[type(frame)] # type: ignore
|
37 |
+
for field in dataclasses.fields(frame): # type: ignore
|
38 |
+
setattr(getattr(proto_frame, proto_optional_name), field.name,
|
39 |
+
getattr(frame, field.name))
|
40 |
+
|
41 |
+
result = proto_frame.SerializeToString()
|
42 |
+
return result
|
43 |
+
|
44 |
+
def deserialize(self, data: str | bytes) -> Frame | None:
|
45 |
+
"""Returns a Frame object from a Frame protobuf. Used to convert frames
|
46 |
+
passed over the wire as protobufs to Frame objects used in pipelines
|
47 |
+
and frame processors.
|
48 |
+
|
49 |
+
>>> serializer = ProtobufFrameSerializer()
|
50 |
+
>>> serializer.deserialize(
|
51 |
+
... serializer.serialize(AudioFrame(data=b'1234567890')))
|
52 |
+
AudioFrame(data=b'1234567890')
|
53 |
+
|
54 |
+
>>> serializer.deserialize(
|
55 |
+
... serializer.serialize(TextFrame(text='hello world')))
|
56 |
+
TextFrame(text='hello world')
|
57 |
+
|
58 |
+
>>> serializer.deserialize(serializer.serialize(TranscriptionFrame(
|
59 |
+
... text="Hello there!", participantId="123", timestamp="2021-01-01")))
|
60 |
+
TranscriptionFrame(text='Hello there!', participantId='123', timestamp='2021-01-01')
|
61 |
+
"""
|
62 |
+
|
63 |
+
proto = frame_protos.Frame.FromString(data)
|
64 |
+
which = proto.WhichOneof("frame")
|
65 |
+
if which not in self.SERIALIZABLE_FIELDS:
|
66 |
+
logger.error("Unable to deserialize a valid frame")
|
67 |
+
return None
|
68 |
+
|
69 |
+
class_name = self.SERIALIZABLE_FIELDS[which]
|
70 |
+
args = getattr(proto, which)
|
71 |
+
args_dict = {}
|
72 |
+
for field in proto.DESCRIPTOR.fields_by_name[which].message_type.fields:
|
73 |
+
args_dict[field.name] = getattr(args, field.name)
|
74 |
+
|
75 |
+
# Remove special fields if needed
|
76 |
+
id = getattr(args, "id")
|
77 |
+
name = getattr(args, "name")
|
78 |
+
if not id:
|
79 |
+
del args_dict["id"]
|
80 |
+
if not name:
|
81 |
+
del args_dict["name"]
|
82 |
+
|
83 |
+
# Create the instance
|
84 |
+
instance = class_name(**args_dict)
|
85 |
+
|
86 |
+
# Set special fields
|
87 |
+
if id:
|
88 |
+
setattr(instance, "id", getattr(args, "id"))
|
89 |
+
if name:
|
90 |
+
setattr(instance, "name", getattr(args, "name"))
|
91 |
+
|
92 |
+
return instance
|
pipecat/serializers/twilio.py
ADDED
@@ -0,0 +1,52 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
import base64
|
8 |
+
import json
|
9 |
+
|
10 |
+
from pipecat.frames.frames import AudioRawFrame, Frame
|
11 |
+
from pipecat.serializers.base_serializer import FrameSerializer
|
12 |
+
from pipecat.utils.audio import ulaw_8000_to_pcm_16000, pcm_16000_to_ulaw_8000
|
13 |
+
|
14 |
+
|
15 |
+
class TwilioFrameSerializer(FrameSerializer):
|
16 |
+
SERIALIZABLE_TYPES = {
|
17 |
+
AudioRawFrame: "audio",
|
18 |
+
}
|
19 |
+
|
20 |
+
def __init__(self, stream_sid: str):
|
21 |
+
self._stream_sid = stream_sid
|
22 |
+
|
23 |
+
def serialize(self, frame: Frame) -> str | bytes | None:
|
24 |
+
if not isinstance(frame, AudioRawFrame):
|
25 |
+
return None
|
26 |
+
|
27 |
+
data = frame.audio
|
28 |
+
|
29 |
+
serialized_data = pcm_16000_to_ulaw_8000(data)
|
30 |
+
payload = base64.b64encode(serialized_data).decode("utf-8")
|
31 |
+
answer = {
|
32 |
+
"event": "media",
|
33 |
+
"streamSid": self._stream_sid,
|
34 |
+
"media": {
|
35 |
+
"payload": payload
|
36 |
+
}
|
37 |
+
}
|
38 |
+
|
39 |
+
return json.dumps(answer)
|
40 |
+
|
41 |
+
def deserialize(self, data: str | bytes) -> Frame | None:
|
42 |
+
message = json.loads(data)
|
43 |
+
|
44 |
+
if message["event"] != "media":
|
45 |
+
return None
|
46 |
+
else:
|
47 |
+
payload_base64 = message["media"]["payload"]
|
48 |
+
payload = base64.b64decode(payload_base64)
|
49 |
+
|
50 |
+
deserialized_data = ulaw_8000_to_pcm_16000(payload)
|
51 |
+
audio_frame = AudioRawFrame(audio=deserialized_data, num_channels=1, sample_rate=16000)
|
52 |
+
return audio_frame
|
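A hypothetical roundtrip sketch (the stream SID and payload are made up): an inbound Twilio media message is decoded to 16 kHz PCM, and the resulting frame can be re-serialized back into the outbound JSON shape.

import base64
import json

from pipecat.serializers.twilio import TwilioFrameSerializer

serializer = TwilioFrameSerializer(stream_sid="MZ0123456789abcdef")  # placeholder SID

incoming = json.dumps({
    "event": "media",
    "media": {"payload": base64.b64encode(b"\x7f" * 160).decode("utf-8")},  # 20 ms of mu-law silence
})
frame = serializer.deserialize(incoming)   # AudioRawFrame: 16000 Hz, mono PCM
outgoing = serializer.serialize(frame)     # back to a Twilio "media" JSON string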
pipecat/services/__init__.py
ADDED
File without changes
|
pipecat/services/ai_services.py
ADDED
@@ -0,0 +1,300 @@
1 |
+
#
|
2 |
+
# Copyright (c) 2024, Daily
|
3 |
+
#
|
4 |
+
# SPDX-License-Identifier: BSD 2-Clause License
|
5 |
+
#
|
6 |
+
|
7 |
+
import io
|
8 |
+
import wave
|
9 |
+
|
10 |
+
from abc import abstractmethod
|
11 |
+
from typing import AsyncGenerator
|
12 |
+
|
13 |
+
from pipecat.frames.frames import (
|
14 |
+
AudioRawFrame,
|
15 |
+
CancelFrame,
|
16 |
+
EndFrame,
|
17 |
+
ErrorFrame,
|
18 |
+
Frame,
|
19 |
+
LLMFullResponseEndFrame,
|
20 |
+
StartFrame,
|
21 |
+
StartInterruptionFrame,
|
22 |
+
TTSStartedFrame,
|
23 |
+
TTSStoppedFrame,
|
24 |
+
TextFrame,
|
25 |
+
VisionImageRawFrame,
|
26 |
+
)
|
27 |
+
from pipecat.processors.async_frame_processor import AsyncFrameProcessor
|
28 |
+
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
29 |
+
from pipecat.utils.audio import calculate_audio_volume
|
30 |
+
from pipecat.utils.utils import exp_smoothing
|
31 |
+
|
32 |
+
|
33 |
+
class AIService(FrameProcessor):
|
34 |
+
def __init__(self, **kwargs):
|
35 |
+
super().__init__(**kwargs)
|
36 |
+
|
37 |
+
async def start(self, frame: StartFrame):
|
38 |
+
pass
|
39 |
+
|
40 |
+
async def stop(self, frame: EndFrame):
|
41 |
+
pass
|
42 |
+
|
43 |
+
async def cancel(self, frame: CancelFrame):
|
44 |
+
pass
|
45 |
+
|
46 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
47 |
+
await super().process_frame(frame, direction)
|
48 |
+
|
49 |
+
if isinstance(frame, StartFrame):
|
50 |
+
await self.start(frame)
|
51 |
+
elif isinstance(frame, CancelFrame):
|
52 |
+
await self.cancel(frame)
|
53 |
+
elif isinstance(frame, EndFrame):
|
54 |
+
await self.stop(frame)
|
55 |
+
|
56 |
+
async def process_generator(self, generator: AsyncGenerator[Frame, None]):
|
57 |
+
async for f in generator:
|
58 |
+
if isinstance(f, ErrorFrame):
|
59 |
+
await self.push_error(f)
|
60 |
+
else:
|
61 |
+
await self.push_frame(f)
|
62 |
+
|
63 |
+
|
64 |
+
class AsyncAIService(AsyncFrameProcessor):
|
65 |
+
def __init__(self, **kwargs):
|
66 |
+
super().__init__(**kwargs)
|
67 |
+
|
68 |
+
async def start(self, frame: StartFrame):
|
69 |
+
pass
|
70 |
+
|
71 |
+
async def stop(self, frame: EndFrame):
|
72 |
+
pass
|
73 |
+
|
74 |
+
async def cancel(self, frame: CancelFrame):
|
75 |
+
pass
|
76 |
+
|
77 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
78 |
+
await super().process_frame(frame, direction)
|
79 |
+
|
80 |
+
if isinstance(frame, StartFrame):
|
81 |
+
await self.start(frame)
|
82 |
+
elif isinstance(frame, CancelFrame):
|
83 |
+
await self.cancel(frame)
|
84 |
+
elif isinstance(frame, EndFrame):
|
85 |
+
await self.stop(frame)
|
86 |
+
|
87 |
+
|
88 |
+
class LLMService(AIService):
|
89 |
+
"""This class is a no-op but serves as a base class for LLM services."""
|
90 |
+
|
91 |
+
def __init__(self, **kwargs):
|
92 |
+
super().__init__(**kwargs)
|
93 |
+
self._callbacks = {}
|
94 |
+
self._start_callbacks = {}
|
95 |
+
|
96 |
+
# TODO-CB: callback function type
|
97 |
+
def register_function(self, function_name: str, callback, start_callback=None):
|
98 |
+
self._callbacks[function_name] = callback
|
99 |
+
if start_callback:
|
100 |
+
self._start_callbacks[function_name] = start_callback
|
101 |
+
|
102 |
+
def unregister_function(self, function_name: str):
|
103 |
+
del self._callbacks[function_name]
|
104 |
+
if self._start_callbacks[function_name]:
|
105 |
+
del self._start_callbacks[function_name]
|
106 |
+
|
107 |
+
def has_function(self, function_name: str):
|
108 |
+
return function_name in self._callbacks.keys()
|
109 |
+
|
110 |
+
async def call_function(self, function_name: str, args):
|
111 |
+
if function_name in self._callbacks.keys():
|
112 |
+
return await self._callbacks[function_name](self, args)
|
113 |
+
return None
|
114 |
+
|
115 |
+
async def call_start_function(self, function_name: str):
|
116 |
+
if function_name in self._start_callbacks.keys():
|
117 |
+
await self._start_callbacks[function_name](self)
|
118 |
+
|
119 |
+
|
120 |
+
class TTSService(AIService):
|
121 |
+
def __init__(self, *, aggregate_sentences: bool = True, **kwargs):
|
122 |
+
super().__init__(**kwargs)
|
123 |
+
self._aggregate_sentences: bool = aggregate_sentences
|
124 |
+
self._current_sentence: str = ""
|
125 |
+
|
126 |
+
# Converts the text to audio.
|
127 |
+
@abstractmethod
|
128 |
+
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
|
129 |
+
pass
|
130 |
+
|
131 |
+
async def say(self, text: str):
|
132 |
+
await self.process_frame(TextFrame(text=text), FrameDirection.DOWNSTREAM)
|
133 |
+
|
134 |
+
async def _process_text_frame(self, frame: TextFrame):
|
135 |
+
text: str | None = None
|
136 |
+
if not self._aggregate_sentences:
|
137 |
+
text = frame.text
|
138 |
+
else:
|
139 |
+
self._current_sentence += frame.text
|
140 |
+
if self._current_sentence.strip().endswith(
|
141 |
+
(".", "?", "!")) and not self._current_sentence.strip().endswith(
|
142 |
+
("Mr,", "Mrs.", "Ms.", "Dr.")):
|
143 |
+
text = self._current_sentence
|
144 |
+
self._current_sentence = ""
|
145 |
+
|
146 |
+
if text:
|
147 |
+
await self._push_tts_frames(text)
|
148 |
+
|
149 |
+
async def _push_tts_frames(self, text: str):
|
150 |
+
text = text.strip()
|
151 |
+
if not text:
|
152 |
+
return
|
153 |
+
|
154 |
+
await self.push_frame(TTSStartedFrame())
|
155 |
+
await self.start_processing_metrics()
|
156 |
+
await self.process_generator(self.run_tts(text))
|
157 |
+
await self.stop_processing_metrics()
|
158 |
+
await self.push_frame(TTSStoppedFrame())
|
159 |
+
# We send the original text after the audio. This way, if we are
|
160 |
+
# interrupted, the text is not added to the assistant context.
|
161 |
+
await self.push_frame(TextFrame(text))
|
162 |
+
|
163 |
+
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
164 |
+
await super().process_frame(frame, direction)
|
165 |
+
|
166 |
+
if isinstance(frame, TextFrame):
|
167 |
+
await self._process_text_frame(frame)
|
168 |
+
elif isinstance(frame, StartInterruptionFrame):
|
169 |
+
self._current_sentence = ""
|
170 |
+
await self.push_frame(frame, direction)
|
171 |
+
elif isinstance(frame, LLMFullResponseEndFrame) or isinstance(frame, EndFrame):
|
172 |
+
self._current_sentence = ""
|
173 |
+
await self._push_tts_frames(self._current_sentence)
|
174 |
+
await self.push_frame(frame)
|
175 |
+
else:
|
176 |
+
await self.push_frame(frame, direction)
|
177 |
+
|
178 |
+
|
179 |
+
class STTService(AIService):
|
180 |
+
"""STTService is a base class for speech-to-text services."""
|
181 |
+
|
182 |
+
def __init__(self,
|
183 |
+
*,
|
184 |
+
min_volume: float = 0.6,
|
185 |
+
max_silence_secs: float = 0.3,
|
186 |
+
max_buffer_secs: float = 1.5,
|
187 |
+
sample_rate: int = 16000,
|
188 |
+
num_channels: int = 1,
|
189 |
+
**kwargs):
|
190 |
+
super().__init__(**kwargs)
|
191 |
+
self._min_volume = min_volume
|
192 |
+
self._max_silence_secs = max_silence_secs
|
193 |
+
self._max_buffer_secs = max_buffer_secs
|
194 |
+
self._sample_rate = sample_rate
|
195 |
+
self._num_channels = num_channels
|
196 |
+
(self._content, self._wave) = self._new_wave()
|
197 |
+
self._silence_num_frames = 0
|
198 |
+
# Volume exponential smoothing
|
199 |
+
self._smoothing_factor = 0.2
|
200 |
+
self._prev_volume = 0
|
201 |
+
|
202 |
+
@abstractmethod
|
203 |
+
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
|
204 |
+
"""Returns transcript as a string"""
|
205 |
+
pass
|
206 |
+
|
207 |
+
def _new_wave(self):
|
208 |
+
content = io.BytesIO()
|
209 |
+
ww = wave.open(content, "wb")
|
210 |
+
ww.setsampwidth(2)
|
211 |
+
ww.setnchannels(self._num_channels)
|
212 |
+
ww.setframerate(self._sample_rate)
|
213 |
+
return (content, ww)
|
214 |
+
|
215 |
+
def _get_smoothed_volume(self, frame: AudioRawFrame) -> float:
|
216 |
+
volume = calculate_audio_volume(frame.audio, frame.sample_rate)
|
217 |
+
        return exp_smoothing(volume, self._prev_volume, self._smoothing_factor)

    async def _append_audio(self, frame: AudioRawFrame):
        # Try to filter out empty background noise
        volume = self._get_smoothed_volume(frame)
        if volume >= self._min_volume:
            # If volume is high enough, write new data to wave file
            self._wave.writeframes(frame.audio)
            self._silence_num_frames = 0
        else:
            self._silence_num_frames += frame.num_frames
        self._prev_volume = volume

        # If buffer is not empty and we have enough data or there's been a long
        # silence, transcribe the audio gathered so far.
        silence_secs = self._silence_num_frames / self._sample_rate
        buffer_secs = self._wave.getnframes() / self._sample_rate
        if self._content.tell() > 0 and (
                buffer_secs > self._max_buffer_secs or silence_secs > self._max_silence_secs):
            self._silence_num_frames = 0
            self._wave.close()
            self._content.seek(0)
            await self.start_processing_metrics()
            await self.process_generator(self.run_stt(self._content.read()))
            await self.stop_processing_metrics()
            (self._content, self._wave) = self._new_wave()

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Processes a frame of audio data, either buffering or transcribing it."""
        await super().process_frame(frame, direction)

        if isinstance(frame, CancelFrame) or isinstance(frame, EndFrame):
            self._wave.close()
            await self.push_frame(frame, direction)
        elif isinstance(frame, AudioRawFrame):
            # In this service we accumulate audio internally and at the end we
            # push a TextFrame. We don't really want to push audio frames down.
            await self._append_audio(frame)
        else:
            await self.push_frame(frame, direction)


class ImageGenService(AIService):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    # Renders the image. Returns an Image object.
    @abstractmethod
    async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
        pass

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if isinstance(frame, TextFrame):
            await self.push_frame(frame, direction)
            await self.start_processing_metrics()
            await self.process_generator(self.run_image_gen(frame.text))
            await self.stop_processing_metrics()
        else:
            await self.push_frame(frame, direction)


class VisionService(AIService):
    """VisionService is a base class for vision services."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._describe_text = None

    @abstractmethod
    async def run_vision(self, frame: VisionImageRawFrame) -> AsyncGenerator[Frame, None]:
        pass

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if isinstance(frame, VisionImageRawFrame):
            await self.start_processing_metrics()
            await self.process_generator(self.run_vision(frame))
            await self.stop_processing_metrics()
        else:
            await self.push_frame(frame, direction)
pipecat/services/anthropic.py
ADDED
@@ -0,0 +1,145 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import base64

from pipecat.frames.frames import (
    Frame,
    TextFrame,
    VisionImageRawFrame,
    LLMMessagesFrame,
    LLMFullResponseStartFrame,
    LLMResponseStartFrame,
    LLMResponseEndFrame,
    LLMFullResponseEndFrame
)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import LLMService
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext, OpenAILLMContextFrame

from loguru import logger

try:
    from anthropic import AsyncAnthropic
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use Anthropic, you need to `pip install pipecat-ai[anthropic]`. Also, set `ANTHROPIC_API_KEY` environment variable.")
    raise Exception(f"Missing module: {e}")


class AnthropicLLMService(LLMService):
    """This class implements inference with Anthropic's AI models

    This service translates internally from OpenAILLMContext to the messages format
    expected by the Anthropic Python SDK. We are using the OpenAILLMContext as a lingua
    franca for all LLM services, so that it is easy to switch between different LLMs.
    """

    def __init__(
            self,
            *,
            api_key: str,
            model: str = "claude-3-opus-20240229",
            max_tokens: int = 1024):
        super().__init__()
        self._client = AsyncAnthropic(api_key=api_key)
        self._model = model
        self._max_tokens = max_tokens

    def can_generate_metrics(self) -> bool:
        return True

    def _get_messages_from_openai_context(
            self, context: OpenAILLMContext):
        openai_messages = context.get_messages()
        anthropic_messages = []

        for message in openai_messages:
            role = message["role"]
            text = message["content"]
            if role == "system":
                role = "user"
            if message.get("mime_type") == "image/jpeg":
                # vision frame
                encoded_image = base64.b64encode(message["data"].getvalue()).decode("utf-8")
                anthropic_messages.append({
                    "role": role,
                    "content": [{
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": message.get("mime_type"),
                            "data": encoded_image,
                        }
                    }, {
                        "type": "text",
                        "text": text
                    }]
                })
            else:
                # Text frame. Anthropic needs the roles to alternate. This will
                # cause an issue with interruptions. So, if we detect we are the
                # ones asking again it probably means we were interrupted.
                if role == "user" and len(anthropic_messages) > 1:
                    last_message = anthropic_messages[-1]
                    if last_message["role"] == "user":
                        anthropic_messages = anthropic_messages[:-1]
                        content = last_message["content"]
                        anthropic_messages.append(
                            {"role": "user", "content": f"Sorry, I just asked you about [{content}] but now I would like to know [{text}]."})
                    else:
                        anthropic_messages.append({"role": role, "content": text})
                else:
                    anthropic_messages.append({"role": role, "content": text})

        return anthropic_messages

    async def _process_context(self, context: OpenAILLMContext):
        await self.push_frame(LLMFullResponseStartFrame())
        try:
            logger.debug(f"Generating chat: {context.get_messages_json()}")

            messages = self._get_messages_from_openai_context(context)

            await self.start_ttfb_metrics()

            response = await self._client.messages.create(
                messages=messages,
                model=self._model,
                max_tokens=self._max_tokens,
                stream=True)

            await self.stop_ttfb_metrics()

            async for event in response:
                # logger.debug(f"Anthropic LLM event: {event}")
                if (event.type == "content_block_delta"):
                    await self.push_frame(LLMResponseStartFrame())
                    await self.push_frame(TextFrame(event.delta.text))
                    await self.push_frame(LLMResponseEndFrame())

        except Exception as e:
            logger.exception(f"{self} exception: {e}")
        finally:
            await self.push_frame(LLMFullResponseEndFrame())

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        context = None

        if isinstance(frame, OpenAILLMContextFrame):
            context: OpenAILLMContext = frame.context
        elif isinstance(frame, LLMMessagesFrame):
            context = OpenAILLMContext.from_messages(frame.messages)
        elif isinstance(frame, VisionImageRawFrame):
            context = OpenAILLMContext.from_image_frame(frame)
        else:
            await self.push_frame(frame, direction)

        if context:
            await self._process_context(context)
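As the docstring notes, this service consumes the same OpenAI-style context as the other LLM services and converts it internally. A minimal, illustrative sketch of that conversion path (the API key value is a placeholder; in a real app the frames would flow through a Pipeline rather than being handled manually):

```python
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.anthropic import AnthropicLLMService

llm = AnthropicLLMService(api_key="sk-ant-...")  # hypothetical key

# The same message shape used by the other LLM services; process_frame()
# builds such a context from LLMMessagesFrame and converts it to Anthropic's
# format via _get_messages_from_openai_context().
context = OpenAILLMContext.from_messages(
    [{"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "Say hello in one sentence."}])
anthropic_messages = llm._get_messages_from_openai_context(context)
```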
pipecat/services/azure.py
ADDED
@@ -0,0 +1,233 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import aiohttp
import asyncio
import io
import time

from PIL import Image
from typing import AsyncGenerator

from pipecat.frames.frames import (
    AudioRawFrame,
    CancelFrame,
    EndFrame,
    ErrorFrame,
    Frame,
    StartFrame,
    StartInterruptionFrame,
    SystemFrame,
    TranscriptionFrame,
    URLImageRawFrame)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import AIService, AsyncAIService, TTSService, ImageGenService
from pipecat.services.openai import BaseOpenAILLMService

from loguru import logger

# See .env.example for Azure configuration needed
try:
    from openai import AsyncAzureOpenAI
    from azure.cognitiveservices.speech import (
        SpeechConfig,
        SpeechRecognizer,
        SpeechSynthesizer,
        ResultReason,
        CancellationReason,
    )
    from azure.cognitiveservices.speech.audio import AudioStreamFormat, PushAudioInputStream
    from azure.cognitiveservices.speech.dialog import AudioConfig
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use Azure, you need to `pip install pipecat-ai[azure]`. Also, set `AZURE_SPEECH_API_KEY` and `AZURE_SPEECH_REGION` environment variables.")
    raise Exception(f"Missing module: {e}")


class AzureLLMService(BaseOpenAILLMService):
    def __init__(
            self,
            *,
            api_key: str,
            endpoint: str,
            model: str,
            api_version: str = "2023-12-01-preview"):
        # Initialize variables before calling parent __init__() because that
        # will call create_client() and we need those values there.
        self._endpoint = endpoint
        self._api_version = api_version
        super().__init__(api_key=api_key, model=model)

    def create_client(self, api_key=None, base_url=None, **kwargs):
        return AsyncAzureOpenAI(
            api_key=api_key,
            azure_endpoint=self._endpoint,
            api_version=self._api_version,
        )


class AzureTTSService(TTSService):
    def __init__(self, *, api_key: str, region: str, voice="en-US-SaraNeural", **kwargs):
        super().__init__(**kwargs)

        speech_config = SpeechConfig(subscription=api_key, region=region)
        self._speech_synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)

        self._voice = voice

    def can_generate_metrics(self) -> bool:
        return True

    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"Generating TTS: {text}")

        await self.start_ttfb_metrics()

        ssml = (
            "<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' "
            "xmlns:mstts='http://www.w3.org/2001/mstts'>"
            f"<voice name='{self._voice}'>"
            "<mstts:silence type='Sentenceboundary' value='20ms' />"
            "<mstts:express-as style='lyrical' styledegree='2' role='SeniorFemale'>"
            "<prosody rate='1.05'>"
            f"{text}"
            "</prosody></mstts:express-as></voice></speak> ")

        result = await asyncio.to_thread(self._speech_synthesizer.speak_ssml, (ssml))

        if result.reason == ResultReason.SynthesizingAudioCompleted:
            await self.stop_ttfb_metrics()
            # Azure always sends a 44-byte header. Strip it off.
            yield AudioRawFrame(audio=result.audio_data[44:], sample_rate=16000, num_channels=1)
        elif result.reason == ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            logger.warning(f"Speech synthesis canceled: {cancellation_details.reason}")
            if cancellation_details.reason == CancellationReason.Error:
                logger.error(f"{self} error: {cancellation_details.error_details}")


class AzureSTTService(AsyncAIService):
    def __init__(
            self,
            *,
            api_key: str,
            region: str,
            language="en-US",
            sample_rate=16000,
            channels=1,
            **kwargs):
        super().__init__(**kwargs)

        speech_config = SpeechConfig(subscription=api_key, region=region)
        speech_config.speech_recognition_language = language

        stream_format = AudioStreamFormat(samples_per_second=sample_rate, channels=channels)
        self._audio_stream = PushAudioInputStream(stream_format)

        audio_config = AudioConfig(stream=self._audio_stream)
        self._speech_recognizer = SpeechRecognizer(
            speech_config=speech_config, audio_config=audio_config)
        self._speech_recognizer.recognized.connect(self._on_handle_recognized)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if isinstance(frame, SystemFrame):
            await self.push_frame(frame, direction)
        elif isinstance(frame, AudioRawFrame):
            self._audio_stream.write(frame.audio)
        else:
            await self._push_queue.put((frame, direction))

    async def start(self, frame: StartFrame):
        self._speech_recognizer.start_continuous_recognition_async()

    async def stop(self, frame: EndFrame):
        self._speech_recognizer.stop_continuous_recognition_async()

    async def cancel(self, frame: CancelFrame):
        self._speech_recognizer.stop_continuous_recognition_async()

    def _on_handle_recognized(self, event):
        if event.result.reason == ResultReason.RecognizedSpeech and len(event.result.text) > 0:
            frame = TranscriptionFrame(event.result.text, "", int(time.time_ns() / 1000000))
            asyncio.run_coroutine_threadsafe(self.queue_frame(frame), self.get_event_loop())


class AzureImageGenServiceREST(ImageGenService):

    def __init__(
        self,
        *,
        aiohttp_session: aiohttp.ClientSession,
        image_size: str,
        api_key: str,
        endpoint: str,
        model: str,
        api_version="2023-06-01-preview",
    ):
        super().__init__()

        self._api_key = api_key
        self._azure_endpoint = endpoint
        self._api_version = api_version
        self._model = model
        self._aiohttp_session = aiohttp_session
        self._image_size = image_size

    async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
        url = f"{self._azure_endpoint}openai/images/generations:submit?api-version={self._api_version}"

        headers = {
            "api-key": self._api_key,
            "Content-Type": "application/json"}

        body = {
            # Enter your prompt text here
            "prompt": prompt,
            "size": self._image_size,
            "n": 1,
        }

        async with self._aiohttp_session.post(url, headers=headers, json=body) as submission:
            # We never get past this line, because this header isn't
            # defined on a 429 response, but something is eating our
            # exceptions!
            operation_location = submission.headers["operation-location"]
            status = ""
            attempts_left = 120
            json_response = None
            while status != "succeeded":
                attempts_left -= 1
                if attempts_left == 0:
                    logger.error(f"{self} error: image generation timed out")
                    yield ErrorFrame("Image generation timed out")
                    return

                await asyncio.sleep(1)

                response = await self._aiohttp_session.get(operation_location, headers=headers)

                json_response = await response.json()
                status = json_response["status"]

        image_url = json_response["result"]["data"][0]["url"] if json_response else None
        if not image_url:
            logger.error(f"{self} error: image generation failed")
            yield ErrorFrame("Image generation failed")
            return

        # Load the image from the url
        async with self._aiohttp_session.get(image_url) as response:
            image_stream = io.BytesIO(await response.content.read())
            image = Image.open(image_stream)
            frame = URLImageRawFrame(
                url=image_url,
                image=image.tobytes(),
                size=image.size,
                format=image.format)
            yield frame
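The 44 bytes stripped in AzureTTSService.run_tts correspond to a canonical RIFF/WAVE header (12-byte RIFF descriptor, 24-byte fmt chunk, 8-byte data chunk header) that Azure prepends to the synthesized PCM. A small, purely illustrative helper that checks this assumption before slicing:

```python
def looks_like_riff_wav(data: bytes) -> bool:
    # A canonical 44-byte WAV header starts with "RIFF", carries "WAVE" at
    # offset 8, and the "data" sub-chunk id at offset 36; everything after
    # byte 44 is then raw PCM samples.
    return (len(data) > 44
            and data[0:4] == b"RIFF"
            and data[8:12] == b"WAVE"
            and data[36:40] == b"data")
```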
pipecat/services/cartesia.py
ADDED
@@ -0,0 +1,65 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from cartesia import AsyncCartesia

from typing import AsyncGenerator

from pipecat.frames.frames import AudioRawFrame, Frame
from pipecat.services.ai_services import TTSService

from loguru import logger


class CartesiaTTSService(TTSService):

    def __init__(
            self,
            *,
            api_key: str,
            voice_id: str,
            model_id: str = "sonic-english",
            encoding: str = "pcm_s16le",
            sample_rate: int = 16000,
            **kwargs):
        super().__init__(**kwargs)

        self._api_key = api_key
        self._model_id = model_id
        self._output_format = {
            "container": "raw",
            "encoding": encoding,
            "sample_rate": sample_rate,
        }

        try:
            self._client = AsyncCartesia(api_key=self._api_key)
            self._voice = self._client.voices.get(id=voice_id)
        except Exception as e:
            logger.exception(f"{self} initialization error: {e}")

    def can_generate_metrics(self) -> bool:
        return True

    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"Generating TTS: [{text}]")

        try:
            await self.start_ttfb_metrics()

            chunk_generator = await self._client.tts.sse(
                stream=True,
                transcript=text,
                voice_embedding=self._voice["embedding"],
                model_id=self._model_id,
                output_format=self._output_format,
            )

            async for chunk in chunk_generator:
                await self.stop_ttfb_metrics()
                yield AudioRawFrame(chunk["audio"], self._output_format["sample_rate"], 1)
        except Exception as e:
            logger.exception(f"{self} exception: {e}")
pipecat/services/deepgram.py
ADDED
@@ -0,0 +1,149 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import aiohttp
import time

from typing import AsyncGenerator

from pipecat.frames.frames import (
    AudioRawFrame,
    CancelFrame,
    EndFrame,
    ErrorFrame,
    Frame,
    InterimTranscriptionFrame,
    StartFrame,
    SystemFrame,
    TranscriptionFrame)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import AsyncAIService, TTSService

from loguru import logger

# See .env.example for Deepgram configuration needed
try:
    from deepgram import (
        DeepgramClient,
        DeepgramClientOptions,
        LiveTranscriptionEvents,
        LiveOptions,
    )
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use Deepgram, you need to `pip install pipecat-ai[deepgram]`. Also, set `DEEPGRAM_API_KEY` environment variable.")
    raise Exception(f"Missing module: {e}")


class DeepgramTTSService(TTSService):

    def __init__(
            self,
            *,
            aiohttp_session: aiohttp.ClientSession,
            api_key: str,
            voice: str = "aura-helios-en",
            base_url: str = "https://api.deepgram.com/v1/speak",
            **kwargs):
        super().__init__(**kwargs)

        self._voice = voice
        self._api_key = api_key
        self._aiohttp_session = aiohttp_session
        self._base_url = base_url

    def can_generate_metrics(self) -> bool:
        return True

    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"Generating TTS: [{text}]")

        base_url = self._base_url
        request_url = f"{base_url}?model={self._voice}&encoding=linear16&container=none&sample_rate=16000"
        headers = {"authorization": f"token {self._api_key}"}
        body = {"text": text}

        try:
            await self.start_ttfb_metrics()
            async with self._aiohttp_session.post(request_url, headers=headers, json=body) as r:
                if r.status != 200:
                    response_text = await r.text()
                    # If we get a "Bad Request: Input is unutterable", just print out a debug log.
                    # All other unsuccessful requests should emit an error frame. If not specifically
                    # handled by the running PipelineTask, the ErrorFrame will cancel the task.
                    if "unutterable" in response_text:
                        logger.debug(f"Unutterable text: [{text}]")
                        return

                    logger.error(
                        f"{self} error getting audio (status: {r.status}, error: {response_text})")
                    yield ErrorFrame(f"Error getting audio (status: {r.status}, error: {response_text})")
                    return

                async for data in r.content:
                    await self.stop_ttfb_metrics()
                    frame = AudioRawFrame(audio=data, sample_rate=16000, num_channels=1)
                    yield frame
        except Exception as e:
            logger.exception(f"{self} exception: {e}")


class DeepgramSTTService(AsyncAIService):
    def __init__(self,
                 *,
                 api_key: str,
                 url: str = "",
                 live_options: LiveOptions = LiveOptions(
                     encoding="linear16",
                     language="en-US",
                     model="nova-2-conversationalai",
                     sample_rate=16000,
                     channels=1,
                     interim_results=True,
                     smart_format=True,
                 ),
                 **kwargs):
        super().__init__(**kwargs)

        self._live_options = live_options

        self._client = DeepgramClient(
            api_key, config=DeepgramClientOptions(url=url, options={"keepalive": "true"}))
        self._connection = self._client.listen.asynclive.v("1")
        self._connection.on(LiveTranscriptionEvents.Transcript, self._on_message)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        if isinstance(frame, SystemFrame):
            await self.push_frame(frame, direction)
        elif isinstance(frame, AudioRawFrame):
            await self._connection.send(frame.audio)
        else:
            await self.queue_frame(frame, direction)

    async def start(self, frame: StartFrame):
        if await self._connection.start(self._live_options):
            logger.debug(f"{self}: Connected to Deepgram")
        else:
            logger.error(f"{self}: Unable to connect to Deepgram")

    async def stop(self, frame: EndFrame):
        await self._connection.finish()

    async def cancel(self, frame: CancelFrame):
        await self._connection.finish()

    async def _on_message(self, *args, **kwargs):
        result = kwargs["result"]
        is_final = result.is_final
        transcript = result.channel.alternatives[0].transcript
        if len(transcript) > 0:
            if is_final:
                await self.queue_frame(TranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
            else:
                await self.queue_frame(InterimTranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
pipecat/services/elevenlabs.py
ADDED
@@ -0,0 +1,66 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import aiohttp

from typing import AsyncGenerator

from pipecat.frames.frames import AudioRawFrame, ErrorFrame, Frame
from pipecat.services.ai_services import TTSService

from loguru import logger


class ElevenLabsTTSService(TTSService):

    def __init__(
            self,
            *,
            aiohttp_session: aiohttp.ClientSession,
            api_key: str,
            voice_id: str,
            model: str = "eleven_turbo_v2",
            **kwargs):
        super().__init__(**kwargs)

        self._api_key = api_key
        self._voice_id = voice_id
        self._aiohttp_session = aiohttp_session
        self._model = model

    def can_generate_metrics(self) -> bool:
        return True

    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"Generating TTS: [{text}]")

        url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._voice_id}/stream"

        payload = {"text": text, "model_id": self._model}

        querystring = {
            "output_format": "pcm_16000",
            "optimize_streaming_latency": 2}

        headers = {
            "xi-api-key": self._api_key,
            "Content-Type": "application/json",
        }

        await self.start_ttfb_metrics()

        async with self._aiohttp_session.post(url, json=payload, headers=headers, params=querystring) as r:
            if r.status != 200:
                text = await r.text()
                logger.error(f"{self} error getting audio (status: {r.status}, error: {text})")
                yield ErrorFrame(f"Error getting audio (status: {r.status}, error: {text})")
                return

            async for chunk in r.content:
                if len(chunk) > 0:
                    await self.stop_ttfb_metrics()
                    frame = AudioRawFrame(chunk, 16000, 1)
                    yield frame
pipecat/services/fal.py
ADDED
@@ -0,0 +1,83 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import aiohttp
import io
import os

from PIL import Image
from pydantic import BaseModel
from typing import AsyncGenerator, Optional, Union, Dict

from pipecat.frames.frames import ErrorFrame, Frame, URLImageRawFrame
from pipecat.services.ai_services import ImageGenService

from loguru import logger

try:
    import fal_client
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use Fal, you need to `pip install pipecat-ai[fal]`. Also, set `FAL_KEY` environment variable.")
    raise Exception(f"Missing module: {e}")


class FalImageGenService(ImageGenService):
    class InputParams(BaseModel):
        seed: Optional[int] = None
        num_inference_steps: int = 8
        num_images: int = 1
        image_size: Union[str, Dict[str, int]] = "square_hd"
        expand_prompt: bool = False
        enable_safety_checker: bool = True
        format: str = "png"

    def __init__(
            self,
            *,
            aiohttp_session: aiohttp.ClientSession,
            params: InputParams,
            model: str = "fal-ai/fast-sdxl",
            key: str | None = None,
    ):
        super().__init__()
        self._model = model
        self._params = params
        self._aiohttp_session = aiohttp_session
        if key:
            os.environ["FAL_KEY"] = key

    async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"Generating image from prompt: {prompt}")

        response = await fal_client.run_async(
            self._model,
            arguments={"prompt": prompt, **self._params.model_dump()}
        )

        image_url = response["images"][0]["url"] if response else None

        if not image_url:
            logger.error(f"{self} error: image generation failed")
            yield ErrorFrame("Image generation failed")
            return

        logger.debug(f"Image generated at: {image_url}")

        # Load the image from the url
        logger.debug(f"Downloading image {image_url} ...")
        async with self._aiohttp_session.get(image_url) as response:
            logger.debug(f"Downloaded image {image_url}")
            image_stream = io.BytesIO(await response.content.read())
            image = Image.open(image_stream)

            frame = URLImageRawFrame(
                url=image_url,
                image=image.tobytes(),
                size=image.size,
                format=image.format)
            yield frame
pipecat/services/fireworks.py
ADDED
@@ -0,0 +1,25 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from pipecat.services.openai import BaseOpenAILLMService

from loguru import logger

try:
    from openai import AsyncOpenAI
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use Fireworks, you need to `pip install pipecat-ai[fireworks]`. Also, set the `FIREWORKS_API_KEY` environment variable.")
    raise Exception(f"Missing module: {e}")


class FireworksLLMService(BaseOpenAILLMService):
    def __init__(self,
                 *,
                 model: str = "accounts/fireworks/models/firefunction-v1",
                 base_url: str = "https://api.fireworks.ai/inference/v1"):
        # BaseOpenAILLMService.__init__() takes keyword-only arguments, so pass
        # them by name rather than positionally.
        super().__init__(model=model, base_url=base_url)
pipecat/services/google.py
ADDED
@@ -0,0 +1,129 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio

from typing import List

from pipecat.frames.frames import (
    Frame,
    TextFrame,
    VisionImageRawFrame,
    LLMMessagesFrame,
    LLMFullResponseStartFrame,
    LLMResponseStartFrame,
    LLMResponseEndFrame,
    LLMFullResponseEndFrame
)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import LLMService
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext, OpenAILLMContextFrame

from loguru import logger

try:
    import google.generativeai as gai
    import google.ai.generativelanguage as glm
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use Google AI, you need to `pip install pipecat-ai[google]`. Also, set `GOOGLE_API_KEY` environment variable.")
    raise Exception(f"Missing module: {e}")


class GoogleLLMService(LLMService):
    """This class implements inference with Google's AI models

    This service translates internally from OpenAILLMContext to the messages format
    expected by the Google AI model. We are using the OpenAILLMContext as a lingua
    franca for all LLM services, so that it is easy to switch between different LLMs.
    """

    def __init__(self, *, api_key: str, model: str = "gemini-1.5-flash-latest", **kwargs):
        super().__init__(**kwargs)
        gai.configure(api_key=api_key)
        self._client = gai.GenerativeModel(model)

    def can_generate_metrics(self) -> bool:
        return True

    def _get_messages_from_openai_context(
            self, context: OpenAILLMContext) -> List[glm.Content]:
        openai_messages = context.get_messages()
        google_messages = []

        for message in openai_messages:
            role = message["role"]
            content = message["content"]
            if role == "system":
                role = "user"
            elif role == "assistant":
                role = "model"

            parts = [glm.Part(text=content)]
            if "mime_type" in message:
                parts.append(
                    glm.Part(inline_data=glm.Blob(
                        mime_type=message["mime_type"],
                        data=message["data"].getvalue()
                    )))
            google_messages.append({"role": role, "parts": parts})

        return google_messages

    async def _async_generator_wrapper(self, sync_generator):
        for item in sync_generator:
            yield item
            await asyncio.sleep(0)

    async def _process_context(self, context: OpenAILLMContext):
        await self.push_frame(LLMFullResponseStartFrame())
        try:
            logger.debug(f"Generating chat: {context.get_messages_json()}")

            messages = self._get_messages_from_openai_context(context)

            await self.start_ttfb_metrics()

            response = self._client.generate_content(messages, stream=True)

            await self.stop_ttfb_metrics()

            async for chunk in self._async_generator_wrapper(response):
                try:
                    text = chunk.text
                    await self.push_frame(LLMResponseStartFrame())
                    await self.push_frame(TextFrame(text))
                    await self.push_frame(LLMResponseEndFrame())
                except Exception as e:
                    # Google LLMs seem to flag safety issues a lot!
                    if chunk.candidates[0].finish_reason == 3:
                        logger.debug(
                            f"LLM refused to generate content for safety reasons - {messages}.")
                    else:
                        logger.exception(f"{self} error: {e}")

        except Exception as e:
            logger.exception(f"{self} exception: {e}")
        finally:
            await self.push_frame(LLMFullResponseEndFrame())

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        context = None

        if isinstance(frame, OpenAILLMContextFrame):
            context: OpenAILLMContext = frame.context
        elif isinstance(frame, LLMMessagesFrame):
            context = OpenAILLMContext.from_messages(frame.messages)
        elif isinstance(frame, VisionImageRawFrame):
            context = OpenAILLMContext.from_image_frame(frame)
        else:
            await self.push_frame(frame, direction)

        if context:
            await self._process_context(context)
pipecat/services/moondream.py
ADDED
@@ -0,0 +1,92 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio

from PIL import Image

from typing import AsyncGenerator

from pipecat.frames.frames import ErrorFrame, Frame, TextFrame, VisionImageRawFrame
from pipecat.services.ai_services import VisionService

from loguru import logger

try:
    import torch

    from transformers import AutoModelForCausalLM, AutoTokenizer
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error("In order to use Moondream, you need to `pip install pipecat-ai[moondream]`.")
    raise Exception(f"Missing module(s): {e}")


def detect_device():
    """
    Detects the appropriate device to run on, and returns the device and dtype.
    """
    try:
        import intel_extension_for_pytorch
        if torch.xpu.is_available():
            return torch.device("xpu"), torch.float32
    except ImportError:
        pass
    if torch.cuda.is_available():
        return torch.device("cuda"), torch.float16
    elif torch.backends.mps.is_available():
        return torch.device("mps"), torch.float16
    else:
        return torch.device("cpu"), torch.float32


class MoondreamService(VisionService):
    def __init__(
            self,
            *,
            model="vikhyatk/moondream2",
            revision="2024-04-02",
            use_cpu=False
    ):
        super().__init__()

        if not use_cpu:
            device, dtype = detect_device()
        else:
            device = torch.device("cpu")
            dtype = torch.float32

        self._tokenizer = AutoTokenizer.from_pretrained(model, revision=revision)

        logger.debug("Loading Moondream model...")

        self._model = AutoModelForCausalLM.from_pretrained(
            model, trust_remote_code=True, revision=revision
        ).to(device=device, dtype=dtype)
        self._model.eval()

        logger.debug("Loaded Moondream model")

    async def run_vision(self, frame: VisionImageRawFrame) -> AsyncGenerator[Frame, None]:
        if not self._model:
            logger.error(f"{self} error: Moondream model not available")
            yield ErrorFrame("Moondream model not available")
            return

        logger.debug(f"Analyzing image: {frame}")

        def get_image_description(frame: VisionImageRawFrame):
            image = Image.frombytes(frame.format, frame.size, frame.image)
            image_embeds = self._model.encode_image(image)
            description = self._model.answer_question(
                image_embeds=image_embeds,
                question=frame.text,
                tokenizer=self._tokenizer)
            return description

        description = await asyncio.to_thread(get_image_description, frame)

        yield TextFrame(text=description)
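run_vision expects a VisionImageRawFrame carrying raw pixel bytes plus the question text. A minimal sketch of building such a frame from a Pillow image, based on the field accesses above (the local file path is hypothetical, and keyword arguments are used because the constructor's argument order is an assumption):

```python
from PIL import Image

from pipecat.frames.frames import VisionImageRawFrame

image = Image.open("example.jpg").convert("RGB")  # hypothetical local file
frame = VisionImageRawFrame(
    text="What is in this picture?",   # the question passed to answer_question()
    image=image.tobytes(),             # raw pixel bytes, as read by Image.frombytes()
    size=image.size,
    format=image.mode)
```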
pipecat/services/ollama.py
ADDED
@@ -0,0 +1,13 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from pipecat.services.openai import BaseOpenAILLMService


class OLLamaLLMService(BaseOpenAILLMService):

    def __init__(self, *, model: str = "llama2", base_url: str = "http://localhost:11434/v1"):
        super().__init__(model=model, base_url=base_url, api_key="ollama")
pipecat/services/openai.py
ADDED
@@ -0,0 +1,338 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import aiohttp
import base64
import io
import json

from typing import AsyncGenerator, List, Literal

from loguru import logger
from PIL import Image

from pipecat.frames.frames import (
    AudioRawFrame,
    ErrorFrame,
    Frame,
    LLMFullResponseEndFrame,
    LLMFullResponseStartFrame,
    LLMMessagesFrame,
    LLMResponseEndFrame,
    LLMResponseStartFrame,
    TextFrame,
    URLImageRawFrame,
    VisionImageRawFrame
)
from pipecat.processors.aggregators.openai_llm_context import (
    OpenAILLMContext,
    OpenAILLMContextFrame
)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import (
    ImageGenService,
    LLMService,
    TTSService
)

try:
    from openai import AsyncOpenAI, AsyncStream, BadRequestError
    from openai.types.chat import (
        ChatCompletionChunk,
        ChatCompletionFunctionMessageParam,
        ChatCompletionMessageParam,
        ChatCompletionToolParam
    )
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use OpenAI, you need to `pip install pipecat-ai[openai]`. Also, set `OPENAI_API_KEY` environment variable.")
    raise Exception(f"Missing module: {e}")


class OpenAIUnhandledFunctionException(Exception):
    pass


class BaseOpenAILLMService(LLMService):
    """This is the base for all services that use the AsyncOpenAI client.

    This service consumes OpenAILLMContextFrame frames, which contain a reference
    to an OpenAILLMContext frame. The OpenAILLMContext object defines the context
    sent to the LLM for a completion. This includes user, assistant and system messages
    as well as tool choices and the tool, which is used if requesting function
    calls from the LLM.
    """

    def __init__(self, *, model: str, api_key=None, base_url=None, **kwargs):
        super().__init__(**kwargs)
        self._model: str = model
        self._client = self.create_client(api_key=api_key, base_url=base_url, **kwargs)

    def create_client(self, api_key=None, base_url=None, **kwargs):
        return AsyncOpenAI(api_key=api_key, base_url=base_url)

    def can_generate_metrics(self) -> bool:
        return True

    async def get_chat_completions(
            self,
            context: OpenAILLMContext,
            messages: List[ChatCompletionMessageParam]) -> AsyncStream[ChatCompletionChunk]:
        chunks = await self._client.chat.completions.create(
            model=self._model,
            stream=True,
            messages=messages,
            tools=context.tools,
            tool_choice=context.tool_choice,
        )
        return chunks

    async def _stream_chat_completions(
            self, context: OpenAILLMContext) -> AsyncStream[ChatCompletionChunk]:
        logger.debug(f"Generating chat: {context.get_messages_json()}")

        messages: List[ChatCompletionMessageParam] = context.get_messages()

        # base64 encode any images
        for message in messages:
            if message.get("mime_type") == "image/jpeg":
                encoded_image = base64.b64encode(message["data"].getvalue()).decode("utf-8")
                text = message["content"]
                message["content"] = [
                    {"type": "text", "text": text},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
                ]
                del message["data"]
                del message["mime_type"]

        chunks = await self.get_chat_completions(context, messages)

        return chunks

    async def _process_context(self, context: OpenAILLMContext):
        function_name = ""
        arguments = ""
        tool_call_id = ""

        await self.start_ttfb_metrics()

        chunk_stream: AsyncStream[ChatCompletionChunk] = (
            await self._stream_chat_completions(context)
        )

        async for chunk in chunk_stream:
            if len(chunk.choices) == 0:
                continue

            await self.stop_ttfb_metrics()

            if chunk.choices[0].delta.tool_calls:
                # We're streaming the LLM response to enable the fastest response times.
                # For text, we just yield each chunk as we receive it and count on consumers
                # to do whatever coalescing they need (eg. to pass full sentences to TTS)
                #
                # If the LLM is a function call, we'll do some coalescing here.
                # If the response contains a function name, we'll yield a frame to tell consumers
                # that they can start preparing to call the function with that name.
                # We accumulate all the arguments for the rest of the streamed response, then when
                # the response is done, we package up all the arguments and the function name and
                # yield a frame containing the function name and the arguments.

                tool_call = chunk.choices[0].delta.tool_calls[0]
                if tool_call.function and tool_call.function.name:
                    function_name += tool_call.function.name
                    tool_call_id = tool_call.id
                    await self.call_start_function(function_name)
                if tool_call.function and tool_call.function.arguments:
                    # Keep iterating through the response to collect all the argument fragments
                    arguments += tool_call.function.arguments
            elif chunk.choices[0].delta.content:
                await self.push_frame(LLMResponseStartFrame())
                await self.push_frame(TextFrame(chunk.choices[0].delta.content))
                await self.push_frame(LLMResponseEndFrame())

        # if we got a function name and arguments, check to see if it's a function with
        # a registered handler. If so, run the registered callback, save the result to
        # the context, and re-prompt to get a chat answer. If we don't have a registered
        # handler, raise an exception.
        if function_name and arguments:
            if self.has_function(function_name):
                await self._handle_function_call(context, tool_call_id, function_name, arguments)
            else:
                raise OpenAIUnhandledFunctionException(
                    f"The LLM tried to call a function named '{function_name}', but there isn't a callback registered for that function.")

    async def _handle_function_call(
            self,
            context,
            tool_call_id,
            function_name,
            arguments
    ):
        arguments = json.loads(arguments)
        result = await self.call_function(function_name, arguments)
        arguments = json.dumps(arguments)
        if isinstance(result, (str, dict)):
            # Handle it in "full magic mode"
            tool_call = ChatCompletionFunctionMessageParam({
                "role": "assistant",
                "tool_calls": [
                    {
                        "id": tool_call_id,
                        "function": {
                            "arguments": arguments,
                            "name": function_name
                        },
                        "type": "function"
                    }
                ]
            })
            context.add_message(tool_call)
            if isinstance(result, dict):
                result = json.dumps(result)
            tool_result = ChatCompletionToolParam({
                "tool_call_id": tool_call_id,
                "role": "tool",
                "content": result
            })
            context.add_message(tool_result)
            # re-prompt to get a human answer
            await self._process_context(context)
        elif isinstance(result, list):
            # reduced magic
            for msg in result:
                context.add_message(msg)
            await self._process_context(context)
        elif isinstance(result, type(None)):
            pass
        else:
            raise TypeError(f"Unknown return type from function callback: {type(result)}")

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)

        context = None
        if isinstance(frame, OpenAILLMContextFrame):
            context: OpenAILLMContext = frame.context
        elif isinstance(frame, LLMMessagesFrame):
            context = OpenAILLMContext.from_messages(frame.messages)
        elif isinstance(frame, VisionImageRawFrame):
            context = OpenAILLMContext.from_image_frame(frame)
        else:
            await self.push_frame(frame, direction)

        if context:
            await self.push_frame(LLMFullResponseStartFrame())
            await self.start_processing_metrics()
            await self._process_context(context)
            await self.stop_processing_metrics()
            await self.push_frame(LLMFullResponseEndFrame())


class OpenAILLMService(BaseOpenAILLMService):

    def __init__(self, *, model: str = "gpt-4o", **kwargs):
        super().__init__(model=model, **kwargs)


class OpenAIImageGenService(ImageGenService):

    def __init__(
        self,
        *,
        image_size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"],
        aiohttp_session: aiohttp.ClientSession,
        api_key: str,
        model: str = "dall-e-3",
    ):
        super().__init__()
        self._model = model
        self._image_size = image_size
        self._client = AsyncOpenAI(api_key=api_key)
        self._aiohttp_session = aiohttp_session

    async def run_image_gen(self, prompt: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"Generating image from prompt: {prompt}")

        image = await self._client.images.generate(
            prompt=prompt,
            model=self._model,
            n=1,
            size=self._image_size
        )

        image_url = image.data[0].url

        if not image_url:
            logger.error(f"{self} No image provided in response: {image}")
            yield ErrorFrame("Image generation failed")
            return

        # Load the image from the url
        async with self._aiohttp_session.get(image_url) as response:
            image_stream = io.BytesIO(await response.content.read())
            image = Image.open(image_stream)
            frame = URLImageRawFrame(image_url, image.tobytes(), image.size, image.format)
            yield frame


class OpenAITTSService(TTSService):
    """This service uses the OpenAI TTS API to generate audio from text.
    The returned audio is PCM encoded at 24kHz. When using the DailyTransport, set the sample rate in the DailyParams accordingly:
    ```
    DailyParams(
        audio_out_enabled=True,
        audio_out_sample_rate=24_000,
    )
    ```
    """

    def __init__(
            self,
            *,
            api_key: str | None = None,
            base_url: str | None = None,
            sample_rate: int = 24_000,
            voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"] = "alloy",
            model: Literal["tts-1", "tts-1-hd"] = "tts-1",
            **kwargs):
        super().__init__(**kwargs)

        self._voice = voice
        self._model = model
        self.sample_rate = sample_rate
        self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)

    def can_generate_metrics(self) -> bool:
        return True

    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"Generating TTS: [{text}]")

        try:
            await self.start_ttfb_metrics()

            async with self._client.audio.speech.with_streaming_response.create(
                    input=text,
                    model=self._model,
                    voice=self._voice,
                    response_format="pcm",
            ) as r:
                if r.status_code != 200:
                    error = await r.text()
                    logger.error(
                        f"{self} error getting audio (status: {r.status_code}, error: {error})")
                    yield ErrorFrame(f"Error getting audio (status: {r.status_code}, error: {error})")
                    return
                async for chunk in r.iter_bytes(8192):
                    if len(chunk) > 0:
                        await self.stop_ttfb_metrics()
                        frame = AudioRawFrame(chunk, self.sample_rate, 1)
                        yield frame
        except BadRequestError as e:
            logger.exception(f"{self} error generating TTS: {e}")
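In _handle_function_call above, the type of the registered callback's return value selects the behavior. A short, purely illustrative sketch of that contract using a hypothetical callback (the function itself is not part of this diff):

```python
async def get_weather(args: dict):
    # 1) Return a str or dict: it is appended to the context as a tool
    #    message and the LLM is re-prompted ("full magic mode").
    return {"temperature_c": 21}

    # 2) Return a list of message dicts: each one is added to the context
    #    verbatim before re-prompting ("reduced magic").
    # 3) Return None: nothing is added and no re-prompt happens.
    # Any other return type raises a TypeError in _handle_function_call.
```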
pipecat/services/openpipe.py
ADDED
@@ -0,0 +1,71 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

from typing import Dict, List

from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai import BaseOpenAILLMService

from loguru import logger

try:
    from openpipe import AsyncOpenAI as OpenPipeAI, AsyncStream
    from openai.types.chat import (ChatCompletionMessageParam, ChatCompletionChunk)
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use OpenPipe, you need to `pip install pipecat-ai[openpipe]`. Also, set `OPENPIPE_API_KEY` and `OPENAI_API_KEY` environment variables.")
    raise Exception(f"Missing module: {e}")


class OpenPipeLLMService(BaseOpenAILLMService):

    def __init__(
            self,
            *,
            model: str = "gpt-4o",
            api_key: str | None = None,
            base_url: str | None = None,
            openpipe_api_key: str | None = None,
            openpipe_base_url: str = "https://app.openpipe.ai/api/v1",
            tags: Dict[str, str] | None = None,
            **kwargs):
        super().__init__(
            model=model,
            api_key=api_key,
            base_url=base_url,
            openpipe_api_key=openpipe_api_key,
            openpipe_base_url=openpipe_base_url,
            **kwargs)
        self._tags = tags

    def create_client(self, api_key=None, base_url=None, **kwargs):
        openpipe_api_key = kwargs.get("openpipe_api_key") or ""
        openpipe_base_url = kwargs.get("openpipe_base_url") or ""
        client = OpenPipeAI(
            api_key=api_key,
            base_url=base_url,
            openpipe={
                "api_key": openpipe_api_key,
                "base_url": openpipe_base_url
            }
        )
        return client

    async def get_chat_completions(
            self,
            context: OpenAILLMContext,
            messages: List[ChatCompletionMessageParam]) -> AsyncStream[ChatCompletionChunk]:
        chunks = await self._client.chat.completions.create(
            model=self._model,
            stream=True,
            messages=messages,
            openpipe={
                "tags": self._tags,
                "log_request": True
            }
        )
        return chunks
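A minimal sketch of how `OpenPipeLLMService` might be constructed follows. The key values and tag names are placeholders, not values from this repository; the parameters are taken from the `__init__` signature above. Because `get_chat_completions` always passes `log_request: True`, every streamed completion is recorded in OpenPipe together with these tags.

```python
# Hypothetical usage sketch -- key values and tag names are placeholders.
from pipecat.services.openpipe import OpenPipeLLMService

llm = OpenPipeLLMService(
    model="gpt-4o",
    api_key="sk-...",               # OpenAI API key (placeholder)
    openpipe_api_key="opk-...",     # OpenPipe API key (placeholder)
    tags={"prompt_id": "demo", "environment": "dev"},  # attached to every logged request
)
```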
pipecat/services/playht.py
ADDED
@@ -0,0 +1,83 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import io
import struct

from typing import AsyncGenerator

from pipecat.frames.frames import AudioRawFrame, Frame
from pipecat.services.ai_services import TTSService

from loguru import logger

try:
    from pyht.client import TTSOptions
    from pyht.async_client import AsyncClient
    from pyht.protos.api_pb2 import Format
except ModuleNotFoundError as e:
    logger.error(f"Exception: {e}")
    logger.error(
        "In order to use PlayHT, you need to `pip install pipecat-ai[playht]`. Also, set `PLAY_HT_USER_ID` and `PLAY_HT_API_KEY` environment variables.")
    raise Exception(f"Missing module: {e}")


class PlayHTTTSService(TTSService):

    def __init__(self, *, api_key: str, user_id: str, voice_url: str, **kwargs):
        super().__init__(**kwargs)

        self._user_id = user_id
        self._speech_key = api_key

        self._client = AsyncClient(
            user_id=self._user_id,
            api_key=self._speech_key,
        )
        self._options = TTSOptions(
            voice=voice_url,
            sample_rate=16000,
            quality="higher",
            format=Format.FORMAT_WAV)

    def can_generate_metrics(self) -> bool:
        return True

    async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
        logger.debug(f"Generating TTS: [{text}]")

        try:
            b = bytearray()
            in_header = True

            await self.start_ttfb_metrics()

            playht_gen = self._client.tts(
                text,
                voice_engine="PlayHT2.0-turbo",
                options=self._options)

            async for chunk in playht_gen:
                # skip the RIFF header.
                if in_header:
                    b.extend(chunk)
                    if len(b) <= 36:
                        continue
                    else:
                        fh = io.BytesIO(b)
                        fh.seek(36)
                        (data, size) = struct.unpack('<4sI', fh.read(8))
                        while data != b'data':
                            fh.read(size)
                            (data, size) = struct.unpack('<4sI', fh.read(8))
                        in_header = False
                else:
                    if len(chunk):
                        await self.stop_ttfb_metrics()
                        frame = AudioRawFrame(chunk, 16000, 1)
                        yield frame
        except Exception as e:
            logger.exception(f"{self} error generating TTS: {e}")
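A minimal construction sketch for `PlayHTTTSService` follows. The credential values and the voice manifest URL are placeholders rather than real resources; the parameters come from the `__init__` signature above. Note that the service is hard-coded to 16kHz output, so whatever transport plays this audio needs a matching output sample rate.

```python
# Hypothetical usage sketch -- credentials and voice URL are placeholders.
from pipecat.services.playht import PlayHTTTSService

tts = PlayHTTTSService(
    api_key="...",     # PLAY_HT_API_KEY (placeholder)
    user_id="...",     # PLAY_HT_USER_ID (placeholder)
    voice_url="s3://voice-cloning-zero-shot/example/manifest.json",  # placeholder voice
)
```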