|
from typing import Annotated, Literal, Optional |
|
|
|
from pydantic import BaseModel, Field, conint |
|
|
|
|
|
class ServeReferenceAudio(BaseModel):
    # One reference sample: a raw byte payload paired with a string.
    # Both fields are required (neither declares a default).
    # Comments are used instead of a docstring so the generated JSON schema
    # (which picks up class docstrings as "description") is left unchanged.

    # Raw audio payload. NOTE(review): container/encoding is not fixed by this
    # schema — confirm the expected format against the consumer.
    audio: bytes

    # Presumably the transcript of `audio` — TODO confirm with the caller.
    text: str
|
|
|
|
|
class ServeTTSRequest(BaseModel):
    # Request schema for a text-to-speech call. Only `text` is required;
    # every other field carries the default shown on its declaration.
    # Comments are used instead of a docstring so the generated JSON schema
    # (which picks up class docstrings as "description") is left unchanged.

    # Input text for the request.
    text: str

    # Integer chunk size, declared with bounds 100..300 (strict) and default 200.
    # NOTE(review): conint(...) is passed here as Annotated *metadata*, whereas
    # the float fields below use Field(...). Confirm that the installed
    # pydantic version actually enforces this bound; if it does not, switching
    # to Field(ge=100, le=300, strict=True) would enforce it but would also
    # start rejecting out-of-range requests that are accepted today.
    chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200

    # Output format selector; one of the three listed literals.
    format: Literal["wav", "pcm", "mp3"] = "wav"

    # `mp3_bitrate` used to be declared twice in this class: first as
    # `Literal[64, 128, 192] = 128`, then again further down as
    # `Optional[int] = 64`. A class body executes top to bottom, so the later
    # declaration silently replaced both the annotation and the default of
    # the earlier one. Only the effective declaration is kept, in the position
    # of the first one so the field order of the model is unchanged.
    # NOTE(review): if the restricted Literal[64, 128, 192] set was the
    # intended contract, tighten the type here deliberately.
    mp3_bitrate: Optional[int] = 64

    # Inline reference samples. The `[]` default is safe on a pydantic model:
    # pydantic copies field defaults per instance rather than sharing them.
    references: list[ServeReferenceAudio] = []

    # Optional identifier; presumably selects a stored reference instead of
    # inline `references` — TODO confirm with the request handler.
    reference_id: str | None = None

    # Boolean switch, on by default. NOTE(review): presumably toggles text
    # normalization before synthesis — confirm against the consumer.
    normalize: bool = True

    # NOTE(review): the -1000 default looks like a sentinel value rather than
    # a real bitrate — confirm its meaning with the encoder code.
    opus_bitrate: Optional[int] = -1000

    # Latency mode selector; one of the two listed literals.
    latency: Literal["normal", "balanced"] = "normal"

    # Whether streaming is requested; off by default.
    streaming: bool = False

    # Optional free-form string; unset by default.
    emotion: Optional[str] = None

    # Unconstrained integer, default 1024 (presumably a generation cap).
    max_new_tokens: int = 1024

    # Sampling controls: strict floats restricted to the declared closed
    # ranges via Field(ge=..., le=..., strict=True).
    top_p: Annotated[float, Field(ge=0.1, le=1.0, strict=True)] = 0.7
    repetition_penalty: Annotated[float, Field(ge=0.9, le=2.0, strict=True)] = 1.2
    temperature: Annotated[float, Field(ge=0.1, le=1.0, strict=True)] = 0.7
|
|