import io from typing import Union from modules.SentenceSplitter import SentenceSplitter from modules.SynthesizeSegments import SynthesizeSegments, combine_audio_segments from modules import generate_audio as generate from modules.speaker import Speaker from modules.ssml_parser.SSMLParser import SSMLSegment from modules.utils import audio def synthesize_audio( text: str, temperature: float = 0.3, top_P: float = 0.7, top_K: float = 20, spk: Union[int, Speaker] = -1, infer_seed: int = -1, use_decoder: bool = True, prompt1: str = "", prompt2: str = "", prefix: str = "", batch_size: int = 1, spliter_threshold: int = 100, end_of_sentence="", ): spliter = SentenceSplitter(spliter_threshold) sentences = spliter.parse(text) text_segments = [ SSMLSegment( text=s, params={ "temperature": temperature, "top_P": top_P, "top_K": top_K, "spk": spk, "infer_seed": infer_seed, "use_decoder": use_decoder, "prompt1": prompt1, "prompt2": prompt2, "prefix": prefix, }, ) for s in sentences ] synthesizer = SynthesizeSegments( batch_size=batch_size, eos=end_of_sentence, spliter_thr=spliter_threshold ) audio_segments = synthesizer.synthesize_segments(text_segments) combined_audio = combine_audio_segments(audio_segments) return audio.pydub_to_np(combined_audio)