# Spaces: Running on Zero (hosting-page banner captured with the source; not part of the program)
import argparse | |
import shutil | |
import soundfile as sf | |
import app | |
from app import infer | |
def _build_parser():
    """Create the argument parser for the F5 TTS command-line interface."""
    parser = argparse.ArgumentParser(description="F5 TTS - CLI interface")
    parser.add_argument("--ref_audio", required=True, help="Path to the reference audio file (wav, mp3)")
    parser.add_argument("--ref_text", default="", help="Reference text. If empty, audio transcription will be performed.")
    parser.add_argument("--gen_text", required=True, help="Text to generate")
    parser.add_argument("--exp_name", default="Multi", help="Experiment name / model selection (default: Multi)")
    parser.add_argument("--remove_silence", action="store_true", help="Remove silence from the output audio")
    parser.add_argument("--cross_fade_duration", type=float, default=0.15, help="Cross-fade duration (s)")
    parser.add_argument("--output_audio", default="output.wav", help="Path to the output WAV file")
    parser.add_argument("--output_spectrogram", default="spectrogram.png", help="Path to save the spectrogram (PNG)")
    parser.add_argument("--language", default="en-us", help="Synthesized language (default: en-us)")
    parser.add_argument("--ref_language", default="en-us", help="Reference language (default: en-us)")
    parser.add_argument("--speed", type=float, default=1.0, help="Audio speed factor (default: 1.0)")
    return parser


def main(argv=None):
    """Run one synthesis from command-line options and save the results.

    Parses the options, publishes the language/speed settings as attributes
    on the ``app`` module, calls ``infer`` and writes the generated audio and
    the spectrogram image to the requested paths.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) keeps the original
            behaviour of reading the process command line.

    Raises:
        SystemExit: Raised by ``argparse`` on ``--help`` or invalid arguments.
    """
    args = _build_parser().parse_args(argv)

    # These settings are handed over as module attributes rather than call
    # arguments -- presumably `infer` reads them from `app`; confirm in app.py.
    app.language = args.language
    app.ref_language = args.ref_language
    app.speed = args.speed

    (sr, audio_data), spectrogram_path = infer(
        args.ref_audio,
        args.ref_text,
        args.gen_text,
        args.exp_name,
        args.remove_silence,
        args.cross_fade_duration,
    )

    # `audio_data` must expose `.astype` (assumed to be a NumPy array -- TODO
    # confirm); it is written out as 32-bit float samples at rate `sr`.
    sf.write(args.output_audio, audio_data.astype("float32"), sr)

    try:
        shutil.copy(spectrogram_path, args.output_spectrogram)
    except shutil.SameFileError:
        # The spectrogram already lives at the requested path; nothing to copy.
        pass

    print(f"Audio saved in: {args.output_audio}")
    print(f"Spectrogram saved in: {args.output_spectrogram}")
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()