File size: 1,505 Bytes
4dab15f
 
 
 
 
 
27cee60
4dab15f
 
 
 
 
 
 
 
 
 
b315dd9
4dab15f
 
 
 
 
ea89faa
4dab15f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aeff822
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
[build-system]
requires = ["setuptools >= 61.0", "setuptools-scm>=8.0"]
build-backend = "setuptools.build_meta"

[project]
name = "f5-tts"
version = "0.2.1"
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
readme = "README.md"
license = {text = "MIT License"}
classifiers = [
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
]
dependencies = [
    "accelerate>=0.33.0",
    "bitsandbytes>0.37.0; platform_machine != 'arm64' and platform_system != 'Darwin'",
    "cached_path",
    "click",
    "datasets",
    "ema_pytorch>=0.5.2",
    "gradio>=3.45.2",
    "hydra-core>=1.3.0",
    "jieba",
    "librosa",
    "matplotlib",
    "numpy<=1.26.4",
    "pydub",
    "pypinyin",
    "safetensors",
    "soundfile",
    "tomli",
    "torch>=2.0.0",
    "torchaudio>=2.0.0",
    "torchdiffeq",
    "tqdm>=4.65.0",
    "transformers",
    "transformers_stream_generator",
    "vocos",
    "wandb",
    "x_transformers>=1.31.14",
]

[project.optional-dependencies]
eval = [
    "faster_whisper==0.10.1",
    "funasr",
    "jiwer",
    "modelscope",
    "zhconv",
    "zhon",
]

[project.urls]
Homepage = "https://github.com/SWivid/F5-TTS"

[project.scripts]
"f5-tts_infer-cli" = "f5_tts.infer.infer_cli:main"
"f5-tts_infer-gradio" = "f5_tts.infer.infer_gradio:main"
"f5-tts_finetune-cli" = "f5_tts.train.finetune_cli:main"
"f5-tts_finetune-gradio" = "f5_tts.train.finetune_gradio:main"