{
  "_class_name": "AudioLDM2Pipeline",
  "_diffusers_version": "0.22.0.dev0",
  "feature_extractor": [
    "transformers",
    "ClapFeatureExtractor"
  ],
  "language_model": [
    "transformers",
    "GPT2Model"
  ],
  "projection_model": [
    "audioldm2",
    "AudioLDM2ProjectionModel"
  ],
  "scheduler": [
    "diffusers",
    "DDIMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "ClapModel"
  ],
  "text_encoder_2": [
    "transformers",
    "VitsModel"
  ],
  "tokenizer": [
    "transformers",
    "RobertaTokenizerFast"
  ],
  "tokenizer_2": [
    "transformers",
    "VitsTokenizer"
  ],
  "unet": [
    "audioldm2",
    "AudioLDM2UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ],
  "vocoder": [
    "transformers",
    "SpeechT5HifiGan"
  ]
}