{ "_name_or_path": "/mnt/cloudstorfs/sjtu_home/xuenan.xu/hf_cache/hub/models--wsntxxn--effb2-trm-audiocaps-captioning/snapshots/6f256d69ec63542a9962be02f0c5c8f4b1a73f15", "architectures": [ "Effb2TrmCaptioningModel" ], "attn_emb_dim": 1408, "auto_map": { "AutoConfig": "hf_wrapper.Effb2TrmConfig", "AutoModel": "hf_wrapper.Effb2TrmCaptioningModel" }, "decoder_dropout": 0.2, "decoder_emb_dim": 256, "decoder_n_layers": 2, "decoder_we_tie_weights": true, "fc_emb_dim": 1408, "sample_rate": 16000, "shared_dim": 1024, "tchr_dim": 768, "torch_dtype": "float32", "transformers_version": "4.30.2", "vocab_size": 4981 }