{
  "architectures": [
    "FusionCrossAttentionSentenceClassifier"
  ],
  "dropout": 0.3,
  "embedding_strategy": "self-att",
  "fp16": true,
  "fusion_layers": 2,
  "fusion_strategy": "dense",
  "heads": 8,
  "hidden_size": 768,
  "k1_backbone": "whisper-encoder-small",
  "k1_freezed": true,
  "k1_kwargs": {
    "load_in_4bit": false
  },
  "k2_backbone": "transformer-prosody-encoder192",
  "k2_freezed": false,
  "k2_kwargs": {
    "dropout": 0.3,
    "heads": 8,
    "input_size": 5,
    "num_layers": 2
  },
  "labels": 18,
  "model_type": "fusion-cross-attention-sentence-classifier",
  "multilabel": true,
  "q_backbone": "Phi-3-mini-4k-instruct",
  "q_freezed": true,
  "q_kwargs": {
    "load_in_4bit": true
  },
  "torch_dtype": "float32",
  "transformers_version": "4.41.2"
}