{ "audio_drop_path": 0.1, "audio_embed_dim": 768, "audio_kernel_size": 16, "audio_num_blocks": 12, "audio_num_heads": 12, "audio_num_mel_bins": 128, "audio_stride": 10, "audio_target_len": 204, "depth_drop_path": 0.0, "depth_embed_dim": 384, "depth_kernel_size": 16, "depth_num_blocks": 12, "depth_num_heads": 8, "imu_drop_path": 0.7, "imu_embed_dim": 512, "imu_kernel_size": 8, "imu_num_blocks": 6, "imu_num_heads": 8, "kernel_size": [ 2, 14, 14 ], "out_embed_dim": 1024, "text_embed_dim": 1024, "text_num_blocks": 24, "text_num_heads": 16, "thermal_drop_path": 0.0, "thermal_embed_dim": 768, "thermal_kernel_size": 16, "thermal_num_blocks": 12, "thermal_num_heads": 12, "video_frames": 2, "vision_embed_dim": 1280, "vision_num_blocks": 32, "vision_num_heads": 16 }