'GlobalConfig' object has no attribute 'd_model'
#1
by
dgcnz
- opened
I think this is related to ViT-Prisma issue #79 (https://github.com/soniajoseph/ViT-Prisma/issues/79). Is there any workaround?
I tried to use the `convert_legacy_prisma_weights` function from that issue
with the following config mapping, but it didn't work: the resulting model performs poorly (at chance level), so something is definitely wrong:
def old_to_new(old_config: GlobalConfig) -> HookedViTConfig:
    """Build a ``HookedViTConfig`` from a legacy ``GlobalConfig``.

    Values are read from the ``image``, ``transformer`` and
    ``classification`` sections of ``old_config`` and passed to the
    ``HookedViTConfig`` constructor as keyword arguments.

    Args:
        old_config: Legacy configuration object to translate.

    Returns:
        The newly constructed ``HookedViTConfig``.
    """
    image = old_config.image
    transformer = old_config.transformer
    classification = old_config.classification

    fields = {
        "image_size": image.image_size,
        "patch_size": image.patch_size,
        "n_channels": image.n_channels,
        "d_model": transformer.hidden_dim,
        "n_heads": transformer.num_heads,
        "n_layers": transformer.num_layers,
        "d_mlp": transformer.mlp_dim,
        # Lower-case the activation name, e.g. GELU -> gelu.
        "activation_name": transformer.activation_name.lower(),
        "attn_only": transformer.attention_only,
        # Per-head width: hidden size floor-divided by the head count.
        "d_head": transformer.hidden_dim // transformer.num_heads,
        "n_classes": classification.num_classes,
        # A truthy global_pool selects "gaap"; otherwise "cls" is used.
        "classification_type": "gaap" if classification.global_pool else "cls",
        "return_type": "class_logits",
    }
    return HookedViTConfig(**fields)