VictorSanh committed on
Commit
4746d53
·
1 Parent(s): c816c5b
Files changed (1) hide show
  1. vision.py +5 -5
vision.py CHANGED
@@ -84,7 +84,7 @@ class SiglipVisionModelOutput(ModelOutput):
84
 
85
 
86
  class SiglipVisionEmbeddings(nn.Module):
87
- def __init__(self, config: Img2HTMLVisionConfig):
88
  super().__init__()
89
  self.config = config
90
  self.embed_dim = config.hidden_size
@@ -423,7 +423,7 @@ class SiglipMLP(nn.Module):
423
 
424
  # Copied from transformers.models.clip.modeling_clip.CLIPEncoderLayer with CLIP->Siglip
425
  class SiglipEncoderLayer(nn.Module):
426
- def __init__(self, config: Img2HTMLVisionConfig):
427
  super().__init__()
428
  self.embed_dim = config.hidden_size
429
  self.self_attn = (
@@ -552,7 +552,7 @@ class SiglipEncoder(nn.Module):
552
 
553
 
554
  class SiglipVisionTransformer(nn.Module):
555
- def __init__(self, config: Img2HTMLVisionConfig):
556
  super().__init__()
557
  self.config = config
558
  embed_dim = config.hidden_size
@@ -607,7 +607,7 @@ class SiglipVisionTransformer(nn.Module):
607
  class SiglipMultiheadAttentionPoolingHead(nn.Module):
608
  """Multihead Attention Pooling."""
609
 
610
- def __init__(self, config: Img2HTMLVisionConfig):
611
  super().__init__()
612
 
613
  self.probe = nn.Parameter(torch.randn(1, 1, config.hidden_size))
@@ -629,7 +629,7 @@ class SiglipMultiheadAttentionPoolingHead(nn.Module):
629
 
630
 
631
  class SiglipVisionModel(nn.Module):
632
- def __init__(self, config: Img2HTMLVisionConfig):
633
  super().__init__()
634
 
635
  self.config = config
 
84
 
85
 
86
  class SiglipVisionEmbeddings(nn.Module):
87
+ def __init__(self, config: VMistralVisionConfig):
88
  super().__init__()
89
  self.config = config
90
  self.embed_dim = config.hidden_size
 
423
 
424
  # Copied from transformers.models.clip.modeling_clip.CLIPEncoderLayer with CLIP->Siglip
425
  class SiglipEncoderLayer(nn.Module):
426
+ def __init__(self, config: VMistralVisionConfig):
427
  super().__init__()
428
  self.embed_dim = config.hidden_size
429
  self.self_attn = (
 
552
 
553
 
554
  class SiglipVisionTransformer(nn.Module):
555
+ def __init__(self, config: VMistralVisionConfig):
556
  super().__init__()
557
  self.config = config
558
  embed_dim = config.hidden_size
 
607
  class SiglipMultiheadAttentionPoolingHead(nn.Module):
608
  """Multihead Attention Pooling."""
609
 
610
+ def __init__(self, config: VMistralVisionConfig):
611
  super().__init__()
612
 
613
  self.probe = nn.Parameter(torch.randn(1, 1, config.hidden_size))
 
629
 
630
 
631
  class SiglipVisionModel(nn.Module):
632
+ def __init__(self, config: VMistralVisionConfig):
633
  super().__init__()
634
 
635
  self.config = config