bczhou committed
Commit
3738ba5
1 Parent(s): 2340fa0

Update config.py

Files changed (1)
  1. config.py +25 -6
config.py CHANGED
@@ -1,22 +1,35 @@
 from dataclasses import dataclass
+from transformers import GPT2Config, CLIPVisionConfig
 
 PREFIX_MAP = {
     "openai/clip-vit-base-patch32": 50,
-    "openai/clip-vit-large-patch14": 257
+    "openai/clip-vit-base-patch16": 197,
+    "openai/clip-vit-large-patch14": 257,
+    "openai/clip-vit-large-patch14-336": 577
+}
+
+TEXT_HIDDEN_SIZE_MAP = {
+    "gpt2": 768,
+    "gpt2-medium": 768,
+    "gpt2-large": 1280,
+    "gpt2-xl": 1600
+}
+
+IMAGE_HIDDEN_SIZE_MAP = {
+    "openai/clip-vit-base-patch32": 768,
+    "openai/clip-vit-base-patch16": 768,
+    "openai/clip-vit-large-patch14": 768,
+    "openai/clip-vit-large-patch14-336": 768
 }
 
 
 @dataclass
-class LinearMappingConfig:
+class CLIPGPT2Config:
     image_model: str = "openai/clip-vit-base-patch32"
     freeze_image_model: bool = True
     text_model: str = "gpt2-large"
     freeze_text_model: bool = True
-    image_hidden_size: int = 768
-    text_hidden_size: int = 1280
     linear_mapping_type: int = "linear"
-    max_seq_length: int = 2048
-    image_resize: int = 224
     add_image_token: bool = True
     freeze_ln: bool = False
     image_from_pretrained: bool = True
@@ -24,3 +37,9 @@ class LinearMappingConfig:
 
     def __post_init__(self):
         self.prefix_length = PREFIX_MAP[self.image_model]
+        self.image_hidden_size = IMAGE_HIDDEN_SIZE_MAP[self.image_model]
+        self.text_hidden_size = TEXT_HIDDEN_SIZE_MAP[self.text_model]
+        self.image_resize = 224 if "336" not in self.image_model else 336
+        self.text_config = GPT2Config.from_pretrained(self.text_model)
+        self.image_config = CLIPVisionConfig.from_pretrained(self.image_model)
+        self.vocab_size = self.text_config.vocab_size + self.add_image_token
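
Below is a minimal usage sketch of the updated dataclass (not part of the commit), assuming the new config.py is on the import path and that transformers can load the pretrained GPT-2 and CLIP configs from a local cache or the Hub; the expected values follow directly from PREFIX_MAP, the two size maps, and __post_init__ above.

from config import CLIPGPT2Config

# Derived fields are filled in by __post_init__ rather than passed explicitly.
cfg = CLIPGPT2Config(
    image_model="openai/clip-vit-large-patch14-336",
    text_model="gpt2-large",
)

print(cfg.prefix_length)      # 577, looked up in PREFIX_MAP
print(cfg.image_resize)       # 336, because the checkpoint name contains "336"
print(cfg.image_hidden_size)  # 768, from IMAGE_HIDDEN_SIZE_MAP
print(cfg.text_hidden_size)   # 1280, from TEXT_HIDDEN_SIZE_MAP
print(cfg.vocab_size)         # GPT-2's 50257 tokens plus 1 when add_image_token is True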