---
license: mit
---

This is the structure of the BLIPNet model. You can use it to load the released weights, or extend it into a larger model for your own task.

```python
import torch
import torch.nn as nn
from transformers import BlipForConditionalGeneration

class BLIPNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Generation model
        self.model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME, cache_dir="model")
        # Same as https://huggingface.co/uf-aice-lab/BLIP-Math
        self.ebd_dim = 443136

        # Classification head
        fc_dim = 64  # You can choose a higher number for better performance, for example, 1024.
        self.head = nn.Sequential(
            nn.Linear(self.ebd_dim, fc_dim),
            nn.ReLU(),
        )
        self.score = nn.Linear(fc_dim, 5)  # 5 classes

    def forward(self, pixel_values, input_ids):
        outputs = self.model(input_ids=input_ids, pixel_values=pixel_values, labels=input_ids)
        image_text_embeds = self.model.vision_model(pixel_values, return_dict=True).last_hidden_state
        image_text_embeds = self.head(image_text_embeds.view(-1, self.ebd_dim))

        # The classification head runs on embeddings from the generative model,
        # leveraging BLIP's image-text encoding capabilities.
        logits = self.score(image_text_embeds)

        # generated text outputs, classification logits
        return outputs, logits

model = BLIPNet()
model.load_state_dict(torch.load(best_model_wts_path), strict=False)
```
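
Here `MODEL_NAME` and `best_model_wts_path` are placeholders for the base BLIP checkpoint name and the path to the fine-tuned weights file. Passing `strict=False` lets `load_state_dict` succeed even when the checkpoint's keys do not exactly match the module's parameters.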

You need to prepare the inputs in the same way as for:
https://huggingface.co/uf-aice-lab/BLIP-Math
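
As a minimal sketch of what that preparation might look like, assuming the inputs follow the standard `BlipProcessor` convention; the checkpoint name, image path, and caption text below are illustrative assumptions, not values from the original card.

```python
import torch
from PIL import Image
from transformers import BlipProcessor

# Assumed base checkpoint; use the processor that matches your MODEL_NAME.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

image = Image.open("sample.png").convert("RGB")   # hypothetical sample image
text = "a student's handwritten math response"    # hypothetical paired text

inputs = processor(images=image, text=text, return_tensors="pt")

model.eval()
with torch.no_grad():
    outputs, logits = model(inputs["pixel_values"], inputs["input_ids"])

predicted_class = logits.argmax(dim=-1)  # index of the highest-scoring of the 5 classes
```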
 