BAAI
/

Bunny-v1_0-3B-zh

Text Generation

Model card Files and versions Community

BoyaWu10 committed on May 30

Commit

26c44d3

•

1 Parent(s): d34218a

Update modeling_bunny_minicpm.py

Files changed (1) hide show

modeling_bunny_minicpm.py +6 -0

modeling_bunny_minicpm.py CHANGED Viewed

@@ -210,11 +210,17 @@ class BunnyMetaForCausalLM(ABC):
         if labels is None:
             labels = torch.full_like(input_ids, IGNORE_INDEX)
         # remove the padding using attention_mask -- TODO: double check
         input_ids = [cur_input_ids[cur_attention_mask] for cur_input_ids, cur_attention_mask in
                      zip(input_ids, attention_mask)]
         labels = [cur_labels[cur_attention_mask] for cur_labels, cur_attention_mask in zip(labels, attention_mask)]
         new_input_embeds = []
         new_labels = []
         cur_image_idx = 0

         if labels is None:
             labels = torch.full_like(input_ids, IGNORE_INDEX)
+        input_ids_temp = input_ids # points to the actual input_ids tensor
         # remove the padding using attention_mask -- TODO: double check
         input_ids = [cur_input_ids[cur_attention_mask] for cur_input_ids, cur_attention_mask in
                      zip(input_ids, attention_mask)]
         labels = [cur_labels[cur_attention_mask] for cur_labels, cur_attention_mask in zip(labels, attention_mask)]
+        # -- TODO: better implementation?
+        # replace IMAGE_TOKEN_INDEX(-200) with 0 to be compatible with repetition penalty
+        input_ids_temp[input_ids_temp == IMAGE_TOKEN_INDEX] = 0
         new_input_embeds = []
         new_labels = []
         cur_image_idx = 0