Spaces:

flax-community
/

dalle-mini

Running

boris committed on Jul 14, 2021

Commit

19946be

1 Parent(s): bf4da91

fix: correct decoder_input_ids and labels

Files changed (1) hide show

seq2seq/run_seq2seq_flax.py CHANGED Viewed

@@ -475,25 +475,20 @@ def main():
         )
         # set up targets
-        # Note: we prepend the bos token instead of doing `shift_tokens_right` because the latter
-        # removes the last token, and we know we don't need padding. In our case, labels
-        # has a length of exactly 1 + 256, while shifting would produce 256 tokens.
-        labels = [[config.decoder_start_token_id] + eval(indices) for indices in examples['encoding']]
         labels = np.asarray(labels)
         # We need the labels, in addition to the decoder_input_ids, for the compute_loss function
-        # In our case, they are the same as decoder_input_ids. Is that correct?
         model_inputs["labels"] = labels
-        # TODO: if data processing prevents correct compilation, we will:
-        #       - have data saved in JSONL (to avoid `eval` which is needed here to convert string "[2]" to list[int])
-        #       - use below `shift_tokens_right_fn`
         # In our case, this prepends the bos token and removes the last one
-#        decoder_input_ids = shift_tokens_right_fn(
-#            jnp.array(labels), config.pad_token_id, config.decoder_start_token_id
-#        )
-        model_inputs["decoder_input_ids"] = labels
         return model_inputs

         )
         # set up targets
+        # Note: labels correspond to our target indices
+        # decoder input ids are the same but shifted to the right with bos at the beginning (and without last token)
+        labels = [[eval(indices) for indices in examples['encoding']]
         labels = np.asarray(labels)
         # We need the labels, in addition to the decoder_input_ids, for the compute_loss function
         model_inputs["labels"] = labels
         # In our case, this prepends the bos token and removes the last one
+        decoder_input_ids = shift_tokens_right_fn(
+            jnp.array(labels), config.pad_token_id, config.decoder_start_token_id
+        )
+        model_inputs["decoder_input_ids"] = decoder_input_ids
         return model_inputs