ctheodoris hchen725 committed on
Commit
075bd53
1 Parent(s): aa25cd2

- Fix typo (d1bbf272387475f1c413cca8679150549abd8bed)


Co-authored-by: Han Chen <hchen725@users.noreply.huggingface.co>

Files changed (1) hide show
  1. geneformer/tokenizer.py +1 -1
geneformer/tokenizer.py CHANGED
@@ -366,7 +366,7 @@ class TranscriptomeTokenizer:
366
  example["length_uncropped"] = len(example["input_ids"])
367
 
368
  # Truncate/Crop input_ids to input size
369
- if tk.special_token:
370
  example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
371
  example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
372
  example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))
 
366
  example["length_uncropped"] = len(example["input_ids"])
367
 
368
  # Truncate/Crop input_ids to input size
369
+ if self.special_token:
370
  example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
371
  example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
372
  example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))