Fix typo in the `if self.special_token:` condition (geneformer/tokenizer.py)
#301
by
hchen725
- opened
- geneformer/tokenizer.py +1 -1
geneformer/tokenizer.py
CHANGED
@@ -366,7 +366,7 @@ class TranscriptomeTokenizer:
|
|
366 |
example["length_uncropped"] = len(example["input_ids"])
|
367 |
|
368 |
# Truncate/Crop input_ids to input size
|
369 |
-
if
|
370 |
example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
|
371 |
example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
|
372 |
example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))
|
|
|
366 |
example["length_uncropped"] = len(example["input_ids"])
|
367 |
|
368 |
# Truncate/Crop input_ids to input size
|
369 |
+
if self.special_token:
|
370 |
example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
|
371 |
example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
|
372 |
example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))
|