Christina Theodoris committed on
Commit
abdf980
1 Parent(s): 50e921d

Add error for no files found and suppress loompy import warning

Browse files
Files changed (1) hide show
  1. geneformer/tokenizer.py +13 -0
geneformer/tokenizer.py CHANGED
@@ -17,10 +17,17 @@ Usage:
17
  import pickle
18
  from pathlib import Path
19
 
 
 
 
 
 
20
  import loompy as lp
21
  import numpy as np
22
  from datasets import Dataset
23
 
 
 
24
  GENE_MEDIAN_FILE = Path(__file__).parent / "gene_median_dictionary.pkl"
25
  TOKEN_DICTIONARY_FILE = Path(__file__).parent / "token_dictionary.pkl"
26
 
@@ -111,7 +118,9 @@ class TranscriptomeTokenizer:
111
  cell_metadata = {attr_key: [] for attr_key in self.custom_attr_name_dict.values()}
112
 
113
  # loops through directories to tokenize .loom files
 
114
  for loom_file_path in loom_data_directory.glob("*.loom"):
 
115
  print(f"Tokenizing {loom_file_path}")
116
  file_tokenized_cells, file_cell_metadata = self.tokenize_file(
117
  loom_file_path
@@ -123,6 +132,10 @@ class TranscriptomeTokenizer:
123
  else:
124
  cell_metadata = None
125
 
 
 
 
 
126
  return tokenized_cells, cell_metadata
127
 
128
  def tokenize_file(self, loom_file_path):
 
17
  import pickle
18
  from pathlib import Path
19
 
20
+ import logging
21
+
22
+ import warnings
23
+ warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
24
+
25
  import loompy as lp
26
  import numpy as np
27
  from datasets import Dataset
28
 
29
+ logger = logging.getLogger(__name__)
30
+
31
  GENE_MEDIAN_FILE = Path(__file__).parent / "gene_median_dictionary.pkl"
32
  TOKEN_DICTIONARY_FILE = Path(__file__).parent / "token_dictionary.pkl"
33
 
 
118
  cell_metadata = {attr_key: [] for attr_key in self.custom_attr_name_dict.values()}
119
 
120
  # loops through directories to tokenize .loom files
121
+ file_found = 0
122
  for loom_file_path in loom_data_directory.glob("*.loom"):
123
+ file_found = 1
124
  print(f"Tokenizing {loom_file_path}")
125
  file_tokenized_cells, file_cell_metadata = self.tokenize_file(
126
  loom_file_path
 
132
  else:
133
  cell_metadata = None
134
 
135
+ if file_found == 0:
136
+ logger.error(
137
+ f"No .loom files found in directory {loom_data_directory}.")
138
+ raise
139
  return tokenized_cells, cell_metadata
140
 
141
  def tokenize_file(self, loom_file_path):