ricomnl committed on
Commit
9c62f4c
1 Parent(s): 94e8d23

Fixed issue: loom and h5ad now produce the same checksums

Browse files
Files changed (1) hide show
  1. geneformer/tokenizer.py +9 -9
geneformer/tokenizer.py CHANGED
@@ -194,11 +194,11 @@ class TranscriptomeTokenizer:
194
  else:
195
  var_exists = True
196
 
197
- if var_exists is True:
198
  filter_pass_loc = np.where(
199
- [True if i == 1 else False for i in adata.obs["filter_pass"]]
200
  )[0]
201
- elif var_exists is False:
202
  print(
203
  f"{adata_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
204
  )
@@ -208,10 +208,10 @@ class TranscriptomeTokenizer:
208
 
209
  for i in range(0, len(filter_pass_loc), chunk_size):
210
  idx = filter_pass_loc[i:i+chunk_size]
211
- X = adata[idx].X
212
 
213
- X_view = X[:, coding_miRNA_loc]
214
- X_norm = (X_view / X_view.sum(axis=1) * target_sum / norm_factor_vector)
 
215
  X_norm = sp.csr_matrix(X_norm)
216
 
217
  tokenized_cells += [
@@ -258,11 +258,11 @@ class TranscriptomeTokenizer:
258
  else:
259
  var_exists = True
260
 
261
- if var_exists is True:
262
  filter_pass_loc = np.where(
263
- [True if i == 1 else False for i in data.ca["filter_pass"]]
264
  )[0]
265
- elif var_exists is False:
266
  print(
267
  f"{loom_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
268
  )
 
194
  else:
195
  var_exists = True
196
 
197
+ if var_exists:
198
  filter_pass_loc = np.where(
199
+ [i == 1 for i in adata.obs["filter_pass"]]
200
  )[0]
201
+ elif not var_exists:
202
  print(
203
  f"{adata_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
204
  )
 
208
 
209
  for i in range(0, len(filter_pass_loc), chunk_size):
210
  idx = filter_pass_loc[i:i+chunk_size]
 
211
 
212
+ X_view = adata[idx, coding_miRNA_loc].X
213
+ n_counts = adata[idx].obs['n_counts'].values[:, None]
214
+ X_norm = (X_view / n_counts * target_sum / norm_factor_vector)
215
  X_norm = sp.csr_matrix(X_norm)
216
 
217
  tokenized_cells += [
 
258
  else:
259
  var_exists = True
260
 
261
+ if var_exists:
262
  filter_pass_loc = np.where(
263
+ [i == 1 for i in data.ca["filter_pass"]]
264
  )[0]
265
+ elif not var_exists:
266
  print(
267
  f"{loom_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
268
  )