Christina Theodoris
committed on
Commit
·
17f036a
1
Parent(s):
e04975c
change doc formatting
Browse files
geneformer/emb_extractor.py
CHANGED
@@ -395,8 +395,8 @@ class EmbExtractor:
|
|
395 |
"""
|
396 |
Initialize embedding extractor.
|
397 |
|
398 |
-
Parameters
|
399 |
-
|
400 |
model_type : {"Pretrained","GeneClassifier","CellClassifier"}
|
401 |
| Whether model is the pretrained Geneformer or a fine-tuned gene or cell classifier.
|
402 |
num_classes : int
|
@@ -442,8 +442,7 @@ class EmbExtractor:
|
|
442 |
token_dictionary_file : Path
|
443 |
| Path to pickle file containing token dictionary (Ensembl ID:token).
|
444 |
|
445 |
-
Examples
|
446 |
-
~~~~~~~~
|
447 |
|
448 |
.. code-block :: python
|
449 |
|
@@ -532,8 +531,8 @@ class EmbExtractor:
|
|
532 |
"""
|
533 |
Extract embeddings from input data and save as results in output_directory.
|
534 |
|
535 |
-
Parameters
|
536 |
-
|
537 |
model_directory : Path
|
538 |
| Path to directory containing model
|
539 |
input_data_file : Path
|
@@ -548,8 +547,7 @@ class EmbExtractor:
|
|
548 |
cell_state : dict
|
549 |
| Cell state key and value for state embedding extraction.
|
550 |
|
551 |
-
Examples
|
552 |
-
~~~~~~~~
|
553 |
|
554 |
.. code-block :: python
|
555 |
|
@@ -629,8 +627,8 @@ class EmbExtractor:
|
|
629 |
"""
|
630 |
Extract exact mean or exact median cell state embedding positions from input data and save as results in output_directory.
|
631 |
|
632 |
-
Parameters
|
633 |
-
|
634 |
cell_states_to_model : None, dict
|
635 |
| Cell states to model if testing perturbations that achieve goal state change.
|
636 |
| Four-item dictionary with keys: state_key, start_state, goal_state, and alt_states
|
@@ -655,8 +653,8 @@ class EmbExtractor:
|
|
655 |
| Whether or not to also output the embeddings as a tensor.
|
656 |
| Note, if true, will output embeddings as both dataframe and tensor.
|
657 |
|
658 |
-
Outputs
|
659 |
-
|
660 |
| Outputs state_embs_dict for use with in silico perturber.
|
661 |
| Format is dictionary of embedding positions of each cell state to model shifts from/towards.
|
662 |
| Keys specify each possible cell state to model.
|
@@ -721,8 +719,8 @@ class EmbExtractor:
|
|
721 |
"""
|
722 |
Plot embeddings, coloring by provided labels.
|
723 |
|
724 |
-
Parameters
|
725 |
-
|
726 |
embs : pandas.core.frame.DataFrame
|
727 |
| Pandas dataframe containing embeddings output from extract_embs
|
728 |
plot_style : str
|
@@ -738,8 +736,7 @@ class EmbExtractor:
|
|
738 |
kwargs_dict : dict
|
739 |
| Dictionary of kwargs to pass to plotting function.
|
740 |
|
741 |
-
Examples
|
742 |
-
~~~~~~~~
|
743 |
|
744 |
.. code-block :: python
|
745 |
|
|
|
395 |
"""
|
396 |
Initialize embedding extractor.
|
397 |
|
398 |
+
**Parameters:**
|
399 |
+
|
400 |
model_type : {"Pretrained","GeneClassifier","CellClassifier"}
|
401 |
| Whether model is the pretrained Geneformer or a fine-tuned gene or cell classifier.
|
402 |
num_classes : int
|
|
|
442 |
token_dictionary_file : Path
|
443 |
| Path to pickle file containing token dictionary (Ensembl ID:token).
|
444 |
|
445 |
+
**Examples:**
|
|
|
446 |
|
447 |
.. code-block :: python
|
448 |
|
|
|
531 |
"""
|
532 |
Extract embeddings from input data and save as results in output_directory.
|
533 |
|
534 |
+
**Parameters:**
|
535 |
+
|
536 |
model_directory : Path
|
537 |
| Path to directory containing model
|
538 |
input_data_file : Path
|
|
|
547 |
cell_state : dict
|
548 |
| Cell state key and value for state embedding extraction.
|
549 |
|
550 |
+
**Examples:**
|
|
|
551 |
|
552 |
.. code-block :: python
|
553 |
|
|
|
627 |
"""
|
628 |
Extract exact mean or exact median cell state embedding positions from input data and save as results in output_directory.
|
629 |
|
630 |
+
**Parameters:**
|
631 |
+
|
632 |
cell_states_to_model : None, dict
|
633 |
| Cell states to model if testing perturbations that achieve goal state change.
|
634 |
| Four-item dictionary with keys: state_key, start_state, goal_state, and alt_states
|
|
|
653 |
| Whether or not to also output the embeddings as a tensor.
|
654 |
| Note, if true, will output embeddings as both dataframe and tensor.
|
655 |
|
656 |
+
**Outputs:**
|
657 |
+
|
658 |
| Outputs state_embs_dict for use with in silico perturber.
|
659 |
| Format is dictionary of embedding positions of each cell state to model shifts from/towards.
|
660 |
| Keys specify each possible cell state to model.
|
|
|
719 |
"""
|
720 |
Plot embeddings, coloring by provided labels.
|
721 |
|
722 |
+
**Parameters:**
|
723 |
+
|
724 |
embs : pandas.core.frame.DataFrame
|
725 |
| Pandas dataframe containing embeddings output from extract_embs
|
726 |
plot_style : str
|
|
|
736 |
kwargs_dict : dict
|
737 |
| Dictionary of kwargs to pass to plotting function.
|
738 |
|
739 |
+
**Examples:**
|
|
|
740 |
|
741 |
.. code-block :: python
|
742 |
|
geneformer/in_silico_perturber.py
CHANGED
@@ -100,8 +100,8 @@ class InSilicoPerturber:
|
|
100 |
"""
|
101 |
Initialize in silico perturber.
|
102 |
|
103 |
-
Parameters
|
104 |
-
|
105 |
perturb_type : {"delete", "overexpress", "inhibit", "activate"}
|
106 |
| Type of perturbation.
|
107 |
| "delete": delete gene from rank value encoding
|
@@ -398,8 +398,8 @@ class InSilicoPerturber:
|
|
398 |
"""
|
399 |
Perturb genes in input data and save as results in output_directory.
|
400 |
|
401 |
-
Parameters
|
402 |
-
|
403 |
model_directory : Path
|
404 |
| Path to directory containing model
|
405 |
input_data_file : Path
|
|
|
100 |
"""
|
101 |
Initialize in silico perturber.
|
102 |
|
103 |
+
**Parameters:**
|
104 |
+
|
105 |
perturb_type : {"delete", "overexpress", "inhibit", "activate"}
|
106 |
| Type of perturbation.
|
107 |
| "delete": delete gene from rank value encoding
|
|
|
398 |
"""
|
399 |
Perturb genes in input data and save as results in output_directory.
|
400 |
|
401 |
+
**Parameters:**
|
402 |
+
|
403 |
model_directory : Path
|
404 |
| Path to directory containing model
|
405 |
input_data_file : Path
|
geneformer/in_silico_perturber_stats.py
CHANGED
@@ -652,8 +652,8 @@ class InSilicoPerturberStats:
|
|
652 |
"""
|
653 |
Initialize in silico perturber stats generator.
|
654 |
|
655 |
-
Parameters
|
656 |
-
|
657 |
mode : {"goal_state_shift", "vs_null", "mixture_model", "aggregate_data", "aggregate_gene_shifts"}
|
658 |
| Type of stats.
|
659 |
| "goal_state_shift": perturbation vs. random for desired cell state shift
|
@@ -854,8 +854,8 @@ class InSilicoPerturberStats:
|
|
854 |
"""
|
855 |
Get stats for in silico perturbation data and save as results in output_directory.
|
856 |
|
857 |
-
Parameters
|
858 |
-
|
859 |
input_data_directory : Path
|
860 |
| Path to directory containing cos_sim dictionary inputs
|
861 |
null_dist_data_directory : Path
|
@@ -867,8 +867,8 @@ class InSilicoPerturberStats:
|
|
867 |
null_dict_list: dict
|
868 |
| List of loaded null distribution dictionaries if more than one comparison vs. the null is to be performed
|
869 |
|
870 |
-
Outputs
|
871 |
-
|
872 |
Definition of possible columns in .csv output file.
|
873 |
|
874 |
| Of note, not all columns will be present in all output files.
|
|
|
652 |
"""
|
653 |
Initialize in silico perturber stats generator.
|
654 |
|
655 |
+
**Parameters:**
|
656 |
+
|
657 |
mode : {"goal_state_shift", "vs_null", "mixture_model", "aggregate_data", "aggregate_gene_shifts"}
|
658 |
| Type of stats.
|
659 |
| "goal_state_shift": perturbation vs. random for desired cell state shift
|
|
|
854 |
"""
|
855 |
Get stats for in silico perturbation data and save as results in output_directory.
|
856 |
|
857 |
+
**Parameters:**
|
858 |
+
|
859 |
input_data_directory : Path
|
860 |
| Path to directory containing cos_sim dictionary inputs
|
861 |
null_dist_data_directory : Path
|
|
|
867 |
null_dict_list: dict
|
868 |
| List of loaded null distribution dictionaries if more than one comparison vs. the null is to be performed
|
869 |
|
870 |
+
**Outputs:**
|
871 |
+
|
872 |
Definition of possible columns in .csv output file.
|
873 |
|
874 |
| Of note, not all columns will be present in all output files.
|
geneformer/tokenizer.py
CHANGED
@@ -87,8 +87,8 @@ class TranscriptomeTokenizer:
|
|
87 |
"""
|
88 |
Initialize tokenizer.
|
89 |
|
90 |
-
Parameters
|
91 |
-
|
92 |
custom_attr_name_dict : None, dict
|
93 |
| Dictionary of custom attributes to be added to the dataset.
|
94 |
| Keys are the names of the attributes in the loom file.
|
@@ -138,8 +138,8 @@ class TranscriptomeTokenizer:
|
|
138 |
"""
|
139 |
Tokenize .loom files in data_directory and save as tokenized .dataset in output_directory.
|
140 |
|
141 |
-
Parameters
|
142 |
-
|
143 |
data_directory : Path
|
144 |
Path to directory containing loom files or anndata files
|
145 |
output_directory : Path
|
|
|
87 |
"""
|
88 |
Initialize tokenizer.
|
89 |
|
90 |
+
**Parameters:**
|
91 |
+
|
92 |
custom_attr_name_dict : None, dict
|
93 |
| Dictionary of custom attributes to be added to the dataset.
|
94 |
| Keys are the names of the attributes in the loom file.
|
|
|
138 |
"""
|
139 |
Tokenize .loom files in data_directory and save as tokenized .dataset in output_directory.
|
140 |
|
141 |
+
**Parameters:**
|
142 |
+
|
143 |
data_directory : Path
|
144 |
Path to directory containing loom files or anndata files
|
145 |
output_directory : Path
|