supercat666 commited on
Commit
0d0c645
1 Parent(s): fc0071d

added cas9 off

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/CRISPRTool.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="inheritedJdk" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (CRISPRTool)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/CRISPRTool.iml" filepath="$PROJECT_DIR$/.idea/CRISPRTool.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
app.py CHANGED
@@ -1,11 +1,13 @@
1
  import os
2
  import tiger
 
 
3
  import pandas as pd
4
  import streamlit as st
5
  from pathlib import Path
6
 
7
  # title and documentation
8
- st.markdown(Path('tiger.md').read_text(), unsafe_allow_html=True)
9
  st.divider()
10
 
11
  CRISPR_MODELS = ['Cas9', 'Cas12', 'Cas13d']
@@ -13,107 +15,169 @@ CRISPR_MODELS = ['Cas9', 'Cas12', 'Cas13d']
13
  selected_model = st.selectbox('Select CRISPR model:', CRISPR_MODELS, key='selected_model')
14
 
15
 
16
- # Check if the selected model is Cas9
17
- if selected_model == 'Cas9':
18
- # Use a radio button to select enzymes, making sure only one can be selected at a time
19
- enzyme_selection = st.radio(
20
- "Select an enzyme:",
21
- ('SPCas9_U6', 'SPCas9_t7', 'eSPCas9', 'SPCas9_HF1'),
22
- key='enzyme_selection'
23
- )
24
-
25
- # Actions based on the selected enzyme
26
- if enzyme_selection == 'SPCas9_U6':
27
- # Placeholder for action when SPCas9_U6 is selected
28
- pass
29
- elif enzyme_selection == 'SPCas9_t7':
30
- # Placeholder for action when SPCas9_t7 is selected
31
- pass
32
- elif enzyme_selection == 'eSPCas9':
33
- # Placeholder for action when eSPCas9 is selected
34
- pass
35
- elif enzyme_selection == 'SPCas9_HF1':
36
- # Placeholder for action when SPCas9_HF1 is selected
37
- pass
38
- elif selected_model == 'Cas12':
39
- # Placeholder for Cas12 model loading
40
- # TODO: Implement Cas12 model loading logic
41
- raise NotImplementedError("Cas12 model loading not implemented yet.")
42
- elif selected_model == 'Cas13d':
43
- ENTRY_METHODS = dict(
44
- manual='Manual entry of single transcript',
45
- fasta="Fasta file upload (supports multiple transcripts if they have unique ID's)"
46
- )
47
- @st.cache_data
48
- def convert_df(df):
49
  # IMPORTANT: Cache the conversion to prevent computation on every rerun
50
  return df.to_csv().encode('utf-8')
51
 
52
 
53
- def mode_change_callback():
54
- if st.session_state.mode in {tiger.RUN_MODES['all'], tiger.RUN_MODES['titration']}: # TODO: support titration
55
- st.session_state.check_off_targets = False
56
- st.session_state.disable_off_target_checkbox = True
57
- else:
58
- st.session_state.disable_off_target_checkbox = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
 
 
 
60
 
61
- def progress_update(update_text, percent_complete):
62
- with progress.container():
63
- st.write(update_text)
64
- st.progress(percent_complete / 100)
65
 
 
 
 
66
 
67
- def initiate_run():
 
 
 
 
 
 
 
68
 
69
- # initialize state variables
70
- st.session_state.transcripts = None
71
- st.session_state.input_error = None
72
- st.session_state.on_target = None
73
- st.session_state.titration = None
74
- st.session_state.off_target = None
75
 
76
- # initialize transcript DataFrame
77
- transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- # manual entry
 
 
 
 
 
 
80
  if st.session_state.entry_method == ENTRY_METHODS['manual']:
81
- transcripts = pd.DataFrame({
82
- tiger.ID_COL: ['ManualEntry'],
83
- tiger.SEQ_COL: [st.session_state.manual_entry]
84
- }).set_index(tiger.ID_COL)
 
 
 
 
 
 
 
 
85
 
86
- # fasta file upload
87
- elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
88
- if st.session_state.fasta_entry is not None:
89
- fasta_path = st.session_state.fasta_entry.name
90
- with open(fasta_path, 'w') as f:
91
- f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
92
- transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
93
- os.remove(fasta_path)
94
-
95
- # convert to upper case as used by tokenizer
96
- transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper().replace('U', 'T'))
97
-
98
- # ensure all transcripts have unique identifiers
99
- if transcripts.index.has_duplicates:
100
- st.session_state.input_error = "Duplicate transcript ID's detected in fasta file"
101
 
102
- # ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
103
- elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
104
- st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts or Us'
 
 
 
105
 
106
- # ensure all transcripts satisfy length requirements
107
- elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):
108
- st.session_state.input_error = 'Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN)
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- # run model if we have any transcripts
111
- elif len(transcripts) > 0:
112
- st.session_state.transcripts = transcripts
 
 
 
 
 
113
 
 
 
 
 
 
 
 
 
 
114
 
115
  if __name__ == '__main__':
116
-
117
  # app initialization
118
  if 'mode' not in st.session_state:
119
  st.session_state.mode = tiger.RUN_MODES['all']
@@ -235,5 +299,4 @@ elif selected_model == 'Cas13d':
235
  )
236
  st.session_state.transcripts = None
237
  st.experimental_rerun()
238
- else:
239
- raise ValueError(f"Unknown model: {model_name}")
 
1
  import os
2
  import tiger
3
+ import cas9on
4
+ import cas9off
5
  import pandas as pd
6
  import streamlit as st
7
  from pathlib import Path
8
 
9
# title and documentation
st.markdown(Path('crisprTool.md').read_text(), unsafe_allow_html=True)
st.divider()

# Supported CRISPR model families offered in the UI.
CRISPR_MODELS = ['Cas9', 'Cas12', 'Cas13d']

# Model picker; the choice persists in st.session_state['selected_model'].
selected_model = st.selectbox('Select CRISPR model:', CRISPR_MODELS, key='selected_model')
16
 
17
 
18
@st.cache_data
def convert_df(df):
    """Serialize a DataFrame to UTF-8 CSV bytes for st.download_button.

    Cached via st.cache_data so the conversion is not recomputed on every
    Streamlit rerun.
    """
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
22
 
23
 
24
def mode_change_callback():
    """Sync the off-target checkbox with the selected run mode.

    The 'all' and 'titration' run modes do not support off-target checks
    (TODO: support titration), so the checkbox is cleared and disabled
    whenever one of them is active.
    """
    unsupported = st.session_state.mode in {tiger.RUN_MODES['all'], tiger.RUN_MODES['titration']}  # TODO: support titration
    if unsupported:
        st.session_state.check_off_targets = False
    st.session_state.disable_off_target_checkbox = unsupported
30
+
31
+
32
def progress_update(update_text, percent_complete):
    """Render a status message and progress bar in the shared `progress` placeholder.

    `percent_complete` arrives on a 0-100 scale and is rescaled to 0-1
    as expected by st.progress.
    """
    target = progress.container()
    with target:
        st.write(update_text)
        st.progress(percent_complete / 100)
36
+
37
+
38
def initiate_run():
    """Validate user-supplied transcripts and stage them for a model run.

    Reads the entry method and raw input from st.session_state, builds a
    transcript DataFrame, runs the validation chain (unique IDs, allowed
    characters, minimum length), and either records an error message in
    st.session_state.input_error or stores the validated DataFrame in
    st.session_state.transcripts.
    """
    # initialize state variables
    st.session_state.transcripts = None
    st.session_state.input_error = None
    st.session_state.on_target = None
    st.session_state.titration = None
    st.session_state.off_target = None

    # initialize transcript DataFrame
    transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])

    # manual entry
    if st.session_state.entry_method == ENTRY_METHODS['manual']:
        transcripts = pd.DataFrame({
            tiger.ID_COL: ['ManualEntry'],
            tiger.SEQ_COL: [st.session_state.manual_entry]
        }).set_index(tiger.ID_COL)

    # fasta file upload
    elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
        if st.session_state.fasta_entry is not None:
            # Write the upload to a local temp file so tiger can parse it,
            # then remove it immediately after loading.
            fasta_path = st.session_state.fasta_entry.name
            with open(fasta_path, 'w') as f:
                f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
            transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
            os.remove(fasta_path)

    # convert to upper case as used by tokenizer
    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper().replace('U', 'T'))

    # ensure all transcripts have unique identifiers
    if transcripts.index.has_duplicates:
        st.session_state.input_error = "Duplicate transcript ID's detected in fasta file"

    # ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
    elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
        st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts or Us'

    # ensure all transcripts satisfy length requirements
    elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):
        st.session_state.input_error = 'Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN)

    # run model if we have any transcripts
    elif len(transcripts) > 0:
        st.session_state.transcripts = transcripts
83
 
84
# Check if the selected model is Cas9
if selected_model == 'Cas9':
    # Use a radio button to select enzymes, making sure only one can be selected at a time
    target_selection = st.radio(
        "Select either on-target or off-target:",
        ('on-target', 'off-target'),
        key='target_selection'
    )

    # Actions based on the selected enzyme
    if target_selection == 'on-target':
        # NOTE(review): on-target path is a stub — presumably cas9on.predict
        # will be wired in here; confirm intended behavior.
        pass
    elif target_selection == 'off-target':
        ENTRY_METHODS = dict(
            manual='Manual entry of target sequence',
            txt="txt file upload"
        )
        # NOTE(review): nesting an `if __name__ == '__main__':` guard inside a
        # branch is unusual for a Streamlit script — verify this is intentional.
        if __name__ == '__main__':
            # app initialization for Cas9 off-target
            if 'target_sequence' not in st.session_state:
                st.session_state.target_sequence = None
            if 'input_error' not in st.session_state:
                st.session_state.input_error = None
            if 'off_target_results' not in st.session_state:
                st.session_state.off_target_results = None

            # target sequence entry
            st.selectbox(
                label='How would you like to provide target sequences?',
                options=ENTRY_METHODS.values(),
                key='entry_method',
                disabled=st.session_state.target_sequence is not None
            )
            if st.session_state.entry_method == ENTRY_METHODS['manual']:
                st.text_input(
                    label='Enter on/off sequences:',
                    key='manual_entry',
                    placeholder='Enter on/off sequences like:GGGTGGGGGGAGTTTGCTCCAGG,AGGTGGGGTGA_TTTGCTCCAGG',
                    disabled=st.session_state.target_sequence is not None
                )
            elif st.session_state.entry_method == ENTRY_METHODS['txt']:
                st.file_uploader(
                    label='Upload a txt file:',
                    key='txt_entry',
                    disabled=st.session_state.target_sequence is not None
                )

            # prediction button
            # NOTE(review): cas9off.CRISPR_net_predict requires an X_test argument,
            # but Streamlit invokes on_click callbacks with no arguments — clicking
            # this button will raise a TypeError. Confirm the intended callback.
            st.button(label='Predict off-target effects', on_click=cas9off.CRISPR_net_predict,
                      disabled=st.session_state.target_sequence is not None)
            progress = st.empty()

            # input error display
            error = st.empty()
            if st.session_state.input_error is not None:
                error.error(st.session_state.input_error, icon="🚨")
            else:
                error.empty()

            # off-target results display
            off_target_results = st.empty()
            if st.session_state.off_target_results is not None:
                with off_target_results.container():
                    if len(st.session_state.off_target_results) > 0:
                        st.write('Off-target predictions:', st.session_state.off_target_results)
                        st.download_button(
                            label='Download off-target predictions',
                            data=convert_df(st.session_state.off_target_results),
                            file_name='off_target_results.csv',
                            mime='text/csv'
                        )
                    else:
                        st.write('No significant off-target effects detected!')
            else:
                off_target_results.empty()

            # running the CRISPR-Net model for off-target predictions
            # NOTE(review): cas9off.predict_off_targets is not defined in
            # cas9off.py as added in this commit — this branch will raise
            # AttributeError when reached; verify the intended entry point.
            if st.session_state.target_sequence is not None:
                st.session_state.off_target_results = cas9off.predict_off_targets(
                    target_sequence=st.session_state.target_sequence,
                    status_update_fn=progress_update
                )
                st.session_state.target_sequence = None
                st.experimental_rerun()

elif selected_model == 'Cas12':
    # Placeholder for Cas12 model loading
    # TODO: Implement Cas12 model loading logic
    raise NotImplementedError("Cas12 model loading not implemented yet.")
174
+ elif selected_model == 'Cas13d':
175
+ ENTRY_METHODS = dict(
176
+ manual='Manual entry of single transcript',
177
+ fasta="Fasta file upload (supports multiple transcripts if they have unique ID's)"
178
+ )
179
 
180
  if __name__ == '__main__':
 
181
  # app initialization
182
  if 'mode' not in st.session_state:
183
  st.session_state.mode = tiger.RUN_MODES['all']
 
299
  )
300
  st.session_state.transcripts = None
301
  st.experimental_rerun()
302
+
 
cas9_model/CRISPR_Net_CIRCLE_elevation_SITE_structure.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "main_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 1, 24, 7], "dtype": "float32", "sparse": false, "name": "main_input"}, "inbound_nodes": []}, {"name": "conv2d_1", "class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "filters": 10, "kernel_size": [1, 1], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "conv2d_2", "class_name": "Conv2D", "config": {"name": "conv2d_2", "trainable": true, "filters": 10, "kernel_size": [1, 2], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "conv2d_3", "class_name": "Conv2D", "config": {"name": "conv2d_3", "trainable": true, "filters": 10, "kernel_size": [1, 3], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": 
{"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "conv2d_4", "class_name": "Conv2D", "config": {"name": "conv2d_4", "trainable": true, "filters": 10, "kernel_size": [1, 5], "strides": [1, 1], "padding": "same", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "activation_1", "class_name": "Activation", "config": {"name": "activation_1", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["conv2d_1", 0, 0, {}]]]}, {"name": "activation_2", "class_name": "Activation", "config": {"name": "activation_2", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["conv2d_2", 0, 0, {}]]]}, {"name": "activation_3", "class_name": "Activation", "config": {"name": "activation_3", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["conv2d_3", 0, 0, {}]]]}, {"name": "activation_4", "class_name": "Activation", "config": {"name": "activation_4", "trainable": true, "activation": "relu"}, "inbound_nodes": [[["conv2d_4", 0, 0, {}]]]}, {"name": "concatenate_1", "class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "axis": -1}, "inbound_nodes": [[["main_input", 0, 0, {}], ["activation_1", 0, 0, {}], ["activation_2", 0, 0, {}], ["activation_3", 0, 0, {}], ["activation_4", 0, 0, {}]]]}, {"name": "reshape_1", "class_name": "Reshape", "config": {"name": "reshape_1", "trainable": true, "target_shape": [24, 47]}, 
"inbound_nodes": [[["concatenate_1", 0, 0, {}]]]}, {"name": "bidirectional_1", "class_name": "Bidirectional", "config": {"name": "bidirectional_1", "trainable": true, "layer": {"class_name": "LSTM", "config": {"name": "LSTM_out", "trainable": true, "batch_input_shape": [null, 24, 47], "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 15, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}}, "merge_mode": "concat"}, "inbound_nodes": [[["reshape_1", 0, 0, {}]]]}, {"name": "flatten_1", "class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true, "data_format": "channels_last"}, "inbound_nodes": [[["bidirectional_1", 0, 0, {}]]]}, {"name": "dense_1", "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 80, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["flatten_1", 0, 0, {}]]]}, {"name": "dense_2", "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 20, "activation": 
"relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_1", 0, 0, {}]]]}, {"name": "dropout_1", "class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.35, "noise_shape": null, "seed": null}, "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"name": "main_output", "class_name": "Dense", "config": {"name": "main_output", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dropout_1", 0, 0, {}]]]}], "input_layers": [["main_input", 0, 0]], "output_layers": [["main_output", 0, 0]]}, "keras_version": "2.2.4", "backend": "tensorflow"}
cas9_model/CRISPR_Net_CIRCLE_elevation_SITE_weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7f6aa381520f5c68fa1f099a6ef3ebc3b8ce846709b97dfde2053f26ca62f80
3
+ size 312432
cas9_model/on-cla.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5acf8f740cf326052ad08db2ca71d7204526c61f6a9fcdca36e15004bc16ad04
3
+ size 34044032
cas9off.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import numpy as np
3
+ import pandas as pd
4
+ import os
5
+ import argparse
6
+
7
# Column names shared with the rest of the app.
ID_COL = 'Transcript ID'
SEQ_COL = 'Transcript Sequence'

# Configure GPUs: enable memory growth and pin computation to the first device.
_gpus = tf.config.list_physical_devices('GPU')
for _gpu in _gpus:
    tf.config.experimental.set_memory_growth(_gpu, enable=True)
if len(_gpus) > 0:
    tf.config.experimental.set_visible_devices(_gpus[0], 'GPU')

# Application configuration.
BATCH_SIZE_COMPUTE = 500
BATCH_SIZE_SCAN = 20
BATCH_SIZE_TRANSCRIPTS = 50
NUM_TOP_GUIDES = 10
NUM_MISMATCHES = 3
RUN_MODES = dict(
    all='All on-target guides per transcript',
    top_guides=f'Top {NUM_TOP_GUIDES:d} guides per transcript',
    titration=f'Top {NUM_TOP_GUIDES:d} guides per transcript & their titration candidates',
)
28
+
29
class Encoder:
    """Encode an on-target / off-target sequence pair as a 24x7 feature matrix.

    Sequences shorter than 24 bases are left-padded with '-'. Each position
    contributes five channels (bitwise OR of the two one-hot base codes over
    the alphabet A/T/G/C/_) plus two direction channels recording which base
    ranks higher under ``direction_dict``. The result is stored in
    ``self.on_off_code`` with shape (24, 7).
    """

    def __init__(self, on_seq, off_seq, with_category=False, label=None, with_reg_val=False, value=None):
        tlen = 24
        # Left-pad both sequences with '-' up to a fixed length of 24.
        self.on_seq = "-" * (tlen - len(on_seq)) + on_seq
        self.off_seq = "-" * (tlen - len(off_seq)) + off_seq
        self.encoded_dict_indel = {'A': [1, 0, 0, 0, 0], 'T': [0, 1, 0, 0, 0],
                                   'G': [0, 0, 1, 0, 0], 'C': [0, 0, 0, 1, 0],
                                   '_': [0, 0, 0, 0, 1], '-': [0, 0, 0, 0, 0]}
        self.direction_dict = {'A': 5, 'G': 4, 'C': 3, 'T': 2, '_': 1}
        if with_category:
            self.label = label
        if with_reg_val:
            self.value = value
        self.encode_on_off_dim7()

    def encode_sgRNA(self):
        """One-hot encode the padded on-target sequence; 'N' copies the off-target base."""
        resolved = [
            off_base if on_base == "N" else on_base
            for on_base, off_base in zip(self.on_seq, self.off_seq)
        ]
        self.sgRNA_code = np.array([self.encoded_dict_indel[base] for base in resolved])

    def encode_off(self):
        """One-hot encode the padded off-target sequence."""
        self.off_code = np.array([self.encoded_dict_indel[base] for base in self.off_seq])

    def encode_on_off_dim7(self):
        """Build the 24x7 pair encoding: OR of both one-hot codes plus two direction bits."""
        self.encode_sgRNA()
        self.encode_off()
        rows = []
        for i, (on_b, off_b) in enumerate(zip(self.on_seq, self.off_seq)):
            # Union of the two base codes marks every base present at this position.
            diff_code = np.bitwise_or(self.sgRNA_code[i], self.off_code[i])
            if on_b == "N":
                on_b = off_b
            dir_code = np.zeros(2)
            # Direction bits stay zero for pads and matching ranks.
            if on_b != "-" and off_b != "-" and self.direction_dict[on_b] != self.direction_dict[off_b]:
                if self.direction_dict[on_b] > self.direction_dict[off_b]:
                    dir_code[0] = 1
                else:
                    dir_code[1] = 1
            rows.append(np.concatenate((diff_code, dir_code)))
        self.on_off_code = np.array(rows)
83
+
84
def encode_on_off_seq_pairs(input_file, output_file="CRISPR_net_results.csv"):
    """Score on/off-target sequence pairs listed in a CSV file.

    Parameters
    ----------
    input_file : str
        Path to a headerless CSV with two columns: on-target sequence,
        off-target sequence.
    output_file : str, optional
        Where to write the scored table. Defaults to 'CRISPR_net_results.csv'
        (the value the original hard-coded) for backward compatibility.

    Returns
    -------
    pandas.DataFrame
        The input pairs with an added 'CRISPR_Net_score' column.
    """
    inputs = pd.read_csv(input_file, delimiter=",", header=None, names=['on_seq', 'off_seq'])
    input_codes = []
    for _, row in inputs.iterrows():
        en = Encoder(on_seq=row['on_seq'], off_seq=row['off_seq'])
        input_codes.append(en.on_off_code)
    # CRISPR-Net expects a (batch, 1, 24, 7) tensor.
    input_codes = np.array(input_codes).reshape((len(input_codes), 1, 24, 7))
    inputs['CRISPR_Net_score'] = CRISPR_net_predict(input_codes)
    inputs.to_csv(output_file, index=False)
    return inputs
97
+
98
def CRISPR_net_predict(X_test):
    """Run the pre-trained CRISPR-Net model on encoded sequence pairs.

    Parameters
    ----------
    X_test : np.ndarray
        Encoded on/off pairs of shape (batch, 1, 24, 7).

    Returns
    -------
    np.ndarray
        1-D array of off-target scores, one per input pair.
    """
    # Context manager replaces the manual open/read/close so the handle is
    # released even if reading or JSON parsing fails.
    with open("cas9_model/CRISPR_Net_CIRCLE_elevation_SITE_structure.json", 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = tf.keras.models.model_from_json(loaded_model_json)  # Updated for TensorFlow 2
    loaded_model.load_weights("cas9_model/CRISPR_Net_CIRCLE_elevation_SITE_weights.h5")
    y_pred = loaded_model.predict(X_test).flatten()
    return y_pred
106
+
107
if __name__ == '__main__':
    # Command-line entry point: score on/off-target pairs from a CSV file.
    parser = argparse.ArgumentParser(description="CRISPR-Net v1.0 (Aug 10 2019)")
    parser.add_argument("input_file",
                        help="input_file example (on-target seq, off-target seq):\n GAGT_CCGAGCAGAAGAAGAATGG,GAGTACCAAGTAGAAGAAAAATTT\n"
                             "GTTGCCCCACAGGGCAGTAAAGG,GTGGACACCCCGGGCAGGAAAGG\n"
                             "GGGTGGGGGGAGTTTGCTCCAGG,AGGTGGGGTGA_TTTGCTCCAGG")
    args = parser.parse_args()
    input_path = args.input_file
    if os.path.exists(input_path):
        encode_on_off_seq_pairs(input_path)
    else:
        print("File doesn't exist!")
    # Free the TensorFlow graph/session resources before exit.
    tf.keras.backend.clear_session()
cas9on.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import pandas as pd
3
+ import numpy as np
4
+ from operator import add
5
+ from functools import reduce
6
+
7
# Configure GPUs: allow memory growth and restrict TensorFlow to the first device.
_physical_gpus = tf.config.list_physical_devices('GPU')
for _gpu in _physical_gpus:
    tf.config.experimental.set_memory_growth(_gpu, enable=True)
if _physical_gpus:
    tf.config.experimental.set_visible_devices(_physical_gpus[0], 'GPU')
12
+
13
+
14
# One-hot code per nucleotide and binary code per epigenetic mark.
ntmap = {'A': (1, 0, 0, 0),
         'C': (0, 1, 0, 0),
         'G': (0, 0, 1, 0),
         'T': (0, 0, 0, 1)
         }
epimap = {'A': 1, 'N': 0}


def get_seqcode(seq):
    """One-hot encode a nucleotide sequence as an array of shape (1, len(seq), 4).

    Input is upper-cased first, so lower-case bases are accepted.
    """
    codes = [ntmap[base] for base in seq.upper()]
    return np.array(codes).reshape((1, len(seq), -1))


def get_epicode(eseq):
    """Encode an epigenetic-mark string ('A'/'N') as an array of shape (1, len(eseq), 1)."""
    codes = [epimap[mark] for mark in eseq]
    return np.array(codes).reshape(1, len(eseq), -1)
29
+
30
class Episgt:
    """Loader for tab-separated .episgt files pairing sgRNA sequences with epigenetic features.

    Only the trailing `num_epi_features + 2` columns (or `+ 1` when no label
    column is present) are used: one sequence column, `num_epi_features`
    epigenetic columns, and optionally a final label column.
    """

    def __init__(self, fpath, num_epi_features, with_y=True):
        # Raw table; the file is expected to have no header row.
        self._fpath = fpath
        self._ori_df = pd.read_csv(fpath, sep='\t', index_col=None, header=None)
        self._num_epi_features = num_epi_features
        self._with_y = with_y
        # Keep only the trailing columns of interest: sequence, epi marks, [label].
        self._num_cols = num_epi_features + 2 if with_y else num_epi_features + 1
        self._cols = list(self._ori_df.columns)[-self._num_cols:]
        self._df = self._ori_df[self._cols]

    @property
    def length(self):
        # Number of rows (samples) in the file.
        return len(self._df)

    def get_dataset(self, x_dtype=np.float32, y_dtype=np.float32):
        """Return the encoded feature tensor (and labels when `with_y` is True).

        x has shape (N, 4 + num_epi_features, seq_len): one-hot nucleotide
        channels stacked with the epigenetic channels, channels-first.
        """
        # One-hot encode every sequence and stack along the batch axis.
        x_seq = np.concatenate(list(map(get_seqcode, self._df[self._cols[0]])))
        # Encode each epigenetic column, then stack those channels on the last axis.
        x_epis = np.concatenate([np.concatenate(list(map(get_epicode, self._df[col]))) for col in
                                 self._cols[1: 1 + self._num_epi_features]], axis=-1)
        x = np.concatenate([x_seq, x_epis], axis=-1).astype(x_dtype)
        # (N, seq_len, channels) -> (N, channels, seq_len)
        x = x.transpose(0, 2, 1)
        if self._with_y:
            y = np.array(self._df[self._cols[-1]]).astype(y_dtype)
            return x, y
        else:
            return x
55
+
56
+ from keras.models import load_model
57
+
58
class DCModelOntar:
    """Thin wrapper around a pre-trained Keras on-target model."""

    def __init__(self, ontar_model_dir, is_reg=False):
        """Load the saved model from disk.

        Parameters
        ----------
        ontar_model_dir : str
            Path to the saved Keras model file.
        is_reg : bool, optional
            Kept for backward compatibility. Both branches of the original
            `if is_reg:` called load_model identically, so the conditional
            was dead code and has been collapsed.
        """
        self.model = load_model(ontar_model_dir)

    def ontar_predict(self, x, channel_first=True):
        """Predict on-target efficiency scores for a batch of encoded inputs.

        Parameters
        ----------
        x : np.ndarray
            Input batch. When `channel_first` is True it is transposed from
            (N, C, H, W) to channels-last (N, H, W, C) before prediction.
        channel_first : bool, optional
            Whether `x` carries its channel axis first (default True).

        Returns
        -------
        np.ndarray
            Flattened 1-D array of model predictions.
        """
        if channel_first:
            x = x.transpose([0, 2, 3, 1])
        yp = self.model.predict(x)
        return yp.ravel()
70
+
71
def predict(file_path='eg_cls_on_target.episgt', model_path='cas9_model/on-cla.h5'):
    """Run on-target prediction for every row of an .episgt file.

    Parameters
    ----------
    file_path : str, optional
        Path to the input .episgt file (defaults to the bundled example).
    model_path : str, optional
        Path to the saved on-target model. This commit stores the weights at
        'cas9_model/on-cla.h5'; the original hard-coded 'on-cla.h5', which
        does not exist at the repository root.

    Returns
    -------
    np.ndarray
        Predicted on-target efficiency scores, one per input row.
    """
    input_data = Episgt(file_path, num_epi_features=4, with_y=True)
    x, y = input_data.get_dataset()
    # Insert a singleton spatial axis: (N, 8, seq_len) -> (N, 8, 1, seq_len).
    x = np.expand_dims(x, axis=2)
    dcModel = DCModelOntar(model_path)
    predicted_on_target = dcModel.ontar_predict(x)
    return predicted_on_target
tiger.md → crisprTool.md RENAMED
File without changes