Spaces:
Sleeping
Sleeping
Commit
·
7274bd3
1
Parent(s):
1d62c05
fixed visual
Browse files
app.py
CHANGED
@@ -136,55 +136,70 @@ if selected_model == 'Cas9':
|
|
136 |
# Now create a Plotly plot with the sorted_predictions
|
137 |
fig = go.Figure()
|
138 |
|
|
|
|
|
|
|
139 |
# Iterate over the sorted predictions to create the plot
|
140 |
for i, prediction in enumerate(sorted_predictions, start=1):
|
141 |
# Extract data for plotting
|
142 |
chrom, start, end, strand, target, gRNA, pred_score = prediction # Adjusted to include the target sequence
|
|
|
|
|
143 |
fig.add_trace(go.Scatter(
|
144 |
x=[start, end],
|
145 |
-
y=[
|
146 |
mode='lines+markers+text',
|
147 |
name=f"gRNA: {gRNA}",
|
148 |
-
text=
|
149 |
hoverinfo='text',
|
150 |
-
hovertext=
|
151 |
-
f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
|
152 |
-
""
|
153 |
-
],
|
154 |
))
|
155 |
|
156 |
# Update the layout of the plot
|
157 |
fig.update_layout(
|
158 |
-
title='
|
159 |
xaxis_title='Genomic Position',
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
163 |
)
|
164 |
|
165 |
# Display the plot
|
166 |
st.plotly_chart(fig)
|
167 |
|
168 |
if gene_sequence: # Ensure gene_sequence is not empty
|
|
|
169 |
genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
|
170 |
-
cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
|
171 |
bed_file_path = f"{gene_symbol}_crispr_targets.bed"
|
172 |
-
|
173 |
-
st.write('Top on-target predictions:')
|
174 |
-
st.dataframe(df)
|
175 |
|
176 |
-
#
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
# # Visualize the GenBank file using pyGenomeViz
|
190 |
# gv = GenomeViz(
|
@@ -336,60 +351,71 @@ elif selected_model == 'Cas12':
|
|
336 |
# Now create a Plotly plot with the sorted_predictions
|
337 |
fig = go.Figure()
|
338 |
|
|
|
|
|
|
|
339 |
# Iterate over the sorted predictions to create the plot
|
340 |
for i, prediction in enumerate(sorted_predictions, start=1):
|
341 |
# Extract data for plotting
|
342 |
-
chrom, start, end, strand,
|
343 |
-
#
|
|
|
344 |
fig.add_trace(go.Scatter(
|
345 |
x=[start, end],
|
346 |
-
y=[
|
347 |
mode='lines+markers+text',
|
348 |
name=f"gRNA: {gRNA}",
|
349 |
-
text=
|
350 |
hoverinfo='text',
|
351 |
-
hovertext=
|
352 |
-
f"Rank: {i}<br>Chromosome: {chrom}<br>Target: {Target}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == 1 else '-'}<br>Prediction Score: {pred_score:.4f}",
|
353 |
-
""
|
354 |
-
],
|
355 |
))
|
|
|
356 |
# Update the layout of the plot
|
357 |
fig.update_layout(
|
358 |
-
title='
|
359 |
xaxis_title='Genomic Position',
|
360 |
-
|
361 |
-
|
362 |
-
|
|
|
|
|
|
|
363 |
)
|
|
|
364 |
# Display the plot
|
365 |
st.plotly_chart(fig)
|
366 |
|
367 |
# Ensure gene_sequence is not empty before generating files
|
368 |
if gene_sequence:
|
|
|
369 |
genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
|
370 |
bed_file_path = f"{gene_symbol}_crispr_targets.bed"
|
|
|
371 |
|
372 |
-
# Generate
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
cas12.generate_bed_file_from_data(df, bed_file_path)
|
377 |
|
378 |
-
|
379 |
-
st.
|
380 |
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
)
|
389 |
-
|
390 |
-
|
391 |
-
st.
|
392 |
-
|
|
|
|
|
|
|
|
|
393 |
|
394 |
# Clean up old files after download buttons are created
|
395 |
clean_up_old_files(gene_symbol)
|
|
|
136 |
# Now create a Plotly plot with the sorted_predictions
|
137 |
fig = go.Figure()
|
138 |
|
139 |
+
# Set y values based on strand information
|
140 |
+
strand_y_values = {'1': 1, '-1': -1}
|
141 |
+
|
142 |
# Iterate over the sorted predictions to create the plot
|
143 |
for i, prediction in enumerate(sorted_predictions, start=1):
|
144 |
# Extract data for plotting
|
145 |
chrom, start, end, strand, target, gRNA, pred_score = prediction # Adjusted to include the target sequence
|
146 |
+
# Assign y value based on strand
|
147 |
+
y_value = strand_y_values[str(strand)] # Convert strand to string for dict lookup
|
148 |
fig.add_trace(go.Scatter(
|
149 |
x=[start, end],
|
150 |
+
y=[y_value] * len(start), # Assign all points the same y value based on strand
|
151 |
mode='lines+markers+text',
|
152 |
name=f"gRNA: {gRNA}",
|
153 |
+
text=f"Rank: {i}", # Place text at the first point
|
154 |
hoverinfo='text',
|
155 |
+
hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
|
|
|
|
|
|
|
156 |
))
|
157 |
|
158 |
# Update the layout of the plot
|
159 |
fig.update_layout(
|
160 |
+
title='CRISPR Targets by Strand',
|
161 |
xaxis_title='Genomic Position',
|
162 |
+
yaxis=dict(
|
163 |
+
title='Strand',
|
164 |
+
tickmode='array',
|
165 |
+
tickvals=[1, -1],
|
166 |
+
ticktext=['+ Strand', '- Strand']
|
167 |
+
)
|
168 |
)
|
169 |
|
170 |
# Display the plot
|
171 |
st.plotly_chart(fig)
|
172 |
|
173 |
if gene_sequence: # Ensure gene_sequence is not empty
|
174 |
+
# Define file paths
|
175 |
genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
|
|
|
176 |
bed_file_path = f"{gene_symbol}_crispr_targets.bed"
|
177 |
+
csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
|
|
|
|
|
178 |
|
179 |
+
# Generate files
|
180 |
+
cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
|
181 |
+
cas9on.create_bed_file_from_df(df, bed_file_path)
|
182 |
+
cas9on.create_csv_from_df(df, csv_file_path)
|
183 |
+
|
184 |
+
# File download selection
|
185 |
+
file_type = st.selectbox('Select file type to download:', ('GenBank', 'BED', 'CSV'))
|
186 |
+
|
187 |
+
if file_type == 'GenBank':
|
188 |
+
with open(genbank_file_path, "rb") as file:
|
189 |
+
st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
|
190 |
+
mime="text/x-genbank")
|
191 |
+
st.markdown(
|
192 |
+
"GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/)")
|
193 |
+
elif file_type == 'BED':
|
194 |
+
with open(bed_file_path, "rb") as file:
|
195 |
+
st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
|
196 |
+
mime="text/plain")
|
197 |
+
st.markdown(
|
198 |
+
"BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)")
|
199 |
+
elif file_type == 'CSV':
|
200 |
+
with open(csv_file_path, "rb") as file:
|
201 |
+
st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
|
202 |
+
mime="text/csv")
|
203 |
|
204 |
# # Visualize the GenBank file using pyGenomeViz
|
205 |
# gv = GenomeViz(
|
|
|
351 |
# Now create a Plotly plot with the sorted_predictions
|
352 |
fig = go.Figure()
|
353 |
|
354 |
+
# Set y values based on strand information
|
355 |
+
strand_y_values = {'1': 1, '-1': -1}
|
356 |
+
|
357 |
# Iterate over the sorted predictions to create the plot
|
358 |
for i, prediction in enumerate(sorted_predictions, start=1):
|
359 |
# Extract data for plotting
|
360 |
+
chrom, start, end, strand, target, gRNA, pred_score = prediction # Adjusted to include the target sequence
|
361 |
+
# Assign y value based on strand
|
362 |
+
y_value = strand_y_values[str(strand)] # Convert strand to string for dict lookup
|
363 |
fig.add_trace(go.Scatter(
|
364 |
x=[start, end],
|
365 |
+
y=[y_value] * len(start), # Assign all points the same y value based on strand
|
366 |
mode='lines+markers+text',
|
367 |
name=f"gRNA: {gRNA}",
|
368 |
+
text=f"Rank: {i}", # Place text at the first point
|
369 |
hoverinfo='text',
|
370 |
+
hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
|
|
|
|
|
|
|
371 |
))
|
372 |
+
|
373 |
# Update the layout of the plot
|
374 |
fig.update_layout(
|
375 |
+
title='CRISPR Targets by Strand',
|
376 |
xaxis_title='Genomic Position',
|
377 |
+
yaxis=dict(
|
378 |
+
title='Strand',
|
379 |
+
tickmode='array',
|
380 |
+
tickvals=[1, -1],
|
381 |
+
ticktext=['+ Strand', '- Strand']
|
382 |
+
)
|
383 |
)
|
384 |
+
|
385 |
# Display the plot
|
386 |
st.plotly_chart(fig)
|
387 |
|
388 |
# Ensure gene_sequence is not empty before generating files
|
389 |
if gene_sequence:
|
390 |
+
# Define file paths
|
391 |
genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
|
392 |
bed_file_path = f"{gene_symbol}_crispr_targets.bed"
|
393 |
+
csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
|
394 |
|
395 |
+
# Generate files
|
396 |
+
cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
|
397 |
+
cas9on.create_bed_file_from_df(df, bed_file_path)
|
398 |
+
cas9on.create_csv_from_df(df, csv_file_path)
|
|
|
399 |
|
400 |
+
# File download selection
|
401 |
+
file_type = st.selectbox('Select file type to download:', ('GenBank', 'BED', 'CSV'))
|
402 |
|
403 |
+
if file_type == 'GenBank':
|
404 |
+
with open(genbank_file_path, "rb") as file:
|
405 |
+
st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
|
406 |
+
mime="text/x-genbank")
|
407 |
+
st.markdown(
|
408 |
+
"GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/)")
|
409 |
+
elif file_type == 'BED':
|
410 |
+
with open(bed_file_path, "rb") as file:
|
411 |
+
st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
|
412 |
+
mime="text/plain")
|
413 |
+
st.markdown(
|
414 |
+
"BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)")
|
415 |
+
elif file_type == 'CSV':
|
416 |
+
with open(csv_file_path, "rb") as file:
|
417 |
+
st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
|
418 |
+
mime="text/csv")
|
419 |
|
420 |
# Clean up old files after download buttons are created
|
421 |
clean_up_old_files(gene_symbol)
|
cas12.py
CHANGED
@@ -175,6 +175,8 @@ def generate_genbank_file_from_data(formatted_data, gene_sequence, gene_symbol,
|
|
175 |
record.annotations["molecule_type"] = "DNA"
|
176 |
SeqIO.write(record, output_path, "genbank")
|
177 |
|
|
|
|
|
178 |
|
179 |
def generate_bed_file_from_data(formatted_data, output_path):
|
180 |
with open(output_path, 'w') as bed_file:
|
|
|
175 |
record.annotations["molecule_type"] = "DNA"
|
176 |
SeqIO.write(record, output_path, "genbank")
|
177 |
|
178 |
+
def create_csv_from_df(df, output_path):
|
179 |
+
df.to_csv(output_path, index=False)
|
180 |
|
181 |
def generate_bed_file_from_data(formatted_data, output_path):
|
182 |
with open(output_path, 'w') as bed_file:
|
cas9on.py
CHANGED
@@ -170,4 +170,7 @@ def create_bed_file_from_df(df, output_path):
|
|
170 |
strand = '+' if row["Strand"] == '+' else '-'
|
171 |
gRNA = row["gRNA"]
|
172 |
score = str(row["Prediction"]) # Ensure score is converted to string if not already
|
173 |
-
bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
|
|
|
|
|
|
|
|
170 |
strand = '+' if row["Strand"] == '+' else '-'
|
171 |
gRNA = row["gRNA"]
|
172 |
score = str(row["Prediction"]) # Ensure score is converted to string if not already
|
173 |
+
bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
|
174 |
+
|
175 |
+
def create_csv_from_df(df, output_path):
|
176 |
+
df.to_csv(output_path, index=False)
|