Spaces:

NiniCat
/

CRISPRTool

Running

App Files Community

supercat666 committed on Feb 21, 2024

Commit

7274bd3

1 Parent(s): 1d62c05

fixed visual

Browse files

Files changed (3) hide show

app.py +83 -57
cas12.py +2 -0
cas9on.py +4 -1

app.py CHANGED Viewed

@@ -136,55 +136,70 @@ if selected_model == 'Cas9':
                 # Now create a Plotly plot with the sorted_predictions
                 fig = go.Figure()
                 # Iterate over the sorted predictions to create the plot
                 for i, prediction in enumerate(sorted_predictions, start=1):
                     # Extract data for plotting
                     chrom, start, end, strand, target, gRNA, pred_score = prediction  # Adjusted to include the target sequence
                     fig.add_trace(go.Scatter(
                         x=[start, end],
-                        y=[i, i],  # Y-values are just the rank of the prediction
                         mode='lines+markers+text',
                         name=f"gRNA: {gRNA}",
-                        text=[f"Rank: {i}", ""],  # Text at the start position only
                         hoverinfo='text',
-                        hovertext=[
-                            f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
-                            ""
-                        ],
                     ))
                 # Update the layout of the plot
                 fig.update_layout(
-                    title='Top 10 gRNA Sequences by Prediction Score',
                     xaxis_title='Genomic Position',
-                    yaxis_title='Rank',
-                    yaxis=dict(showticklabels=False)
-                    # Hide the y-axis labels since the rank is indicated in the hovertext
                 )
                 # Display the plot
                 st.plotly_chart(fig)
                 if gene_sequence:  # Ensure gene_sequence is not empty
                     genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
-                    cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
                     bed_file_path = f"{gene_symbol}_crispr_targets.bed"
-                    cas9on.create_bed_file_from_df(df, bed_file_path)
-                    st.write('Top on-target predictions:')
-                    st.dataframe(df)
-                    # Add a download button for the GenBank file
-                    with open(genbank_file_path, "rb") as file:
-                        st.download_button(
-                            label="Download GenBank File",
-                            data=file,
-                            file_name=genbank_file_path,
-                            mime="text/x-genbank"
-                        )
-                    # Download button for the BED file
-                    with open(bed_file_path, "rb") as file:
-                        st.download_button(label="Download BED File", data=file,
-                            file_name=bed_file_path, mime="text/plain")
                     # # Visualize the GenBank file using pyGenomeViz
                     # gv = GenomeViz(
@@ -336,60 +351,71 @@ elif selected_model == 'Cas12':
             # Now create a Plotly plot with the sorted_predictions
             fig = go.Figure()
             # Iterate over the sorted predictions to create the plot
             for i, prediction in enumerate(sorted_predictions, start=1):
                 # Extract data for plotting
-                chrom, start, end, strand, Target, gRNA, pred_score = prediction
-                # Strand is not used in this plot, but you could use it to determine marker symbol, for example
                 fig.add_trace(go.Scatter(
                     x=[start, end],
-                    y=[i, i],  # Y-values are just the rank of the prediction
                     mode='lines+markers+text',
                     name=f"gRNA: {gRNA}",
-                    text=[f"Rank: {i}", ""],  # Text at the start position only
                     hoverinfo='text',
-                    hovertext=[
-                        f"Rank: {i}<br>Chromosome: {chrom}<br>Target: {Target}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == 1 else '-'}<br>Prediction Score: {pred_score:.4f}",
-                        ""
-                    ],
                 ))
             # Update the layout of the plot
             fig.update_layout(
-                title='Top 10 gRNA Sequences by Prediction Score',
                 xaxis_title='Genomic Position',
-                yaxis_title='Rank',
-                yaxis=dict(showticklabels=False)
-                # We hide the y-axis labels since the rank is indicated in the hovertext
             )
             # Display the plot
             st.plotly_chart(fig)
             # Ensure gene_sequence is not empty before generating files
             if gene_sequence:
                 genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                 bed_file_path = f"{gene_symbol}_crispr_targets.bed"
-                # Generate GenBank file
-                cas12.generate_genbank_file_from_data(df, gene_sequence, gene_symbol, genbank_file_path)
-                # Generate BED file
-                cas12.generate_bed_file_from_data(df, bed_file_path)
-                st.write('Top on-target predictions:')
-                st.dataframe(df)
-                # Download buttons
-                with open(genbank_file_path, "rb") as file:
-                    st.download_button(
-                        label="Download GenBank File",
-                        data=file,
-                        file_name=genbank_file_path,
-                        mime="text/x-genbank"
-                    )
-                with open(bed_file_path, "rb") as file:
-                    st.download_button(label="Download BED File", data=file,
-                                       file_name=bed_file_path, mime="text/plain")
                 # Clean up old files after download buttons are created
                 clean_up_old_files(gene_symbol)

                 # Now create a Plotly plot with the sorted_predictions
                 fig = go.Figure()
+                # Set y values based on strand information
+                strand_y_values = {'1': 1, '-1': -1}
                 # Iterate over the sorted predictions to create the plot
                 for i, prediction in enumerate(sorted_predictions, start=1):
                     # Extract data for plotting
                     chrom, start, end, strand, target, gRNA, pred_score = prediction  # Adjusted to include the target sequence
+                    # Assign y value based on strand
+                    y_value = strand_y_values[str(strand)]  # Convert strand to string for dict lookup
                     fig.add_trace(go.Scatter(
                         x=[start, end],
+                        y=[y_value] * len(start),  # Assign all points the same y value based on strand
                         mode='lines+markers+text',
                         name=f"gRNA: {gRNA}",
+                        text=f"Rank: {i}",  # Place text at the first point
                         hoverinfo='text',
+                        hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
                     ))
                 # Update the layout of the plot
                 fig.update_layout(
+                    title='CRISPR Targets by Strand',
                     xaxis_title='Genomic Position',
+                    yaxis=dict(
+                        title='Strand',
+                        tickmode='array',
+                        tickvals=[1, -1],
+                        ticktext=['+ Strand', '- Strand']
+                    )
                 )
                 # Display the plot
                 st.plotly_chart(fig)
                 if gene_sequence:  # Ensure gene_sequence is not empty
+                    # Define file paths
                     genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                     bed_file_path = f"{gene_symbol}_crispr_targets.bed"
+                    csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+                    # Generate files
+                    cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
+                    cas9on.create_bed_file_from_df(df, bed_file_path)
+                    cas9on.create_csv_from_df(df, csv_file_path)
+                    # File download selection
+                    file_type = st.selectbox('Select file type to download:', ('GenBank', 'BED', 'CSV'))
+                    if file_type == 'GenBank':
+                        with open(genbank_file_path, "rb") as file:
+                            st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
+                                               mime="text/x-genbank")
+                        st.markdown(
+                            "GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/)")
+                    elif file_type == 'BED':
+                        with open(bed_file_path, "rb") as file:
+                            st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
+                                               mime="text/plain")
+                        st.markdown(
+                            "BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)")
+                    elif file_type == 'CSV':
+                        with open(csv_file_path, "rb") as file:
+                            st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
+                                               mime="text/csv")
                     # # Visualize the GenBank file using pyGenomeViz
                     # gv = GenomeViz(
             # Now create a Plotly plot with the sorted_predictions
             fig = go.Figure()
+            # Set y values based on strand information
+            strand_y_values = {'1': 1, '-1': -1}
             # Iterate over the sorted predictions to create the plot
             for i, prediction in enumerate(sorted_predictions, start=1):
                 # Extract data for plotting
+                chrom, start, end, strand, target, gRNA, pred_score = prediction  # Adjusted to include the target sequence
+                # Assign y value based on strand
+                y_value = strand_y_values[str(strand)]  # Convert strand to string for dict lookup
                 fig.add_trace(go.Scatter(
                     x=[start, end],
+                    y=[y_value] * len(start),  # Assign all points the same y value based on strand
                     mode='lines+markers+text',
                     name=f"gRNA: {gRNA}",
+                    text=f"Rank: {i}",  # Place text at the first point
                     hoverinfo='text',
+                    hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
                 ))
             # Update the layout of the plot
             fig.update_layout(
+                title='CRISPR Targets by Strand',
                 xaxis_title='Genomic Position',
+                yaxis=dict(
+                    title='Strand',
+                    tickmode='array',
+                    tickvals=[1, -1],
+                    ticktext=['+ Strand', '- Strand']
+                )
             )
             # Display the plot
             st.plotly_chart(fig)
             # Ensure gene_sequence is not empty before generating files
             if gene_sequence:
+                # Define file paths
                 genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                 bed_file_path = f"{gene_symbol}_crispr_targets.bed"
+                csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+                # Generate files
+                cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
+                cas9on.create_bed_file_from_df(df, bed_file_path)
+                cas9on.create_csv_from_df(df, csv_file_path)
+                # File download selection
+                file_type = st.selectbox('Select file type to download:', ('GenBank', 'BED', 'CSV'))
+                if file_type == 'GenBank':
+                    with open(genbank_file_path, "rb") as file:
+                        st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
+                                           mime="text/x-genbank")
+                    st.markdown(
+                        "GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/)")
+                elif file_type == 'BED':
+                    with open(bed_file_path, "rb") as file:
+                        st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
+                                           mime="text/plain")
+                    st.markdown(
+                        "BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)")
+                elif file_type == 'CSV':
+                    with open(csv_file_path, "rb") as file:
+                        st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
+                                           mime="text/csv")
                 # Clean up old files after download buttons are created
                 clean_up_old_files(gene_symbol)

cas12.py CHANGED Viewed

@@ -175,6 +175,8 @@ def generate_genbank_file_from_data(formatted_data, gene_sequence, gene_symbol,
     record.annotations["molecule_type"] = "DNA"
     SeqIO.write(record, output_path, "genbank")
 def generate_bed_file_from_data(formatted_data, output_path):
     with open(output_path, 'w') as bed_file:

     record.annotations["molecule_type"] = "DNA"
     SeqIO.write(record, output_path, "genbank")
+def create_csv_from_df(df, output_path):
+    df.to_csv(output_path, index=False)
 def generate_bed_file_from_data(formatted_data, output_path):
     with open(output_path, 'w') as bed_file:

cas9on.py CHANGED Viewed

@@ -170,4 +170,7 @@ def create_bed_file_from_df(df, output_path):
             strand = '+' if row["Strand"] == '+' else '-'
             gRNA = row["gRNA"]
             score = str(row["Prediction"])  # Ensure score is converted to string if not already
-            bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")

             strand = '+' if row["Strand"] == '+' else '-'
             gRNA = row["gRNA"]
             score = str(row["Prediction"])  # Ensure score is converted to string if not already
+            bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
+def create_csv_from_df(df, output_path):
+    df.to_csv(output_path, index=False)