Spaces:

NiniCat
/

CRISPRTool

Sleeping

App Files Community

supercat666 committed on May 14

Commit

16e89c0

•

1 Parent(s): 9ad0b46

fix app.py

Browse files

Files changed (1) hide show

app.py +90 -65

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import cas9att
 import cas9attvcf
 import cas9off
 import cas12
 import pandas as pd
 import streamlit as st
 import plotly.graph_objs as go
@@ -184,10 +185,7 @@ if selected_model == 'Cas9':
         if predict_button and gene_symbol:
             model_choice = st.radio("mutation or not:", ('normal', 'mutation'))
             with st.spinner('Predicting... Please wait'):
-                if model_choice == 'cas9attvcf':
-                    predictions, gene_sequence, exons = cas9attvcf.process_gene(gene_symbol, cas9att_path)
-                else:
-                    predictions, gene_sequence, exons = cas9att.process_gene(gene_symbol, cas9att_path)
                 sorted_predictions = sorted(predictions)[:10]
                 st.session_state['on_target_results'] = sorted_predictions
@@ -437,83 +435,98 @@ elif selected_model == 'Cas12':
     # Process predictions
     if predict_button and gene_symbol:
-        # Update the current gene symbol
-        st.session_state['current_gene_symbol'] = gene_symbol
-        # Run the prediction process
         with st.spinner('Predicting... Please wait'):
-            predictions, gene_sequence, exons = cas12.process_gene(gene_symbol,cas12_path)
-            sorted_predictions = sorted(predictions, key=lambda x: x[-1], reverse=True)[:10]
             st.session_state['on_target_results'] = sorted_predictions
             st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
             st.session_state['exons'] = exons  # Store exon data
         st.success('Prediction completed!')
-        # Visualization and file generation
         if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
-            df = pd.DataFrame(st.session_state['on_target_results'],
-                              columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon", "Target", "gRNA", "Prediction"])
-            st.dataframe(df)
-            # Now create a Plotly plot with the sorted_predictions
             fig = go.Figure()
-            # Initialize the y position for the positive and negative strands
-            positive_strand_y = 0.1
-            negative_strand_y = -0.1
-            # Use an offset to spread gRNA sequences vertically
-            offset = 0.05
-            # Iterate over the sorted predictions to create the plot
-            for i, prediction in enumerate(sorted_predictions, start=1):
-                # Extract data for plotting and convert start and end to integers
-                chrom, start, end, strand, target, gRNA, pred_score = prediction
-                start, end = int(start), int(end)
-                midpoint = (start + end) / 2
-                # Set the y-value and arrow symbol based on the strand
-                if strand == '1':
-                    y_value = positive_strand_y
-                    arrow_symbol = 'triangle-right'
-                    # Increment the y-value for the next positive strand gRNA
-                    positive_strand_y += offset
-                else:
-                    y_value = negative_strand_y
-                    arrow_symbol = 'triangle-left'
-                    # Decrement the y-value for the next negative strand gRNA
-                    negative_strand_y -= offset
                 fig.add_trace(go.Scatter(
                     x=[midpoint],
-                    y=[y_value],  # Use the y_value set above for the strand
                     mode='markers+text',
-                    marker=dict(symbol=arrow_symbol, size=10),
-                    name=f"gRNA: {gRNA}",
-                    text=f"Rank: {i}",  # Place text at the marker
                     hoverinfo='text',
-                    hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == 1 else '-'}<br>Prediction Score: {pred_score:.4f}",
                 ))
-            # Update the layout of the plot
             fig.update_layout(
-                title='Top 10 gRNA Sequences by Prediction Score',
                 xaxis_title='Genomic Position',
-                yaxis=dict(
-                    title='Strand',
-                    showgrid=True,  # Show horizontal gridlines for clarity
-                    zeroline=True,  # Show a line at y=0 to represent the axis
-                    zerolinecolor='Black',
-                    zerolinewidth=2,
-                    tickvals=[positive_strand_y, negative_strand_y],
-                    ticktext=['+ Strand', '- Strand']
-                ),
-                showlegend=False  # Hide the legend if it's not necessary
             )
             # Display the plot
             st.plotly_chart(fig)
-            # Ensure gene_sequence is not empty before generating files
             if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
                 gene_symbol = st.session_state['current_gene_symbol']
                 gene_sequence = st.session_state['gene_sequence']
@@ -522,26 +535,38 @@ elif selected_model == 'Cas12':
                 genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                 bed_file_path = f"{gene_symbol}_crispr_targets.bed"
                 csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
                 # Generate files
-                cas12.generate_genbank_file_from_data(df, gene_sequence, gene_symbol, genbank_file_path)
-                cas12.generate_bed_file_from_data(df, bed_file_path)
-                cas12.create_csv_from_df(df, csv_file_path)
                 # Prepare an in-memory buffer for the ZIP file
                 zip_buffer = io.BytesIO()
                 with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                     # For each file, add it to the ZIP file
-                    zip_file.write(genbank_file_path, arcname=genbank_file_path.split('/')[-1])
-                    zip_file.write(bed_file_path, arcname=bed_file_path.split('/')[-1])
-                    zip_file.write(csv_file_path, arcname=csv_file_path.split('/')[-1])
                 # Important: move the cursor to the beginning of the BytesIO buffer before reading it
                 zip_buffer.seek(0)
                 # Display the download button for the ZIP file
                 st.download_button(
-                    label="Download genbank,.bed,csv files as ZIP",
                     data=zip_buffer.getvalue(),
                     file_name=f"{gene_symbol}_files.zip",
                     mime="application/zip"

 import cas9attvcf
 import cas9off
 import cas12
+import cas12lstm
 import pandas as pd
 import streamlit as st
 import plotly.graph_objs as go
         if predict_button and gene_symbol:
             model_choice = st.radio("mutation or not:", ('normal', 'mutation'))
             with st.spinner('Predicting... Please wait'):
+                predictions, gene_sequence, exons = cas9att.process_gene(gene_symbol, cas9att_path)
                 sorted_predictions = sorted(predictions)[:10]
                 st.session_state['on_target_results'] = sorted_predictions
     # Process predictions
     if predict_button and gene_symbol:
         with st.spinner('Predicting... Please wait'):
+            predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas9att_path)
+            sorted_predictions = sorted(predictions)[:10]
             st.session_state['on_target_results'] = sorted_predictions
             st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
             st.session_state['exons'] = exons  # Store exon data
+        # Notify the user once the process is completed successfully.
         st.success('Prediction completed!')
+        st.session_state['prediction_made'] = True
         if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
+            ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')  # Get Ensembl ID or default to 'Unknown'
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                st.markdown("**Genome**")
+                st.markdown("Homo sapiens")
+            with col2:
+                st.markdown("**Gene**")
+                st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
+            with col3:
+                st.markdown("**Nuclease**")
+                st.markdown("SpCas9")
+            # Include "Target" in the DataFrame's columns
+            try:
+                df = pd.DataFrame(st.session_state['on_target_results'],
+                                  columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon", "Target",
+                                           "gRNA", "Prediction"])
+                st.dataframe(df)
+            except ValueError as e:
+                st.error(f"DataFrame creation error: {e}")
+                # Optionally print or log the problematic data for debugging:
+                print(st.session_state['on_target_results'])
+            # Initialize Plotly figure
             fig = go.Figure()
+            EXON_BASE = 0  # Base position for exons and CDS on the Y axis
+            EXON_HEIGHT = 0.02  # How 'tall' the exon markers should appear
+            # Plot Exons as small markers on the X-axis
+            for exon in st.session_state['exons']:
+                exon_start, exon_end = exon['start'], exon['end']
+                fig.add_trace(go.Bar(
+                    x=[(exon_start + exon_end) / 2],
+                    y=[EXON_HEIGHT],
+                    width=[exon_end - exon_start],
+                    base=EXON_BASE,
+                    marker_color='rgba(128, 0, 128, 0.5)',
+                    name='Exon'
+                ))
+            VERTICAL_GAP = 0.2  # Gap between different ranks
+            # Define max and min Y values based on strand and rank
+            MAX_STRAND_Y = 0.1  # Maximum Y value for positive strand results
+            MIN_STRAND_Y = -0.1  # Minimum Y value for negative strand results
+            # Iterate over top 5 sorted predictions to create the plot
+            for i, prediction in enumerate(st.session_state['on_target_results'][:5], start=1):  # Only top 5
+                chrom, start, end, strand, transcript, exon, target, gRNA, prediction_score = prediction
+                midpoint = (int(start) + int(end)) / 2
+                # Vertical position based on rank, modified by strand
+                y_value = (MAX_STRAND_Y - (i - 1) * VERTICAL_GAP) if strand == '1' or strand == '+' else (
+                        MIN_STRAND_Y + (i - 1) * VERTICAL_GAP)
                 fig.add_trace(go.Scatter(
                     x=[midpoint],
+                    y=[y_value],
                     mode='markers+text',
+                    marker=dict(symbol='triangle-up' if strand == '1' or strand == '+' else 'triangle-down',
+                                size=12),
+                    text=f"Rank: {i}",  # Text label
                     hoverinfo='text',
+                    hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' or strand == '+' else '-'}<br>Transcript: {transcript}<br>Prediction: {prediction_score:.4f}",
                 ))
+            # Update layout for clarity and interaction
             fig.update_layout(
+                title='Top 5 gRNA Sequences by Prediction Score',
                 xaxis_title='Genomic Position',
+                yaxis_title='Strand',
+                yaxis=dict(tickvals=[MAX_STRAND_Y, MIN_STRAND_Y], ticktext=['+', '-']),
+                showlegend=False,
+                hovermode='x unified',
             )
             # Display the plot
             st.plotly_chart(fig)
             if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
                 gene_symbol = st.session_state['current_gene_symbol']
                 gene_sequence = st.session_state['gene_sequence']
                 genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                 bed_file_path = f"{gene_symbol}_crispr_targets.bed"
                 csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+                plot_image_path = f"{gene_symbol}_gtracks_plot.png"
                 # Generate files
+                cas12lstm.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
+                cas12lstm.create_bed_file_from_df(df, bed_file_path)
+                cas12lstm.create_csv_from_df(df, csv_file_path)
                 # Prepare an in-memory buffer for the ZIP file
                 zip_buffer = io.BytesIO()
                 with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                     # For each file, add it to the ZIP file
+                    zip_file.write(genbank_file_path)
+                    zip_file.write(bed_file_path)
+                    zip_file.write(csv_file_path)
                 # Important: move the cursor to the beginning of the BytesIO buffer before reading it
                 zip_buffer.seek(0)
+                # Specify the region you want to visualize
+                min_start = df['Start Pos'].min()
+                max_end = df['End Pos'].max()
+                chromosome = df['Chr'].mode()[0]  # Assumes most common chromosome is the target
+                region = f"{chromosome}:{min_start}-{max_end}"
+                # Generate the pyGenomeTracks plot
+                gtracks_command = f"gtracks {region} {bed_file_path} {plot_image_path}"
+                subprocess.run(gtracks_command, shell=True)
+                st.image(plot_image_path)
                 # Display the download button for the ZIP file
                 st.download_button(
+                    label="Download GenBank, BED, CSV files as ZIP",
                     data=zip_buffer.getvalue(),
                     file_name=f"{gene_symbol}_files.zip",
                     mime="application/zip"