Spaces:

NiniCat
/

CRISPRTool

Sleeping

App Files Files Community

supercat666 committed on Apr 1

Commit

d51aeae

•

1 Parent(s): 114492c

fix

Browse files

Files changed (2) hide show

app.py +16 -21
cas9on.py +56 -93

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from pathlib import Path
 import zipfile
 import io
 import gtracks
@@ -275,34 +276,29 @@ if selected_model == 'Cas9':
                     gene_sequence = st.session_state['gene_sequence']
                     # Define file paths
-                    # genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
-                    # bed_file_path = f"{gene_symbol}_crispr_targets.bed"
-                    # csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
-                    bigwig_file_path = f"{gene_symbol}_crispr_predictions.bw"
-                    # Generate files
-                    # cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
-                    # cas9on.create_bed_file_from_df(df, bed_file_path)
-                    # cas9on.create_csv_from_df(df, csv_file_path)
-                    # Assuming create_bigwig is a function that generates a BigWig file from the DataFrame
-                    cas9on.create_bigwig(df, bigwig_file_path)
                     # Prepare an in-memory buffer for the ZIP file
                     zip_buffer = io.BytesIO()
                     with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                         # For each file, add it to the ZIP file
-                        # zip_file.write(genbank_file_path)
-                        # zip_file.write(bed_file_path)
-                        # zip_file.write(csv_file_path)
-                        zip_file.write(bigwig_file_path)
                     # Important: move the cursor to the beginning of the BytesIO buffer before reading it
                     zip_buffer.seek(0)
-                    track = gtracks.Track(bigwig_file_path)
-                    plot = gtracks.Plot(tracks=[track])
                     # Specify the region you want to visualize
                     min_start = df['Start Pos'].min()
                     max_end = df['End Pos'].max()
@@ -310,14 +306,13 @@ if selected_model == 'Cas9':
                     region = f"{chromosome}:{min_start}-{max_end}"
                     # Generate the pyGenomeTracks plot
-                    plot_image_path = f"{gene_symbol}_gtracks_plot.png"
-                    plot.plot(region=region, output_file=plot_image_path)
-                    # Display the pyGenomeTracks plot image in Streamlit
                     st.image(plot_image_path)
                     # Display the download button for the ZIP file
                     st.download_button(
-                        label="Download GenBank, BED, CSV, and BigWig files as ZIP",
                         data=zip_buffer.getvalue(),
                         file_name=f"{gene_symbol}_files.zip",
                         mime="application/zip"

 import zipfile
 import io
 import gtracks
+import subprocess
                     gene_sequence = st.session_state['gene_sequence']
                     # Define file paths
+                    genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
+                    bed_file_path = f"{gene_symbol}_crispr_targets.bed"
+                    csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+                    plot_image_path = f"{gene_symbol}_gtracks_plot.png"
+                    # Generate files
+                    cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
+                    cas9on.create_bed_file_from_df(df, bed_file_path)
+                    cas9on.create_csv_from_df(df, csv_file_path)
                     # Prepare an in-memory buffer for the ZIP file
                     zip_buffer = io.BytesIO()
                     with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                         # For each file, add it to the ZIP file
+                        zip_file.write(genbank_file_path)
+                        zip_file.write(bed_file_path)
+                        zip_file.write(csv_file_path)
                     # Important: move the cursor to the beginning of the BytesIO buffer before reading it
                     zip_buffer.seek(0)
                     # Specify the region you want to visualize
                     min_start = df['Start Pos'].min()
                     max_end = df['End Pos'].max()
                     region = f"{chromosome}:{min_start}-{max_end}"
                     # Generate the pyGenomeTracks plot
+                    gtracks_command = f"gtracks {region} {bed_file_path} {plot_image_path}"
+                    subprocess.run(gtracks_command, shell=True)
                     st.image(plot_image_path)
                     # Display the download button for the ZIP file
                     st.download_button(
+                        label="Download GenBank, BED, CSV files as ZIP",
                         data=zip_buffer.getvalue(),
                         file_name=f"{gene_symbol}_files.zip",
                         mime="application/zip"

cas9on.py CHANGED Viewed

@@ -147,100 +147,63 @@ def process_gene(gene_symbol, model_path):
     return results, all_gene_sequences, all_exons
-# def create_genbank_features(data):
-#     features = []
-#
-#     # If the input data is a DataFrame, convert it to a list of lists
-#     if isinstance(data, pd.DataFrame):
-#         formatted_data = data.values.tolist()
-#     elif isinstance(data, list):
-#         formatted_data = data
-#     else:
-#         raise TypeError("Data should be either a list or a pandas DataFrame.")
-#
-#     for row in formatted_data:
-#         try:
-#             start = int(row[1])
-#             end = int(row[2])
-#         except ValueError as e:
-#             print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
-#             continue
-#
-#         strand = 1 if row[3] == '+' else -1
-#         location = FeatureLocation(start=start, end=end, strand=strand)
-#         feature = SeqFeature(location=location, type="misc_feature", qualifiers={
-#             'label': row[7],  # Use gRNA as the label
-#             'note': f"Prediction: {row[8]}"  # Include the prediction score
-#         })
-#         features.append(feature)
-#
-#     return features
-#
-#
-# def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
-#     features = create_genbank_features(df)
-#     record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
-#                        description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
-#     record.annotations["molecule_type"] = "DNA"
-#     SeqIO.write(record, output_path, "genbank")
-#
-#
-# def create_bed_file_from_df(df, output_path):
-#     with open(output_path, 'w') as bed_file:
-#         for index, row in df.iterrows():
-#             chrom = row["Chr"]
-#             start = int(row["Start Pos"])  # Assuming 'Start Pos' is the column name in the df
-#             end = int(row["End Pos"])  # Assuming 'End Pos' is the column name in the df
-#             strand = '+' if row["Strand"] == '1' else '-'  # Assuming 'Strand' is the column name in the df
-#             gRNA = row["gRNA"]
-#             score = str(row["Prediction"])
-#             transcript_id = row["Transcript"]  # Assuming 'Transcript' is the column name in the df
-#
-#             bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\t{transcript_id}\n")
-#
-#
-# def create_csv_from_df(df, output_path):
-#     df.to_csv(output_path, index=False)
-def create_bigwig(df, bigwig_path):
-    # Check for required columns in the DataFrame
-    required_columns = ["Chr", "Start Pos", "End Pos", "Prediction"]
-    if not all(column in df.columns for column in required_columns):
-        raise ValueError(f"DataFrame must contain {required_columns} columns.")
-    # Convert columns to the correct types
-    df['Start Pos'] = df['Start Pos'].astype(int)
-    df['End Pos'] = df['End Pos'].astype(int)
-    df['Prediction'] = df['Prediction'].astype(float)
-    # Get the list of all chromosomes present in the DataFrame
-    all_chromosomes = df['Chr'].unique().tolist()
-    # Calculate chromosome sizes for the BigWig header
-    chr_sizes = []
-    for chr in all_chromosomes:
-        chr_group = df[df['Chr'] == chr]
-        max_end_pos = chr_group['End Pos'].max()
-        chr_sizes.append((chr, max_end_pos))
-    # Create the BigWig file and add the header
-    bw = pyBigWig.open(bigwig_path, "w")
-    bw.addHeader(chr_sizes)
-    # Add entries for each chromosome
-    for chr in all_chromosomes:
-        chr_group = df[df['Chr'] == chr]
-        if not chr_group.empty:
-            starts = chr_group['Start Pos'].tolist()
-            ends = chr_group['End Pos'].tolist()
-            values = chr_group['Prediction'].astype(float).tolist()
-            bw.addEntries([chr] * len(starts), starts, ends=ends, values=values)
-        else:
-            # Add empty entries for the missing chromosome
-            bw.addEntries([chr], [0], ends=[1], values=[0.0])
-    # Close the BigWig file
-    bw.close()

     return results, all_gene_sequences, all_exons
def create_genbank_features(data):
    """Build one ``misc_feature`` SeqFeature per row of gRNA prediction data.

    Rows are read by position: index 1 is the start, index 2 the end,
    index 3 the strand, index 7 the feature label and index 8 the
    prediction score.
    NOTE(review): this column order is assumed from the indices used here;
    confirm it against the code that builds the table.

    Args:
        data: A pandas DataFrame or a list of row sequences.

    Returns:
        A list of SeqFeature objects. Rows whose start/end cannot be
        converted to ``int`` are reported on stdout and skipped.

    Raises:
        TypeError: If ``data`` is neither a list nor a pandas DataFrame.
    """
    # A DataFrame is flattened to a list of lists so both inputs share one loop.
    if isinstance(data, pd.DataFrame):
        formatted_data = data.values.tolist()
    elif isinstance(data, list):
        formatted_data = data
    else:
        raise TypeError("Data should be either a list or a pandas DataFrame.")

    # Strand has been encoded both as '+' and as '1' in this module; treat
    # either spelling (and the number 1) as forward, anything else as reverse.
    forward_markers = {"+", "1", "+1", "1.0"}

    features = []
    for row in formatted_data:
        try:
            start = int(row[1])
            end = int(row[2])
        except (TypeError, ValueError) as e:
            # TypeError covers missing values (None); ValueError covers
            # non-numeric strings and NaN. Either way the row is unusable.
            print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
            continue

        strand = 1 if str(row[3]).strip() in forward_markers else -1
        location = FeatureLocation(start=start, end=end, strand=strand)
        feature = SeqFeature(location=location, type="misc_feature", qualifiers={
            'label': row[7],  # Use gRNA as the label
            'note': f"Prediction: {row[8]}"  # Include the prediction score
        })
        features.append(feature)
    return features
def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
    """Write the predictions in ``df`` as features of a GenBank record.

    The record wraps ``gene_sequence``, uses ``gene_symbol`` as both its id
    and its name, and is written to ``output_path`` in GenBank format.
    """
    target_features = create_genbank_features(df)
    genbank_record = SeqRecord(
        Seq(gene_sequence),
        id=gene_symbol,
        name=gene_symbol,
        description=f'CRISPR Cas9 predicted targets for {gene_symbol}',
        features=target_features,
    )
    # Mark the record as DNA before handing it to the writer.
    genbank_record.annotations["molecule_type"] = "DNA"
    SeqIO.write(genbank_record, output_path, "genbank")
def create_bed_file_from_df(df, output_path):
    """Write one six-column, tab-separated BED line per row of ``df``.

    Columns written, in order: ``Chr``, ``Start Pos``, ``End Pos``,
    ``gRNA`` (name), ``Prediction`` (score) and the strand symbol.

    Args:
        df: DataFrame providing the columns ``Chr``, ``Start Pos``,
            ``End Pos``, ``Strand``, ``gRNA`` and ``Prediction``.
        output_path: Path of the BED file to create (overwritten if present).
    """
    # Strand has been encoded both as '+' and as '1' in this module; treat
    # either spelling (and the number 1) as forward, anything else as reverse.
    forward_markers = {"+", "1", "+1", "1.0"}

    with open(output_path, 'w') as bed_file:
        for _, row in df.iterrows():
            chrom = row["Chr"]
            start = int(row["Start Pos"])
            end = int(row["End Pos"])
            strand = '+' if str(row["Strand"]).strip() in forward_markers else '-'
            gRNA = row["gRNA"]
            score = str(row["Prediction"])
            # Only the standard BED columns are written, so no other column
            # (e.g. a transcript id) is required to be present in df.
            bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
def create_csv_from_df(df, output_path):
    """Save ``df`` as CSV at ``output_path``, leaving out the index column."""
    df.to_csv(path_or_buf=output_path, index=False)