Spaces:

NiniCat
/

CRISPRTool

Sleeping

App Files Files Community

supercat666 committed on May 14, 2024

Commit

bc641c8

1 Parent(s): 6392fd6

fix cas9 and cas12 output

Browse files

Files changed (3) hide show

app.py +2 -4
cas12lstm.py +69 -3
cas9att.py +1 -4

app.py CHANGED Viewed

@@ -185,8 +185,7 @@ if selected_model == 'Cas9':
         if predict_button and gene_symbol:
             with st.spinner('Predicting... Please wait'):
                 predictions, gene_sequence, exons = cas9att.process_gene(gene_symbol, cas9att_path)
-                sorted_predictions = sorted(predictions)[:10]
                 st.session_state['on_target_results'] = sorted_predictions
                 st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
                 st.session_state['exons'] = exons  # Store exon data
@@ -436,8 +435,7 @@ elif selected_model == 'Cas12':
     if predict_button and gene_symbol:
         with st.spinner('Predicting... Please wait'):
             predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas9att_path)
-            sorted_predictions = sorted(predictions)[:10]
             st.session_state['on_target_results'] = sorted_predictions
             st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
             st.session_state['exons'] = exons  # Store exon data

         if predict_button and gene_symbol:
             with st.spinner('Predicting... Please wait'):
                 predictions, gene_sequence, exons = cas9att.process_gene(gene_symbol, cas9att_path)
+                sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
                 st.session_state['on_target_results'] = sorted_predictions
                 st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
                 st.session_state['exons'] = exons  # Store exon data
     if predict_button and gene_symbol:
         with st.spinner('Predicting... Please wait'):
             predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas9att_path)
+            sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
             st.session_state['on_target_results'] = sorted_predictions
             st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
             st.session_state['exons'] = exons  # Store exon data

cas12lstm.py CHANGED Viewed

@@ -14,6 +14,10 @@ from functools import reduce
 from operator import add
 import tabulate
 from difflib import SequenceMatcher
 import cyvcf2
 import parasail
@@ -184,9 +188,71 @@ def process_gene(gene_symbol, model_path):
     for result in results:
         for item in result:
             output.append(item)
-    # Sort results based on prediction score (assuming score is at the 8th index)
-    sorted_results = sorted(output, key=lambda x: x[8], reverse=True)
     # Return the sorted output, combined gene sequences, and all exons
-    return sorted_results, all_gene_sequences, all_exons

 from operator import add
 import tabulate
 from difflib import SequenceMatcher
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+from Bio.SeqFeature import SeqFeature, FeatureLocation
+from Bio.Seq import Seq
 import cyvcf2
 import parasail
     for result in results:
         for item in result:
             output.append(item)
     # Return the sorted output, combined gene sequences, and all exons
+    return results, all_gene_sequences, all_exons
def create_genbank_features(data):
    """Build one Biopython ``SeqFeature`` per prediction row.

    Rows are read positionally: index 1 is the start, 2 the end, 3 the
    strand, 7 the gRNA (used as the feature label) and 8 the prediction
    score (stored in the feature note).

    Args:
        data: A list of rows, or a pandas DataFrame (converted to a list of
            rows with ``data.values.tolist()``).

    Returns:
        A list of ``misc_feature`` SeqFeature objects. Rows whose start or
        end cannot be converted to ``int`` are reported on stdout and
        skipped.

    Raises:
        TypeError: If ``data`` is neither a list nor a pandas DataFrame.
    """
    if isinstance(data, pd.DataFrame):
        rows = data.values.tolist()
    elif isinstance(data, list):
        rows = data
    else:
        raise TypeError("Data should be either a list or a pandas DataFrame.")

    # Forward strand may arrive as '+', '+1', '1', or the number 1 (which
    # stringifies to '1' or '1.0'); anything else is treated as reverse.
    forward_markers = ('+', '+1', '1', '1.0')

    features = []
    for row in rows:
        try:
            start = int(row[1])
            end = int(row[2])
        except (TypeError, ValueError) as e:
            # int(None) raises TypeError rather than ValueError; catch both
            # so one malformed row does not abort the whole export.
            print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
            continue
        strand = 1 if str(row[3]).strip() in forward_markers else -1
        location = FeatureLocation(start=start, end=end, strand=strand)
        feature = SeqFeature(location=location, type="misc_feature", qualifiers={
            'label': row[7],  # Use gRNA as the label
            'note': f"Prediction: {row[8]}"  # Include the prediction score
        })
        features.append(feature)
    return features
def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
    """Write the predictions in ``df`` as an annotated GenBank record.

    Args:
        df: Prediction rows, passed unchanged to ``create_genbank_features``.
        gene_sequence: The sequence to annotate; non-string values are
            converted with ``str()`` before being wrapped in ``Seq``.
        gene_symbol: Used as the record id and name, and in the description.
        output_path: Destination handed to ``SeqIO.write``.
    """
    # Seq is built from plain text, so coerce anything that is not a str.
    sequence_text = gene_sequence if isinstance(gene_sequence, str) else str(gene_sequence)

    target_features = create_genbank_features(df)
    annotated_record = SeqRecord(
        Seq(sequence_text),
        id=gene_symbol,
        name=gene_symbol,
        description=f'CRISPR Cas12 predicted targets for {gene_symbol}',
        features=target_features,
    )
    annotated_record.annotations["molecule_type"] = "DNA"
    SeqIO.write(annotated_record, output_path, "genbank")
def create_bed_file_from_df(df, output_path):
    """Write one tab-separated BED line per row of ``df`` to ``output_path``.

    Each line holds, in order: ``Chr``, ``Start Pos``, ``End Pos``, ``gRNA``,
    ``Prediction`` and the strand symbol. Only these columns (plus
    ``Strand``) are read; no transcript column is written.

    Args:
        df: pandas DataFrame with the columns "Chr", "Start Pos", "End Pos",
            "Strand", "gRNA" and "Prediction".
        output_path: Path of the BED file to create or overwrite.
    """
    # Forward strand may arrive as '1', '+', '+1', or the number 1 (which
    # stringifies to '1' or '1.0'); anything else is written as '-'.
    forward_markers = ('1', '+', '+1', '1.0')

    lines = []
    for _, row in df.iterrows():
        chrom = row["Chr"]
        start = int(row["Start Pos"])
        end = int(row["End Pos"])
        strand = '+' if str(row["Strand"]).strip() in forward_markers else '-'
        gRNA = row["gRNA"]
        score = str(row["Prediction"])
        lines.append(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")

    with open(output_path, 'w') as bed_file:
        bed_file.writelines(lines)
def create_csv_from_df(df, output_path):
    """Save ``df`` as a CSV file at ``output_path`` without the index column."""
    df.to_csv(output_path, index=False)

cas9att.py CHANGED Viewed

@@ -228,12 +228,9 @@ def process_gene(gene_symbol, model_path):
     for result in results:
         for item in result:
             output.append(item)
-    # Sort results based on prediction score (assuming score is at the 8th index)
-    sorted_results = sorted(output, key=lambda x: x[8], reverse=True)
     # Return the sorted output, combined gene sequences, and all exons
-    return sorted_results, all_gene_sequences, all_exons
 def create_genbank_features(data):

     for result in results:
         for item in result:
             output.append(item)
     # Return the sorted output, combined gene sequences, and all exons
+    return results, all_gene_sequences, all_exons
 def create_genbank_features(data):