supercat666 committed on
Commit
22fbe15
1 Parent(s): 4026606
Files changed (1) hide show
  1. cas9on.py +17 -8
cas9on.py CHANGED
@@ -146,21 +146,30 @@ def process_gene(gene_symbol, model_path):
146
  # Return the sorted output, combined gene sequences, and all exons
147
  return results, all_gene_sequences, all_exons
148
 
149
- def create_genbank_features(formatted_data):
 
150
  features = []
151
- for data in formatted_data:
152
- strand = 1 if data[3] == '+' else -1
153
- location = FeatureLocation(start=int(data[1]), end=int(data[2]), strand=strand)
 
 
 
 
 
154
  feature = SeqFeature(location=location, type="misc_feature", qualifiers={
155
- 'label': data[7], # Use gRNA as the label
156
- 'target': data[6], # Include the target sequence
157
- 'note': f"Prediction: {data[8]:.4f}" # Include the prediction score
 
 
158
  })
159
  features.append(feature)
160
  return features
161
 
 
162
  def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
163
- features = create_genbank_features(df.values.tolist())
164
  record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
165
  description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
166
  record.annotations["molecule_type"] = "DNA"
 
146
  # Return the sorted output, combined gene sequences, and all exons
147
  return results, all_gene_sequences, all_exons
148
 
149
+
150
def create_genbank_features(df):
    """Turn each row of a gRNA prediction table into a ``misc_feature``.

    Args:
        df: Table exposing ``iterrows()`` (presumably a pandas DataFrame --
            confirm against the caller) with the columns ``Strand``,
            ``Start Pos``, ``End Pos``, ``gRNA``, ``Target``, ``Prediction``,
            ``Transcript`` and ``Exon``.

    Returns:
        A list holding one ``SeqFeature`` per row, in row order.
    """
    features = []
    for _, row in df.iterrows():
        # Biopython takes a numeric strand: '+' -> 1, '-' -> -1, otherwise 0.
        strand_symbol = row['Strand']
        if strand_symbol == '+':
            strand = 1
        elif strand_symbol == '-':
            strand = -1
        else:
            strand = 0

        # Span of the feature, taken from the 'Start Pos' / 'End Pos' columns.
        location = FeatureLocation(
            start=int(row['Start Pos']),
            end=int(row['End Pos']),
            strand=strand,
        )

        qualifiers = {
            'label': row['gRNA'],  # gRNA doubles as the feature label
            'target': row['Target'],  # target sequence
            'note': f"Prediction: {row['Prediction']:.4f}",  # prediction score
            'transcript_id': row['Transcript'],
            'exon_id': row['Exon'],
        }
        features.append(
            SeqFeature(location=location, type="misc_feature", qualifiers=qualifiers)
        )
    return features
169
 
170
+
171
  def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
172
+ features = create_genbank_features(df)
173
  record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
174
  description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
175
  record.annotations["molecule_type"] = "DNA"