fatmacankara committed on
Commit
b9d0e9c
·
1 Parent(s): 298b080

Update code/add_3Dalignment.py

Browse files
Files changed (1) hide show
  1. code/add_3Dalignment.py +68 -6
code/add_3Dalignment.py CHANGED
@@ -11,6 +11,35 @@ import gzip
11
  from pathlib import Path
12
  from Bio.Align import substitution_matrices
13
  aligner = Align.PairwiseAligner()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def distance(x1, y1, z1, x2, y2, z2):
16
  d = math.sqrt(math.pow(x2 - x1, 2) +
@@ -186,7 +215,40 @@ def get_coords(annot, alignments, coords, resnums_for_sasa, mode):
186
 
187
 
188
  def get_alignments_3D(identifier, model_num, pdb_path, pdbSequence, source, chain, pdbID, mode, path_3D_alignment,file_format = 'gzip'):
 
 
 
189
  if mode == 1:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  atomSequence = ''
191
  coords = []
192
  resnums_for_sasa = []
@@ -206,7 +268,7 @@ def get_alignments_3D(identifier, model_num, pdb_path, pdbSequence, source, chai
206
  atomSequence += threeToOne(line[17:20].strip())
207
  coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
208
  resnums_for_sasa.append(line[22:26].strip())
209
-
210
  #f = open(Path(path_3D_alignment / f'{identifier}_{pdbID}_{str(chain)}_alignment.txt'),"w")
211
 
212
  aligner.mode = 'local'
@@ -249,15 +311,15 @@ def get_alignments_3D(identifier, model_num, pdb_path, pdbSequence, source, chai
249
  atomSequence += threeToOne(line[17:20].strip())
250
  coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
251
  resnums_for_sasa.append(line[22:26].strip())
252
- f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
253
  aligner.mode = 'local'
254
  aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
255
  aligner.open_gap_score = -11
256
  aligner.extend_gap_score = -1
257
  alignments = aligner.align(pdbSequence, atomSequence)
258
  alignments = (list(alignments))
259
- for alignment in alignments:
260
- f.write(str(alignment))
261
- f.write('\n')
262
- f.write('\n')
263
  return alignments, coords, resnums_for_sasa
 
11
  from pathlib import Path
12
  from Bio.Align import substitution_matrices
13
  aligner = Align.PairwiseAligner()
14
+ import requests
15
+ from Bio.PDB import PDBParser, PPBuilder
16
+ from io import StringIO
17
+
18
+
19
+ from Bio.PDB.Polypeptide import *
20
+
21
+
22
# One-letter codes that are ambiguous or non-standard, mapped to the standard
# residue substituted for them. '*' (stop) maps to the empty string, which
# removes it from the sequence.
_NON_STANDARD_AA_MAP = {
    'B': 'D',  # Asx -> aspartic acid
    'Z': 'E',  # Glx -> glutamic acid
    'X': 'A',  # unknown/ambiguous -> alanine placeholder
    'U': 'C',  # selenocysteine -> cysteine
    'J': 'L',  # Leu/Ile -> leucine
    'O': 'K',  # pyrrolysine -> lysine
    '*': '',   # stop symbol is dropped
}


def convert_non_standard_amino_acids(sequence):
    """
    Convert non-standard or ambiguous amino acid codes to their closest relatives.

    Each character of ``sequence`` is looked up in ``_NON_STANDARD_AA_MAP``;
    characters that are not in the table are kept unchanged. The lookup is
    case-sensitive, so only the upper-case codes listed in the table are
    replaced.

    Args:
        sequence: Iterable of one-letter residue codes (typically a ``str``).

    Returns:
        A new string with the replacements applied and every ``'*'`` removed.
    """
    return ''.join(_NON_STANDARD_AA_MAP.get(aa, aa) for aa in sequence)
43
 
44
  def distance(x1, y1, z1, x2, y2, z2):
45
  d = math.sqrt(math.pow(x2 - x1, 2) +
 
215
 
216
 
217
  def get_alignments_3D(identifier, model_num, pdb_path, pdbSequence, source, chain, pdbID, mode, path_3D_alignment,file_format = 'gzip'):
218
+ st.write('I am here get alignments 3D')
219
+ uniprotSequence = convert_non_standard_amino_acids(uniprotSequence)
220
+ pdbSequence = convert_non_standard_amino_acids(pdbSequence)
221
  if mode == 1:
222
+ if source != 'modbase':
223
+
224
+ # Step 1: Fetch the PDB file
225
+ pdb_url = f"https://files.rcsb.org/download/{pdbID}.pdb"
226
+
227
+ response = requests.get(pdb_url)
228
+ response.raise_for_status() # Check for a successful response
229
+
230
+ # Step 2: Parse the PDB file from memory
231
+ atoms = [i for i in response.text.split('\n') if i.startswith('ATOM')]
232
+ atoms = [i.split() for i in atoms]
233
+ atoms = [i for i in atoms if (i[2] == 'CA' and i[4] == chain)]
234
+ atomSequence = ''.join([three_to_one(i[3]) for i in atoms])
235
+ coords = [[i[6] ,i[7] ,i[8]] for i in atoms]
236
+ resnums_for_sasa = [i[5] for i in atoms]
237
+ else:
238
+ pdb_url = f"https://files.rcsb.org/download/{pdb_code}.pdb"
239
+
240
+ response = requests.get(pdbID)
241
+ response.raise_for_status() # Check for a successful response
242
+
243
+ # Step 2: Parse the PDB file from memory
244
+ atoms = [i for i in response.text.split('\n') if i.startswith('ATOM')]
245
+ atoms = [i.split() for i in atoms]
246
+ atoms = [i for i in atoms if i[2] == 'CA']
247
+ atomSequence = ''.join([three_to_one(i[3]) for i in atoms])
248
+ coords = [[i[6] ,i[7] ,i[8]] for i in atoms]
249
+ resnums_for_sasa = [i[5] for i in atoms]
250
+
251
+ """
252
  atomSequence = ''
253
  coords = []
254
  resnums_for_sasa = []
 
268
  atomSequence += threeToOne(line[17:20].strip())
269
  coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
270
  resnums_for_sasa.append(line[22:26].strip())
271
+ """
272
  #f = open(Path(path_3D_alignment / f'{identifier}_{pdbID}_{str(chain)}_alignment.txt'),"w")
273
 
274
  aligner.mode = 'local'
 
311
  atomSequence += threeToOne(line[17:20].strip())
312
  coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
313
  resnums_for_sasa.append(line[22:26].strip())
314
+ #f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
315
  aligner.mode = 'local'
316
  aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
317
  aligner.open_gap_score = -11
318
  aligner.extend_gap_score = -1
319
  alignments = aligner.align(pdbSequence, atomSequence)
320
  alignments = (list(alignments))
321
+ #for alignment in alignments:
322
+ # f.write(str(alignment))
323
+ # f.write('\n')
324
+ # f.write('\n')
325
  return alignments, coords, resnums_for_sasa