Erva Ulusoy committed on
Commit
873150b
1 Parent(s): 4cc17e2

merge overlapping domain locations

Browse files
Files changed (1) hide show
  1. run_domain2go_app.py +14 -5
run_domain2go_app.py CHANGED
@@ -4,6 +4,7 @@ from Bio import SeqIO
4
  import os
5
  import time
6
  import pandas as pd
 
7
 
8
  def find_domains(email, sequence, name):
9
 
@@ -72,10 +73,10 @@ def find_domains(email, sequence, name):
72
  entries[entry['accession']]['locations'].extend(location_list)
73
 
74
  entries[entry['accession']]['locations'] = list(set(entries[entry['accession']]['locations']))
75
- entries[entry['accession']]['locations'] = sorted([i.split('-') for i in entries[entry['accession']]['locations']], key=lambda x: (int(x[0]), int(x[1])))
76
- entries[entry['accession']]['locations'] = ['-'.join(i) for i in entries[entry['accession']]['locations']]
77
- # entries[entry['accession']]['locations'] = '|'.join(entries[entry['accession']]['locations'])
78
-
79
  if entries:
80
  result_text = 'Domains found.'
81
 
@@ -92,6 +93,14 @@ def find_domains(email, sequence, name):
92
 
93
  # generate protein function predictions based on domain2go mappings
94
 
 
 
 
 
 
 
 
 
95
  def generate_function_predictions(domains_df, mapping_path):
96
 
97
  # read domain2go mappings
@@ -115,4 +124,4 @@ def generate_function_predictions(domains_df, mapping_path):
115
  # save protein function predictions
116
  protein_name = domains_df['protein_name'].iloc[0]
117
  result_text= 'Function predictions found.'
118
- return [result_text, merged_df]
 
4
  import os
5
  import time
6
  import pandas as pd
7
+ import intervaltree
8
 
9
  def find_domains(email, sequence, name):
10
 
 
73
  entries[entry['accession']]['locations'].extend(location_list)
74
 
75
  entries[entry['accession']]['locations'] = list(set(entries[entry['accession']]['locations']))
76
+ if len(entries[entry['accession']]['locations']) > 1:
77
+ entries[entry['accession']]['locations'] = merge_locations(entries[entry['accession']]['locations'])
78
+ entries[entry['accession']]['locations'] = sorted([i.split('-') for i in entries[entry['accession']]['locations']], key=lambda x: (int(x[0]), int(x[1])))
79
+ entries[entry['accession']]['locations'] = ['-'.join(i) for i in entries[entry['accession']]['locations']]
80
  if entries:
81
  result_text = 'Domains found.'
82
 
 
93
 
94
  # generate protein function predictions based on domain2go mappings
95
 
96
+
97
def merge_locations(locations):
    """Merge overlapping domain locations.

    Args:
        locations: iterable of strings of the form ``"start-end"`` where
            ``start`` and ``end`` are non-negative integers.

    Returns:
        A list of ``"start-end"`` strings, sorted by numeric start, in which
        every group of overlapping input ranges has been collapsed into a
        single range spanning the group.

    Raises:
        ValueError: if an entry is not of the form ``"<int>-<int>"``.
    """
    # Parse to integers first: comparing the raw strings is lexicographic
    # ('5' > '10'), which mis-orders ranges whose coordinates have a
    # different number of digits.
    spans = sorted({tuple(int(part) for part in loc.split('-')) for loc in locations})

    merged = []
    for begin, end in spans:
        # Strict overlap test (begin < previous end): ranges that merely
        # touch, e.g. 1-5 and 5-10, are kept separate, matching the
        # half-open / strict behaviour of the previous interval-tree merge.
        if merged and begin < merged[-1][1]:
            if end > merged[-1][1]:
                merged[-1][1] = end
        else:
            merged.append([begin, end])

    return ['{}-{}'.format(begin, end) for begin, end in merged]
103
+
104
  def generate_function_predictions(domains_df, mapping_path):
105
 
106
  # read domain2go mappings
 
124
  # save protein function predictions
125
  protein_name = domains_df['protein_name'].iloc[0]
126
  result_text= 'Function predictions found.'
127
+ return [result_text, merged_df]