Spaces:
Running
Running
Erva Ulusoy
committed on
Commit
·
c86e7b2
1
Parent(s):
8f00c3f
updated _create_prediction_df function
Browse files
- run_prothgt_app.py +29 -18
run_prothgt_app.py
CHANGED
@@ -88,30 +88,41 @@ def _create_prediction_df(predictions, heterodata, protein_ids, go_category):
|
|
88 |
'GO_term_P': 'Biological Process',
|
89 |
'GO_term_C': 'Cellular Component'
|
90 |
}
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
# Number of GO terms for this category
|
95 |
n_go_terms = len(heterodata[go_category]['id_mapping'])
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
# Process predictions for each protein
|
98 |
for i, protein_id in enumerate(protein_ids):
|
99 |
-
# Get
|
100 |
-
|
|
|
|
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
})
|
108 |
-
all_predictions.append(prediction_df)
|
109 |
|
110 |
-
#
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
115 |
|
116 |
def generate_prediction_df(protein_ids, model_paths, model_config_paths, go_category):
|
117 |
all_predictions = []
|
|
|
88 |
'GO_term_P': 'Biological Process',
|
89 |
'GO_term_C': 'Cellular Component'
|
90 |
}
|
91 |
+
|
92 |
+
# Get number of GO terms for this category
|
|
|
|
|
93 |
n_go_terms = len(heterodata[go_category]['id_mapping'])
|
94 |
|
95 |
+
# Create lists to store the data
|
96 |
+
all_proteins = []
|
97 |
+
all_go_terms = []
|
98 |
+
all_categories = []
|
99 |
+
all_probabilities = []
|
100 |
+
|
101 |
+
# Get list of GO terms once
|
102 |
+
go_terms = list(heterodata[go_category]['id_mapping'].keys())
|
103 |
+
|
104 |
# Process predictions for each protein
|
105 |
for i, protein_id in enumerate(protein_ids):
|
106 |
+
# Get predictions for this protein
|
107 |
+
start_idx = i * n_go_terms
|
108 |
+
end_idx = (i + 1) * n_go_terms
|
109 |
+
protein_predictions = predictions[start_idx:end_idx]
|
110 |
|
111 |
+
# Extend the lists
|
112 |
+
all_proteins.extend([protein_id] * n_go_terms)
|
113 |
+
all_go_terms.extend(go_terms)
|
114 |
+
all_categories.extend([go_category_dict[go_category]] * n_go_terms)
|
115 |
+
all_probabilities.extend(protein_predictions.tolist())
|
|
|
|
|
116 |
|
117 |
+
# Create DataFrame
|
118 |
+
prediction_df = pd.DataFrame({
|
119 |
+
'Protein': all_proteins,
|
120 |
+
'GO_term': all_go_terms,
|
121 |
+
'GO_category': all_categories,
|
122 |
+
'Probability': all_probabilities
|
123 |
+
})
|
124 |
+
|
125 |
+
return prediction_df
|
126 |
|
127 |
def generate_prediction_df(protein_ids, model_paths, model_config_paths, go_category):
|
128 |
all_predictions = []
|