Spaces:

anton-bushuiev
/

PPIformer

Running on Zero

App Files Files Community

Anton Bushuiev committed on Jan 25, 2024

Commit

98b3032

1 Parent(s): 0c10579

Improve layout and examples

Browse files

Files changed (2) hide show

app.py +29 -12
assets/readme-dimer-close-up.png +0 -0

app.py CHANGED Viewed

@@ -77,7 +77,7 @@ def process_inputs(inputs, temp_dir):
     return pdb_path, ppi_path, muts
-def plot_3dmol(pdb_path, ppi_path, muts, attn):
     # 3DMol.js adapted from https://huggingface.co/spaces/huhlim/cg2all/blob/main/app.py
     # Read PDB for 3Dmol.js
@@ -92,17 +92,14 @@ def plot_3dmol(pdb_path, ppi_path, muts, attn):
     # Read PPI to customize 3Dmol.js visualization
     ppi_df = PandasPdb().read_pdb(ppi_path).df['ATOM']
     ppi_df = ppi_df.groupby(list(Residue._fields)).apply(lambda df: df[df['atom_name'] == 'CA'].iloc[0]).reset_index(drop=True)
-    chains = ppi_df['chain_id'].unique()
     ppi_df['id'] = ppi_df.apply(lambda row: ':'.join([row['residue_name'], row['chain_id'], str(row['residue_number']), row['insertion']]), axis=1)
     ppi_df['id'] = ppi_df['id'].apply(lambda x: x[:-1] if x[-1] == ':' else x)
-    muts_id = sum([Mutation(mut).wt_to_graphein() for mut in muts], start=[])  # flatten ids of all sp muts
     ppi_df['mutated'] = ppi_df.apply(lambda row: row['id'] in muts_id, axis=1)
     # Prepare attention coefficients per residue (normalized sum of direct attention from mutated residues)
     attn = torch.nan_to_num(attn, nan=1e-10)
-    # attn_sub = attn[:, 0, :, 0, :, :, :]  # models, layers, heads, tokens, tokens
-    # TODO Generalize to remove hardcoded 0 at dimension 1 correpsonding to useing attention for the 1st mutation
-    attn_sub = attn[:, 0, 0, :, 0, :, :, :]  # models, layers, heads, tokens, tokens
     idx_mutated = torch.from_numpy(ppi_df.index[ppi_df['mutated']].to_numpy())
     attn_sub = fill_diagonal(attn_sub, 1e-10)
     attn_mutated = attn_sub[..., idx_mutated, :]
@@ -112,6 +109,8 @@ def plot_3dmol(pdb_path, ppi_path, muts, attn):
     attns_per_token += 1e-10
     ppi_df['attn'] = attns_per_token.numpy()
     # Customize 3Dmol.js visualization https://3dmol.csb.pitt.edu/doc/
     styles = []
     zoom_atoms = []
@@ -234,10 +233,28 @@ def predict(models, temp_dir, *inputs):
     return df, plot
-app = gr.Blocks()
 with app:
     # Input GUI
     with gr.Row():
         with gr.Column():
             gr.Markdown("## PPI structure")
@@ -253,12 +270,12 @@ with app:
     examples = gr.Examples(
         examples=[
-            ["1BUI", "A,B,C", "SC16A;FC47A;SC16A,FC47A"],
-            ["1KNE", "A,P", ';'.join([f"TP6{a}" for a in AMINO_ACID_CODES_1])],
-            ["1C4Z", "A,B,C,D", "FA690A;KD100A"]
         ],
         inputs=[pdb_code, partners, muts],
-        label="Examples (press line to fill inputs)"
     )
     # Predict GUI
@@ -295,4 +312,4 @@ with app:
     predict = partial(predict, models, temp_dir)
     predict_button.click(predict, inputs=inputs, outputs=outputs)
-app.launch()

     return pdb_path, ppi_path, muts
+def plot_3dmol(pdb_path, ppi_path, muts, attn, mut_id=0):
     # 3DMol.js adapted from https://huggingface.co/spaces/huhlim/cg2all/blob/main/app.py
     # Read PDB for 3Dmol.js
     # Read PPI to customize 3Dmol.js visualization
     ppi_df = PandasPdb().read_pdb(ppi_path).df['ATOM']
     ppi_df = ppi_df.groupby(list(Residue._fields)).apply(lambda df: df[df['atom_name'] == 'CA'].iloc[0]).reset_index(drop=True)
     ppi_df['id'] = ppi_df.apply(lambda row: ':'.join([row['residue_name'], row['chain_id'], str(row['residue_number']), row['insertion']]), axis=1)
     ppi_df['id'] = ppi_df['id'].apply(lambda x: x[:-1] if x[-1] == ':' else x)
+    muts_id = Mutation(muts[mut_id]).wt_to_graphein()  # residue ids of the selected mutation (muts[mut_id]) only
     ppi_df['mutated'] = ppi_df.apply(lambda row: row['id'] in muts_id, axis=1)
     # Prepare attention coefficients per residue (normalized sum of direct attention from mutated residues)
     attn = torch.nan_to_num(attn, nan=1e-10)
+    attn_sub = attn[:, mut_id, 0, :, 0, :, :, :]  # models, layers, heads, tokens, tokens
     idx_mutated = torch.from_numpy(ppi_df.index[ppi_df['mutated']].to_numpy())
     attn_sub = fill_diagonal(attn_sub, 1e-10)
     attn_mutated = attn_sub[..., idx_mutated, :]
     attns_per_token += 1e-10
     ppi_df['attn'] = attns_per_token.numpy()
+    chains = ppi_df.sort_values('attn', ascending=False)['chain_id'].unique()
     # Customize 3Dmol.js visualization https://3dmol.csb.pitt.edu/doc/
     styles = []
     zoom_atoms = []
     return df, plot
+app = gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="pink"))
 with app:
     # Input GUI
+    gr.Markdown(value="# PPIformer Web")
+    gr.Image("assets/readme-dimer-close-up.png")
+    gr.Markdown(value="""
+        [PPIformer](https://github.com/anton-bushuiev/PPIformer/tree/main) is a state-of-the-art predictor of the effects of mutations on protein-protein interactions (PPIs),
+        as quantified by the binding energy changes (ddG). The model was pre-trained on the [PPIRef](https://github.com/anton-bushuiev/PPIRef)
+        dataset via a coarse-grained structural masked modeling and fine-tuned on [SKEMPI v2.0](https://life.bsc.es/pid/skempi2) via log odds.
+        PPIformer was shown to successfully identify known favorable mutations of the [staphylokinase thrombolytic](https://pubmed.ncbi.nlm.nih.gov/10942387/)
+        and a [human antibody](https://www.pnas.org/doi/10.1073/pnas.2122954119) against the SARS-CoV-2 spike protein. Please see more details in [our paper](https://arxiv.org/abs/2310.18515).
+        To use PPIformer on your data, please specify the PPI structure (PDB code or file), interacting proteins of interest (chain codes in the file) and mutations
+        (semicolon-separated list or file with mutations in the [standard format](https://foldxsuite.crg.eu/parameter/mutant-file)). For inspiration, you can use one of the examples below:
+        click on one of the rows to pre-fill the inputs. After specifying the inputs, press the button to predict the effects of mutations on the PPI. Currently the model runs on CPU, so the prediction may take a few minutes.
+        After making a prediction with the model, you will see binding free energy changes (ddG values) for each mutation and a 3D visualization of the PPI with mutated residues highlighted in red. The visualization additionally shows
+        the attention coefficients of the model for the nearest neighboring residues, which quantify the contribution of the residues to the predicted ddG value. The brighter and thicker a residue is, the more attention the model paid to it.
+        Currently, the web only visualizes the first mutation in the list.
+    """)
     with gr.Row():
         with gr.Column():
             gr.Markdown("## PPI structure")
     examples = gr.Examples(
         examples=[
+            ["1BUI", "A,B,C", "SC16A,FC47A;SC16A;FC47A"],
+            ["3QIB", "A,B,P,C,D", "YP7F,TP12S;YP7F;TP12S"],
+            ["1KNE", "A,P", ';'.join([f"TP6{a}" for a in AMINO_ACID_CODES_1])]
         ],
         inputs=[pdb_code, partners, muts],
+        label="Examples (click on a line to pre-fill inputs)"
     )
     # Predict GUI
     predict = partial(predict, models, temp_dir)
     predict_button.click(predict, inputs=inputs, outputs=outputs)
+app.launch(allowed_paths=['./assets'])

assets/readme-dimer-close-up.png ADDED Viewed