Spaces:

anton-bushuiev
/

PPIformer

Running on Zero

App Files Community

Anton Bushuiev committed on Mar 20, 2024

Commit

c09238a

•

1 Parent(s): b432a65

Implement visualization dropdown and full complex inference

Browse files

Files changed (1) hide show

app.py +111 -27

app.py CHANGED Viewed

@@ -50,7 +50,7 @@ def process_inputs(inputs, temp_dir):
     # Prepare PDB input
     if pdb_path:
-        # remove '-' chars from pdb name
         new_pdb_path = temp_dir / f"pdb/{pdb_path.name.replace('_', '-')}"
         new_pdb_path.parent.mkdir(parents=True, exist_ok=True)
         shutil.copy(str(pdb_path), str(new_pdb_path))
@@ -63,9 +63,13 @@ def process_inputs(inputs, temp_dir):
             download_pdb(pdb_code, path=pdb_path)
         except:
             raise gr.Error("PDB download failed.")
     partners = list(map(lambda x: x.strip(), partners.split(',')))
     # Extract PPI into temp dir
     try:
         ppi_dir = temp_dir / 'ppi'
@@ -80,8 +84,8 @@ def process_inputs(inputs, temp_dir):
         muts_path = Path(muts_path)
         muts = muts_path.read_text()
-    muts = list(map(lambda x: x.strip(), muts.split(';')))
     # Basic format
     try:
         muts = list(map(lambda m: Mutation.from_str(m), muts.split(';')))
@@ -92,7 +96,7 @@ def process_inputs(inputs, temp_dir):
     for mut in muts:
         for pmut in mut.muts:
             if pmut.chain not in partners:
-                raise gr.Error(f'Chain of point mutation {pmut} from {mut} is not in the list of partners {partners}.')
     # Consistency with provided .pdb
     muts_on_interface = []
@@ -110,8 +114,8 @@ def process_inputs(inputs, temp_dir):
     return pdb_path, ppi_path, muts, muts_on_interface
-def plot_3dmol(pdb_path, ppi_path, muts, attn, mut_id=0):
-    # 3DMol.js adapted from https://huggingface.co/spaces/huhlim/cg2all/blob/main/app.py
     # Read PDB for 3Dmol.js
     with open(pdb_path, "r") as fp:
@@ -127,12 +131,12 @@ def plot_3dmol(pdb_path, ppi_path, muts, attn, mut_id=0):
     ppi_df = ppi_df.groupby(list(Residue._fields)).apply(lambda df: df[df['atom_name'] == 'CA'].iloc[0]).reset_index(drop=True)
     ppi_df['id'] = ppi_df.apply(lambda row: ':'.join([row['residue_name'], row['chain_id'], str(row['residue_number']), row['insertion']]), axis=1)
     ppi_df['id'] = ppi_df['id'].apply(lambda x: x[:-1] if x[-1] == ':' else x)
-    muts_id = Mutation(muts[mut_id]).wt_to_graphein()  # flatten ids of all sp muts
     ppi_df['mutated'] = ppi_df.apply(lambda row: row['id'] in muts_id, axis=1)
     # Prepare attention coefficients per residue (normalized sum of direct attention from mutated residues)
     attn = torch.nan_to_num(attn, nan=1e-10)
-    attn_sub = attn[:, mut_id, 0, :, 0, :, :, :]  # models, layers, heads, tokens, tokens
     idx_mutated = torch.from_numpy(ppi_df.index[ppi_df['mutated']].to_numpy())
     attn_sub = fill_diagonal(attn_sub, 1e-10)
     attn_mutated = attn_sub[..., idx_mutated, :]
@@ -235,7 +239,6 @@ def plot_3dmol(pdb_path, ppi_path, muts, attn, mut_id=0):
         </script>
         </body></html>"""
     )
-    print(html)
     return f"""<iframe style="width: 100%; height: 600px" name="result" allow="midi; geolocation; microphone; camera;
     display-capture; encrypted-media;" sandbox="allow-modals allow-forms
@@ -246,29 +249,105 @@ def plot_3dmol(pdb_path, ppi_path, muts, attn, mut_id=0):
 def predict(models, temp_dir, *inputs):
     # Process input
-    pdb_path, ppi_path, muts = process_inputs(inputs, temp_dir)
-    print(ppi_path, muts)
-    # Predict
-    try:
-        ddg, attn = predict_ddg(models, ppi_path, muts, return_attn=True)
-    except:
-        raise gr.Error("Prediction failed. Please double check your inputs.")
-    # Create dataframe
-    ddg = ddg.detach().numpy().tolist()
-    ddg = np.round(ddg, 3)
-    df = list(zip(muts, ddg))
     # Create dataframe file
     path = 'ppiformer_ddg_predictions.csv'
-    pd.DataFrame(df).rename(columns={0: "Mutation", 1: "ddG [kcal/mol]"}).to_csv(path, index=False)
-    # Create 3DMol plot
-    plot = plot_3dmol(pdb_path, ppi_path, muts, attn)
-    return df, path, plot
 app = gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="pink"))
@@ -303,7 +382,7 @@ with app:
         with gr.Column():
             gr.Markdown("## Mutations")
-            muts = gr.Textbox(placeholder="SC16A;FC47A;SC16A,FC47A", label="List of (multi-point) mutations", info="SC16A,FC47A;SC16A;FC47A for three mutations: serine to alanine at position 16 in chain C, phenylalanine to alanine at position 47 in chain C, and their double-point combination")
             muts_path = gr.File(file_count="single", label="Or file with mutations")
     examples = gr.Examples(
@@ -327,6 +406,8 @@ with app:
         datatype=["str", "number"],
         col_count=(2, "fixed"),
     )
     plot = gr.HTML()
     # Download weights from Zenodo
@@ -347,8 +428,11 @@ with app:
     # Main logic
     inputs = [pdb_code, pdb_path, partners, muts, muts_path]
-    outputs = [df, df_file, plot]
     predict = partial(predict, models, temp_dir)
     predict_button.click(predict, inputs=inputs, outputs=outputs)
 app.launch(allowed_paths=['./assets'])

     # Prepare PDB input
     if pdb_path:
+        # convert file name to PPIRef format
         new_pdb_path = temp_dir / f"pdb/{pdb_path.name.replace('_', '-')}"
         new_pdb_path.parent.mkdir(parents=True, exist_ok=True)
         shutil.copy(str(pdb_path), str(new_pdb_path))
             download_pdb(pdb_code, path=pdb_path)
         except:
             raise gr.Error("PDB download failed.")
+    # Parse partners
     partners = list(map(lambda x: x.strip(), partners.split(',')))
+    # Add partners to file name
+    pdb_path = pdb_path.rename(pdb_path.with_stem(f"{pdb_path.stem}_{'_'.join(partners)}"))
     # Extract PPI into temp dir
     try:
         ppi_dir = temp_dir / 'ppi'
         muts_path = Path(muts_path)
         muts = muts_path.read_text()
+    # Check mutations
     # Basic format
     try:
         muts = list(map(lambda m: Mutation.from_str(m), muts.split(';')))
     for mut in muts:
         for pmut in mut.muts:
             if pmut.chain not in partners:
+                raise gr.Error(f'Chain of point mutation {pmut} is not in the list of partners {partners}.')
     # Consistency with provided .pdb
     muts_on_interface = []
     return pdb_path, ppi_path, muts, muts_on_interface
+def plot_3dmol(pdb_path, ppi_path, mut, attn, attn_mut_id=0):
+    # NOTE 3DMol.js adapted from https://huggingface.co/spaces/huhlim/cg2all/blob/main/app.py
     # Read PDB for 3Dmol.js
     with open(pdb_path, "r") as fp:
     ppi_df = ppi_df.groupby(list(Residue._fields)).apply(lambda df: df[df['atom_name'] == 'CA'].iloc[0]).reset_index(drop=True)
     ppi_df['id'] = ppi_df.apply(lambda row: ':'.join([row['residue_name'], row['chain_id'], str(row['residue_number']), row['insertion']]), axis=1)
     ppi_df['id'] = ppi_df['id'].apply(lambda x: x[:-1] if x[-1] == ':' else x)
+    muts_id = Mutation.from_str(mut).wt_to_graphein()  # flatten ids of all sp muts
     ppi_df['mutated'] = ppi_df.apply(lambda row: row['id'] in muts_id, axis=1)
     # Prepare attention coefficients per residue (normalized sum of direct attention from mutated residues)
     attn = torch.nan_to_num(attn, nan=1e-10)
+    attn_sub = attn[:, attn_mut_id, 0, :, 0, :, :, :]  # models, layers, heads, tokens, tokens
     idx_mutated = torch.from_numpy(ppi_df.index[ppi_df['mutated']].to_numpy())
     attn_sub = fill_diagonal(attn_sub, 1e-10)
     attn_mutated = attn_sub[..., idx_mutated, :]
         </script>
         </body></html>"""
     )
     return f"""<iframe style="width: 100%; height: 600px" name="result" allow="midi; geolocation; microphone; camera;
     display-capture; encrypted-media;" sandbox="allow-modals allow-forms
 def predict(models, temp_dir, *inputs):
     # Process input
+    pdb_path, ppi_path, muts, muts_on_interface = process_inputs(inputs, temp_dir)
+    # Create dataframe
+    df = pd.DataFrame({
+        'Mutation': muts,
+        'ddG [kcal/mol]': len(muts) * [np.nan],
+        '10A Interface': muts_on_interface,
+        'Attn Id': len(muts) * [np.nan],
+    })
+    # Show warning if some mutations are not on the interface
+    muts_not_on_interface = df[~df['10A Interface']]['Mutation'].tolist()
+    n_muts_not_on_interface = len(muts_not_on_interface)
+    if n_muts_not_on_interface:
+        n_muts_warn = 5
+        muts_not_on_interface = ';'.join(muts_not_on_interface[:n_muts_warn])
+        if n_muts_not_on_interface > n_muts_warn:
+            muts_not_on_interface += f'... (and {n_muts_not_on_interface - n_muts_warn} more)'
+        gr.Warning((
+            f"{muts_not_on_interface} {'is' if n_muts_not_on_interface == 1 else 'are'} not on the interface. "
+            "The model will predict the effects of mutations on the whole complex. "
+            "This may lead to less accurate predictions."
+        ))
+    # Predict using interface for mutations on the interface and using the whole complex otherwise
+    attn_ppi, attn_pdb = None, None
+    for df_sub, path in [
+        [df[df['10A Interface']], ppi_path],
+        [df[~df['10A Interface']], pdb_path]
+    ]:
+        if not len(df_sub):
+            continue
+        # Predict
+        try:
+            ddg, attn = predict_ddg(models, path, df_sub['Mutation'].tolist(), return_attn=True)
+        except:
+            raise gr.Error("Prediction failed. Please double check your inputs.")
+        ddg = ddg.detach().numpy().tolist()
+        # Update dataframe and attention tensor
+        idx = df_sub.index
+        df.loc[idx, 'ddG [kcal/mol]'] = ddg
+        df.loc[idx, 'Attn Id'] = np.arange(len(idx))
+        if path == ppi_path:
+            attn_ppi = attn
+        else:
+            attn_pdb = attn
+    df['Attn Id'] = df['Attn Id'].astype(int)
+    # Round ddG values
+    df['ddG [kcal/mol]'] = df['ddG [kcal/mol]'].round(3)
+    # Create PPI-specific dropdown
+    dropdown = gr.Dropdown(
+        df['Mutation'].tolist(), value=df['Mutation'].iloc[0],
+        interactive=True,  visible=True, label="Mutation to visualize",
+    )
+    # Predefine plot arguments for all dropdown choices
+    dropdown_choices_to_plot_args = {
+        mut: (
+            pdb_path,
+            ppi_path if df[df['Mutation'] == mut]['10A Interface'].iloc[0] else pdb_path,
+            mut,
+            attn_ppi if df[df['Mutation'] == mut]['10A Interface'].iloc[0] else attn_pdb,
+            df[df['Mutation'] == mut]['Attn Id'].iloc[0]
+        )
+        for mut in df['Mutation']
+    }
     # Create dataframe file
     path = 'ppiformer_ddg_predictions.csv'
+    if n_muts_not_on_interface:
+        df = df[['Mutation', 'ddG [kcal/mol]', '10A Interface']]
+        df.to_csv(path, index=False)
+        df = gr.Dataframe(
+            value=df,
+            headers=['Mutation', 'ddG [kcal/mol]', '10A Interface'],
+            datatype=['str', 'number', 'bool'],
+            col_count=(3, 'fixed'),
+        )
+    else:
+        df = df[['Mutation', 'ddG [kcal/mol]']]
+        df.to_csv(path, index=False)
+        df = gr.Dataframe(
+            value=df,
+            headers=['Mutation', 'ddG [kcal/mol]'],
+            datatype=['str', 'number'],
+            col_count=(2, 'fixed'),
+        )
+    return df, path, dropdown, dropdown_choices_to_plot_args
+def update_plot(dropdown, dropdown_choices_to_plot_args):
+    return plot_3dmol(*dropdown_choices_to_plot_args[dropdown])
 app = gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="pink"))
         with gr.Column():
             gr.Markdown("## Mutations")
+            muts = gr.Textbox(placeholder="SC16A;FC47A;SC16A,FC47A", label="List of (multi-point) mutations", info="SC16A;FC47A;SC16A,FC47A for three mutations: serine to alanine at position 16 in chain C, phenylalanine to alanine at position 47 in chain C, and their double-point combination")
             muts_path = gr.File(file_count="single", label="Or file with mutations")
     examples = gr.Examples(
         datatype=["str", "number"],
         col_count=(2, "fixed"),
     )
+    dropdown = gr.Dropdown(interactive=True, visible=False)
+    dropdown_choices_to_plot_args = gr.State([])
     plot = gr.HTML()
     # Download weights from Zenodo
     # Main logic
     inputs = [pdb_code, pdb_path, partners, muts, muts_path]
+    outputs = [df, df_file, dropdown, dropdown_choices_to_plot_args]
     predict = partial(predict, models, temp_dir)
     predict_button.click(predict, inputs=inputs, outputs=outputs)
+    # Update plot on dropdown change
+    dropdown.change(update_plot, inputs=[dropdown, dropdown_choices_to_plot_args], outputs=[plot])
 app.launch(allowed_paths=['./assets'])