from huggingface_hub import from_pretrained_keras import gradio as gr import ast import pandas as pd import numpy as np import tensorflow as tf from rdkit import Chem, RDLogger from rdkit.Chem import BondType from rdkit.Chem.Draw import MolsToGridImage RDLogger.DisableLog("rdApp.*") # Config SMILE_CHARSET = '["C", "B", "F", "I", "H", "O", "N", "S", "P", "Cl", "Br"]' bond_mapping = {"SINGLE": 0, "DOUBLE": 1, "TRIPLE": 2, "AROMATIC": 3} bond_mapping.update( {0: BondType.SINGLE, 1: BondType.DOUBLE, 2: BondType.TRIPLE, 3: BondType.AROMATIC} ) SMILE_CHARSET = ast.literal_eval(SMILE_CHARSET) MAX_MOLSIZE = 109 SMILE_to_index = dict((c, i) for i, c in enumerate(SMILE_CHARSET)) index_to_SMILE = dict((i, c) for i, c in enumerate(SMILE_CHARSET)) atom_mapping = dict(SMILE_to_index) atom_mapping.update(index_to_SMILE) NUM_ATOMS = 120 # Maximum number of atoms ATOM_DIM = 11 # Number of atom types BOND_DIM = 4 + 1 # Number of bond types LATENT_DIM = 435 # Size of the latent space def graph_to_molecule(graph): # Unpack graph adjacency, features = graph # RWMol is a molecule object intended to be edited molecule = Chem.RWMol() # Remove "no atoms" & atoms with no bonds keep_idx = np.where( (np.argmax(features, axis=1) != ATOM_DIM - 1) & (np.sum(adjacency[:-1], axis=(0, 1)) != 0) )[0] features = features[keep_idx] adjacency = adjacency[:, keep_idx, :][:, :, keep_idx] # Add atoms to molecule for atom_type_idx in np.argmax(features, axis=1): atom = Chem.Atom(atom_mapping[atom_type_idx]) _ = molecule.AddAtom(atom) # Add bonds between atoms in molecule; based on the upper triangles # of the [symmetric] adjacency tensor (bonds_ij, atoms_i, atoms_j) = np.where(np.triu(adjacency) == 1) for (bond_ij, atom_i, atom_j) in zip(bonds_ij, atoms_i, atoms_j): if atom_i == atom_j or bond_ij == BOND_DIM - 1: continue bond_type = bond_mapping[bond_ij] molecule.AddBond(int(atom_i), int(atom_j), bond_type) # Sanitize the molecule; for more information on sanitization, see # https://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization flag = Chem.SanitizeMol(molecule, catchErrors=True) # Let's be strict. If sanitization fails, return None if flag != Chem.SanitizeFlags.SANITIZE_NONE: return None return molecule model = from_pretrained_keras("keras-io/drug-molecule-generation-with-VAE") def inference(num_mol): z = tf.random.normal((1000, LATENT_DIM)) reconstruction_adjacency, reconstruction_features = model.predict(z) # obtain one-hot encoded adjacency tensor adjacency = tf.argmax(reconstruction_adjacency, axis=1) adjacency = tf.one_hot(adjacency, depth=BOND_DIM, axis=1) # Remove potential self-loops from adjacency adjacency = tf.linalg.set_diag(adjacency, tf.zeros(tf.shape(adjacency)[:-1])) # obtain one-hot encoded feature tensor features = tf.argmax(reconstruction_features, axis=2) features = tf.one_hot(features, depth=ATOM_DIM, axis=2) molecules = [ graph_to_molecule([adjacency[i].numpy(), features[i].numpy()]) for i in range(1000)] MolsToGridImage( [m for m in molecules if m is not None][:num_mol], molsPerRow=5, subImgSize=(260, 160) ).save("img.png") return 'img.png' gr.Interface( fn=inference, title="Generating Drug Molecule with VAE", description = "Implementing a Convolutional Variational AutoEncoder (VAE) for Drug Discovery 🔬", inputs=[ gr.inputs.Slider(20, 100, label='Number of Molecular Graphs', step=20, default=40), ], outputs="image", article = "Author: Vu Minh Chien. Based on the keras example from Victor Basu", ).launch(enable_queue=True, debug=True)