from huggingface_hub import from_pretrained_keras
import gradio as gr
import ast
import pandas as pd
import numpy as np
import tensorflow as tf
from rdkit import Chem, RDLogger
from rdkit.Chem import BondType
from rdkit.Chem.Draw import MolsToGridImage
RDLogger.DisableLog("rdApp.*")
# Config
SMILE_CHARSET = '["C", "B", "F", "I", "H", "O", "N", "S", "P", "Cl", "Br"]'
bond_mapping = {"SINGLE": 0, "DOUBLE": 1, "TRIPLE": 2, "AROMATIC": 3}
bond_mapping.update(
{0: BondType.SINGLE, 1: BondType.DOUBLE, 2: BondType.TRIPLE, 3: BondType.AROMATIC}
)
SMILE_CHARSET = ast.literal_eval(SMILE_CHARSET)
MAX_MOLSIZE = 109
SMILE_to_index = dict((c, i) for i, c in enumerate(SMILE_CHARSET))
index_to_SMILE = dict((i, c) for i, c in enumerate(SMILE_CHARSET))
atom_mapping = dict(SMILE_to_index)
atom_mapping.update(index_to_SMILE)
NUM_ATOMS = 120 # Maximum number of atoms
ATOM_DIM = 11 # Number of atom types
BOND_DIM = 4 + 1 # Number of bond types
LATENT_DIM = 435 # Size of the latent space
def graph_to_molecule(graph):
# Unpack graph
adjacency, features = graph
# RWMol is a molecule object intended to be edited
molecule = Chem.RWMol()
# Remove "no atoms" & atoms with no bonds
keep_idx = np.where(
(np.argmax(features, axis=1) != ATOM_DIM - 1)
& (np.sum(adjacency[:-1], axis=(0, 1)) != 0)
)[0]
features = features[keep_idx]
adjacency = adjacency[:, keep_idx, :][:, :, keep_idx]
# Add atoms to molecule
for atom_type_idx in np.argmax(features, axis=1):
atom = Chem.Atom(atom_mapping[atom_type_idx])
_ = molecule.AddAtom(atom)
# Add bonds between atoms in molecule; based on the upper triangles
# of the [symmetric] adjacency tensor
(bonds_ij, atoms_i, atoms_j) = np.where(np.triu(adjacency) == 1)
for (bond_ij, atom_i, atom_j) in zip(bonds_ij, atoms_i, atoms_j):
if atom_i == atom_j or bond_ij == BOND_DIM - 1:
continue
bond_type = bond_mapping[bond_ij]
molecule.AddBond(int(atom_i), int(atom_j), bond_type)
# Sanitize the molecule; for more information on sanitization, see
# https://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization
flag = Chem.SanitizeMol(molecule, catchErrors=True)
# Let's be strict. If sanitization fails, return None
if flag != Chem.SanitizeFlags.SANITIZE_NONE:
return None
return molecule
model = from_pretrained_keras("keras-io/drug-molecule-generation-with-VAE")
def inference(num_mol):
z = tf.random.normal((1000, LATENT_DIM))
reconstruction_adjacency, reconstruction_features = model.predict(z)
# obtain one-hot encoded adjacency tensor
adjacency = tf.argmax(reconstruction_adjacency, axis=1)
adjacency = tf.one_hot(adjacency, depth=BOND_DIM, axis=1)
# Remove potential self-loops from adjacency
adjacency = tf.linalg.set_diag(adjacency, tf.zeros(tf.shape(adjacency)[:-1]))
# obtain one-hot encoded feature tensor
features = tf.argmax(reconstruction_features, axis=2)
features = tf.one_hot(features, depth=ATOM_DIM, axis=2)
molecules = [ graph_to_molecule([adjacency[i].numpy(), features[i].numpy()]) for i in range(1000)]
MolsToGridImage(
[m for m in molecules if m is not None][:num_mol], molsPerRow=5, subImgSize=(260, 160)
).save("img.png")
return 'img.png'
gr.Interface(
fn=inference,
title="Generating Drug Molecule with VAE",
description = "Implementing a Convolutional Variational AutoEncoder (VAE) for Drug Discovery 🔬",
inputs=[
gr.inputs.Slider(20, 100, label='Number of Molecular Graphs', step=20, default=40),
],
outputs="image",
article = "Author: Vu Minh Chien. Based on the keras example from Victor Basu",
).launch(enable_queue=True, debug=True)