Spaces:
Build error
Build error
from huggingface_hub import from_pretrained_keras | |
import gradio as gr | |
import ast | |
import pandas as pd | |
import numpy as np | |
import tensorflow as tf | |
from rdkit import Chem, RDLogger | |
from rdkit.Chem import BondType | |
from rdkit.Chem.Draw import MolsToGridImage | |
RDLogger.DisableLog("rdApp.*") | |
# Config | |
SMILE_CHARSET = '["C", "B", "F", "I", "H", "O", "N", "S", "P", "Cl", "Br"]' | |
bond_mapping = {"SINGLE": 0, "DOUBLE": 1, "TRIPLE": 2, "AROMATIC": 3} | |
bond_mapping.update( | |
{0: BondType.SINGLE, 1: BondType.DOUBLE, 2: BondType.TRIPLE, 3: BondType.AROMATIC} | |
) | |
SMILE_CHARSET = ast.literal_eval(SMILE_CHARSET) | |
MAX_MOLSIZE = 109 | |
SMILE_to_index = dict((c, i) for i, c in enumerate(SMILE_CHARSET)) | |
index_to_SMILE = dict((i, c) for i, c in enumerate(SMILE_CHARSET)) | |
atom_mapping = dict(SMILE_to_index) | |
atom_mapping.update(index_to_SMILE) | |
NUM_ATOMS = 120 # Maximum number of atoms | |
ATOM_DIM = 11 # Number of atom types | |
BOND_DIM = 4 + 1 # Number of bond types | |
LATENT_DIM = 435 # Size of the latent space | |
def graph_to_molecule(graph): | |
# Unpack graph | |
adjacency, features = graph | |
# RWMol is a molecule object intended to be edited | |
molecule = Chem.RWMol() | |
# Remove "no atoms" & atoms with no bonds | |
keep_idx = np.where( | |
(np.argmax(features, axis=1) != ATOM_DIM - 1) | |
& (np.sum(adjacency[:-1], axis=(0, 1)) != 0) | |
)[0] | |
features = features[keep_idx] | |
adjacency = adjacency[:, keep_idx, :][:, :, keep_idx] | |
# Add atoms to molecule | |
for atom_type_idx in np.argmax(features, axis=1): | |
atom = Chem.Atom(atom_mapping[atom_type_idx]) | |
_ = molecule.AddAtom(atom) | |
# Add bonds between atoms in molecule; based on the upper triangles | |
# of the [symmetric] adjacency tensor | |
(bonds_ij, atoms_i, atoms_j) = np.where(np.triu(adjacency) == 1) | |
for (bond_ij, atom_i, atom_j) in zip(bonds_ij, atoms_i, atoms_j): | |
if atom_i == atom_j or bond_ij == BOND_DIM - 1: | |
continue | |
bond_type = bond_mapping[bond_ij] | |
molecule.AddBond(int(atom_i), int(atom_j), bond_type) | |
# Sanitize the molecule; for more information on sanitization, see | |
# https://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization | |
flag = Chem.SanitizeMol(molecule, catchErrors=True) | |
# Let's be strict. If sanitization fails, return None | |
if flag != Chem.SanitizeFlags.SANITIZE_NONE: | |
return None | |
return molecule | |
model = from_pretrained_keras("keras-io/drug-molecule-generation-with-VAE") | |
def inference(num_mol): | |
z = tf.random.normal((1000, LATENT_DIM)) | |
reconstruction_adjacency, reconstruction_features = model.predict(z) | |
# obtain one-hot encoded adjacency tensor | |
adjacency = tf.argmax(reconstruction_adjacency, axis=1) | |
adjacency = tf.one_hot(adjacency, depth=BOND_DIM, axis=1) | |
# Remove potential self-loops from adjacency | |
adjacency = tf.linalg.set_diag(adjacency, tf.zeros(tf.shape(adjacency)[:-1])) | |
# obtain one-hot encoded feature tensor | |
features = tf.argmax(reconstruction_features, axis=2) | |
features = tf.one_hot(features, depth=ATOM_DIM, axis=2) | |
molecules = [ graph_to_molecule([adjacency[i].numpy(), features[i].numpy()]) for i in range(1000)] | |
MolsToGridImage( | |
[m for m in molecules if m is not None][:num_mol], molsPerRow=5, subImgSize=(260, 160) | |
).save("img.png") | |
return 'img.png' | |
gr.Interface( | |
fn=inference, | |
title="Generating Drug Molecule with VAE", | |
description = "Implementing a Convolutional Variational AutoEncoder (VAE) for Drug Discovery π¬", | |
inputs=[ | |
gr.inputs.Slider(20, 100, label='Number of Molecular Graphs', step=20, default=40), | |
], | |
outputs="image", | |
article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the keras example from <a href=\"https://keras.io/examples/generative/molecule_generation/\">Victor Basu</a>", | |
).launch(enable_queue=True, debug=True) |