{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Prepare input structures\n", "\n", "We first start by preparing the input ASE database. In the subfolder `structures` there are 9 example MOF structures. `save_to_db` is an convenient function to conver CIF files into one db." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[32m2024-12-22 11:40:02.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmlip_arena.tasks.mof\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1m\n", "The module 'mlip_arena.tasks.mof' is adapted from the repository: https://github.com/hspark1212/DAC-SIM. \n", "By using this module, you agree to the terms and conditions specified in the following license: \n", "\n", "https://github.com/hspark1212/DAC-SIM/blob/main/LICENSE\n", "\n", "Additionally, please ensure proper attribution by citing the reference: \n", "\n", "Lim, Y., Park, H., Walsh, A., & Kim, J. (2024). Accelerating CO₂ Direct Air Capture Screening for Metal-Organic Frameworks with a Transferable Machine Learning Force Field.\n", "\n", "A local copy of the LICENSE file can be found at: /pscratch/sd/c/cyrusyc/mlip-arena/mlip_arena/tasks/mof/LICENSE.\n", "\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "No module named 'deepmd'\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/pscratch/sd/c/cyrusyc/.conda/mlip-arena/lib/python3.11/site-packages/ase/io/cif.py:408: UserWarning: crystal system 'triclinic' is not interpreted for space group Spacegroup(1, setting=1). This may result in wrong setting!\n", " warnings.warn(\n", "\u001b[32m2024-12-22 11:40:22.686\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmlip_arena.tasks.mof.input\u001b[0m:\u001b[36msave_to_db\u001b[0m:\u001b[36m45\u001b[0m - \u001b[1mmofs.db uploaded to atomind/mlip-arena/mof\u001b[0m\n" ] }, { "data": { "text/plain": [ "PosixPath('mofs.db')" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from dotenv import load_dotenv\n", "load_dotenv()\n", "\n", "import glob\n", "from ase.io import read\n", "from mlip_arena.tasks.mof.input import save_to_db\n", "\n", "files = glob.glob('structures/*.cif')\n", "\n", "atoms_list = []\n", "\n", "for file in files:\n", " atoms = read(file)\n", " atoms_list.append(atoms)\n", "\n", "save_to_db(\n", " atoms_list=atoms_list,\n", " db_path='mofs.db'\n", ")" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "## Demo: MOF benchmark workflow" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from ase.build import molecule\n", "from mlip_arena.models import MLIPEnum\n", "from mlip_arena.tasks.mof.input import get_atoms_from_db\n", "from mlip_arena.tasks.mof.flow import widom_insertion\n", "from prefect import flow\n", "from tqdm.auto import tqdm\n", "\n", "@flow\n", "def benchmark_one():\n", " results = []\n", " for model in MLIPEnum:\n", " if model.name != \"MACE-MP(M)\":\n", " continue # skip all models other than MACE-MP\n", " for atoms in tqdm(get_atoms_from_db('mofs.db')):\n", " result = widom_insertion(\n", " atoms, \n", " molecule('CO2'),\n", " calculator_name=model.name,\n", " )\n", " results.append(result)\n", " break # only test on first MOF\n", " return [r for r in results]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
15:46:06.786 | INFO    | Flow run 'amigurumi-beagle' - Beginning flow run 'amigurumi-beagle' for flow 'benchmark-one'\n",
       "
\n" ], "text/plain": [ "15:46:06.786 | \u001b[36mINFO\u001b[0m | Flow run\u001b[35m 'amigurumi-beagle'\u001b[0m - Beginning flow run\u001b[35m 'amigurumi-beagle'\u001b[0m for flow\u001b[1;35m 'benchmark-one'\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2a1bc33089b44a308a44cd14979533f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6c5c9eda59644f528f76c5b2b18b272d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mof/mofs.db: 0%| | 0.00/168k [00:0015:46:07.619 | INFO | Task run 'Widom Insertion: C28H16O10V2 + CO2 - MACE-MP(M)' - Optimizing structure\n", "\n" ], "text/plain": [ "15:46:07.619 | \u001b[36mINFO\u001b[0m | Task run 'Widom Insertion: C28H16O10V2 + CO2 - MACE-MP(M)' - Optimizing structure\n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Selected GPU cuda:0 with 40339.31 MB free memory from 1 GPUs\n", "Using device: cuda:0\n", "Selected GPU cuda:0 with 40339.31 MB free memory from 1 GPUs\n", "Default dtype float32 does not match model dtype float64, converting models to float32.\n", "Using calculator: \n", "Using filter: \n", "Using optimizer: \n", " Step Time Energy fmax\n", "FIRE2: 0 15:46:10 -398.611542 0.341164\n", "FIRE2: 1 15:46:10 -398.627014 0.185914\n", "FIRE2: 2 15:46:12 -398.632233 0.157950\n", "FIRE2: 3 15:46:12 -398.636993 0.163124\n", "FIRE2: 4 15:46:12 -398.633881 0.158292\n", "FIRE2: 5 15:46:13 -398.642792 0.152467\n", "FIRE2: 6 15:46:13 -398.643768 0.142569\n", "FIRE2: 7 15:46:13 -398.637024 0.131079\n", "FIRE2: 8 15:46:13 -398.648560 0.115860\n", "FIRE2: 9 15:46:13 -398.648590 0.099157\n", "FIRE2: 10 15:46:13 -398.654022 0.081072\n", "FIRE2: 11 15:46:14 -398.651306 0.062052\n", "FIRE2: 12 15:46:14 -398.661194 0.044053\n" ] }, { "data": { "text/html": [ "
15:46:14.836 | INFO    | Task run 'OPT: C28H16O10V2 - MACE-MP(M)' - Finished in state Completed()\n",
       "
\n" ], "text/plain": [ "15:46:14.836 | \u001b[36mINFO\u001b[0m | Task run 'OPT: C28H16O10V2 - MACE-MP(M)' - Finished in state \u001b[32mCompleted\u001b[0m()\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
15:46:14.840 | INFO    | Task run 'Widom Insertion: C28H16O10V2 + CO2 - MACE-MP(M)' - Optimizing gas molecule\n",
       "
\n" ], "text/plain": [ "15:46:14.840 | \u001b[36mINFO\u001b[0m | Task run 'Widom Insertion: C28H16O10V2 + CO2 - MACE-MP(M)' - Optimizing gas molecule\n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Selected GPU cuda:0 with 40301.98 MB free memory from 1 GPUs\n", "Using device: cuda:0\n", "Selected GPU cuda:0 with 40301.98 MB free memory from 1 GPUs\n", "Default dtype float32 does not match model dtype float64, converting models to float32.\n", "Using calculator: \n", "Using optimizer: \n", " Step Time Energy fmax\n", "FIRE2: 0 15:46:15 -22.777348 0.437851\n", "FIRE2: 1 15:46:17 -22.778408 0.014392\n" ] }, { "data": { "text/html": [ "
15:46:17.127 | INFO    | Task run 'OPT: CO2 - MACE-MP(M)' - Finished in state Completed()\n",
       "
\n" ], "text/plain": [ "15:46:17.127 | \u001b[36mINFO\u001b[0m | Task run 'OPT: CO2 - MACE-MP(M)' - Finished in state \u001b[32mCompleted\u001b[0m()\n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Number of accessible positions: 357364 out of total 498623\n", "Selected GPU cuda:0 with 40280.80 MB free memory from 1 GPUs\n", "Using device: cuda:0\n", "Selected GPU cuda:0 with 40280.80 MB free memory from 1 GPUs\n", "Default dtype float32 does not match model dtype float64, converting models to float32.\n", "Using calculator: \n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "986f472b9f3f41f08f9b0f4f63a3e115", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Fold 1/3: 0%| | 0/5000 [00:0015:52:13.884 | INFO | Task run 'Widom Insertion: C28H16O10V2 + CO2 - MACE-MP(M)' - Finished in state Completed()\n", "\n" ], "text/plain": [ "15:52:13.884 | \u001b[36mINFO\u001b[0m | Task run 'Widom Insertion: C28H16O10V2 + CO2 - MACE-MP(M)' - Finished in state \u001b[32mCompleted\u001b[0m()\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
15:52:14.235 | INFO    | Flow run 'amigurumi-beagle' - Finished in state Completed()\n",
       "
\n" ], "text/plain": [ "15:52:14.235 | \u001b[36mINFO\u001b[0m | Flow run\u001b[35m 'amigurumi-beagle'\u001b[0m - Finished in state \u001b[32mCompleted\u001b[0m()\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "[2.3816888372250245e-06, 2.5323794093995965e-06, inf]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result = benchmark_one()\n", "result[0]['henry_coefficient']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run workflow" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
11:40:36.644 | WARNING | MDAnalysis.coordinates.AMBER - netCDF4 is not available. Writing AMBER ncdf files will be slow.\n",
       "
\n" ], "text/plain": [ "11:40:36.644 | \u001b[38;5;184mWARNING\u001b[0m | MDAnalysis.coordinates.AMBER - netCDF4 is not available. Writing AMBER ncdf files will be slow.\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:44.431 | INFO    | distributed.http.proxy - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy\n",
       "
\n" ], "text/plain": [ "11:40:44.431 | \u001b[36mINFO\u001b[0m | distributed.http.proxy - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:44.445 | INFO    | distributed.scheduler - State start\n",
       "
\n" ], "text/plain": [ "11:40:44.445 | \u001b[36mINFO\u001b[0m | distributed.scheduler - State start\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:44.503 | INFO    | distributed.scheduler -   Scheduler at:  tcp://128.55.64.42:36351\n",
       "
\n" ], "text/plain": [ "11:40:44.503 | \u001b[36mINFO\u001b[0m | distributed.scheduler - Scheduler at: tcp://128.55.64.42:36351\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:44.505 | INFO    | distributed.scheduler -   dashboard at:  http://128.55.64.42:8787/status\n",
       "
\n" ], "text/plain": [ "11:40:44.505 | \u001b[36mINFO\u001b[0m | distributed.scheduler - dashboard at: \u001b[94mhttp://128.55.64.42:8787/status\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:44.506 | INFO    | distributed.scheduler - Registering Worker plugin shuffle\n",
       "
\n" ], "text/plain": [ "11:40:44.506 | \u001b[36mINFO\u001b[0m | distributed.scheduler - Registering Worker plugin shuffle\n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "#!/bin/bash\n", "\n", "#SBATCH -A matgen\n", "#SBATCH --mem=0\n", "#SBATCH -t 00:30:00\n", "#SBATCH -J mof\n", "#SBATCH -q regular\n", "#SBATCH -N 1\n", "#SBATCH -C gpu\n", "#SBATCH -G 4\n", "source ~/.bashrc\n", "module load python\n", "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena\n", "/pscratch/sd/c/cyrusyc/.conda/mlip-arena/bin/python -m distributed.cli.dask_worker tcp://128.55.64.42:36351 --name dummy-name --nthreads 1 --memory-limit 59.60GiB --nanny --death-timeout 60\n", "\n" ] }, { "data": { "text/html": [ "
11:40:44.514 | INFO    | distributed.deploy.adaptive - Adaptive scaling started: minimum=10 maximum=20\n",
       "
\n" ], "text/plain": [ "11:40:44.514 | \u001b[36mINFO\u001b[0m | distributed.deploy.adaptive - Adaptive scaling started: minimum=10 maximum=20\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:44.522 | INFO    | distributed.scheduler - Receive client connection: Client-a27a9a6e-c09c-11ef-8318-c77ccf4f19b4\n",
       "
\n" ], "text/plain": [ "11:40:44.522 | \u001b[36mINFO\u001b[0m | distributed.scheduler - Receive client connection: Client-a27a9a6e-c09c-11ef-8318-c77ccf4f19b4\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:44.523 | INFO    | distributed.core - Starting established connection to tcp://128.55.64.42:48148\n",
       "
\n" ], "text/plain": [ "11:40:44.523 | \u001b[36mINFO\u001b[0m | distributed.core - Starting established connection to tcp://128.55.64.42:48148\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:45.046 | INFO    | prefect.engine - Created flow run 'enormous-hog' for flow 'run'\n",
       "
\n" ], "text/plain": [ "11:40:45.046 | \u001b[36mINFO\u001b[0m | prefect.engine - Created flow run\u001b[35m 'enormous-hog'\u001b[0m for flow\u001b[1;35m 'run'\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:45.048 | INFO    | prefect.engine - View at https://app.prefect.cloud/account/f7d40474-9362-4bfa-8950-ee6a43ec00f3/workspace/d4bb0913-5f5e-49f7-bfc5-06509088baeb/runs/flow-run/c0c7a3f2-d8d0-4f17-9789-4e070f17bf3b\n",
       "
\n" ], "text/plain": [ "11:40:45.048 | \u001b[36mINFO\u001b[0m | prefect.engine - View at \u001b[94mhttps://app.prefect.cloud/account/f7d40474-9362-4bfa-8950-ee6a43ec00f3/workspace/d4bb0913-5f5e-49f7-bfc5-06509088baeb/runs/flow-run/c0c7a3f2-d8d0-4f17-9789-4e070f17bf3b\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:45.366 | INFO    | prefect.task_runner.dask - Connecting to existing Dask cluster SLURMCluster(00ac1d39, 'tcp://128.55.64.42:36351', workers=0, threads=0, memory=0 B)\n",
       "
\n" ], "text/plain": [ "11:40:45.366 | \u001b[36mINFO\u001b[0m | prefect.task_runner.dask - Connecting to existing Dask cluster SLURMCluster(00ac1d39, 'tcp://128.55.64.42:36351', workers=0, threads=0, memory=0 B)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:45.395 | INFO    | distributed.scheduler - Receive client connection: PrefectDaskClient-a2fe06b3-c09c-11ef-8318-c77ccf4f19b4\n",
       "
\n" ], "text/plain": [ "11:40:45.395 | \u001b[36mINFO\u001b[0m | distributed.scheduler - Receive client connection: PrefectDaskClient-a2fe06b3-c09c-11ef-8318-c77ccf4f19b4\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:45.401 | INFO    | distributed.core - Starting established connection to tcp://128.55.64.42:48168\n",
       "
\n" ], "text/plain": [ "11:40:45.401 | \u001b[36mINFO\u001b[0m | distributed.core - Starting established connection to tcp://128.55.64.42:48168\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c88bb75bf9b84285bfd6d524e7d73650", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
11:40:45.501 | INFO    | Task run 'get_atoms_from_db-6be' - Created task run 'get_atoms_from_db-6be' for task 'get_atoms_from_db'\n",
       "
\n" ], "text/plain": [ "11:40:45.501 | \u001b[36mINFO\u001b[0m | Task run 'get_atoms_from_db-6be' - Created task run 'get_atoms_from_db-6be' for task 'get_atoms_from_db'\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from dask.distributed import Client\n", "from dask_jobqueue import SLURMCluster\n", "from prefect_dask import DaskTaskRunner\n", "from mlip_arena.tasks.mof.flow import run as MOF\n", "\n", "# Orchestrate your awesome dask workflow runner\n", "\n", "nodes_per_alloc = 1\n", "gpus_per_alloc = 4\n", "ntasks = 1\n", "\n", "cluster_kwargs = dict(\n", " cores=1,\n", " memory=\"64 GB\",\n", " shebang=\"#!/bin/bash\",\n", " account=\"matgen\",\n", " walltime=\"00:30:00\",\n", " job_mem=\"0\",\n", " job_script_prologue=[\n", " \"source ~/.bashrc\",\n", " \"module load python\",\n", " \"source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena\",\n", " ],\n", " job_directives_skip=[\"-n\", \"--cpus-per-task\", \"-J\"],\n", " job_extra_directives=[\n", " \"-J mof\",\n", " \"-q regular\",\n", " f\"-N {nodes_per_alloc}\",\n", " \"-C gpu\",\n", " f\"-G {gpus_per_alloc}\",\n", " ],\n", ")\n", "\n", "cluster = SLURMCluster(**cluster_kwargs)\n", "print(cluster.job_script())\n", "cluster.adapt(minimum_jobs=10, maximum_jobs=20)\n", "client = Client(cluster)\n", "\n", "# Run the workflow on HPC cluster in parallel\n", "\n", "results = MOF.with_options(\n", " task_runner=DaskTaskRunner(address=client.scheduler.address),\n", " # log_prints=True,\n", ")()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "mlip-arena", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }