jannisborn
committed on
update
Browse files- app.py +2 -1
- model_cards/article.md +2 -1
- model_cards/description.md +24 -4
- model_cards/examples.csv +1 -1
- utils.py +8 -4
app.py
CHANGED
@@ -32,7 +32,8 @@ def run_inference(
|
|
32 |
model = MoLeR(configuration=config)
|
33 |
samples = list(model.sample(number_of_samples))
|
34 |
|
35 |
-
|
|
|
36 |
|
37 |
|
38 |
if __name__ == "__main__":
|
|
|
32 |
model = MoLeR(configuration=config)
|
33 |
samples = list(model.sample(number_of_samples))
|
34 |
|
35 |
+
seed_mols = [] if scaffolds == "" else scaffolds.split(".")
|
36 |
+
return draw_grid_generate(seed_mols, samples)
|
37 |
|
38 |
|
39 |
if __name__ == "__main__":
|
model_cards/article.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
#
|
2 |
|
3 |
## Parameters
|
4 |
|
@@ -6,6 +6,7 @@
|
|
6 |
Which model checkpoint to use (trained on different datasets).
|
7 |
|
8 |
### Scaffolds
|
|
|
9 |
|
10 |
### Number of samples:
|
11 |
How many samples should be generated (between 1 and 50).
|
|
|
1 |
+
# MoLeR -- Documentation
|
2 |
|
3 |
## Parameters
|
4 |
|
|
|
6 |
Which model checkpoint to use (trained on different datasets).
|
7 |
|
8 |
### Scaffolds
|
9 |
+
One or multiple scaffolds (or seed molecules), provided as '.'-separated SMILES. If empty, no scaffolds are used.
|
10 |
|
11 |
### Number of samples:
|
12 |
How many samples should be generated (between 1 and 50).
|
model_cards/description.md
CHANGED
@@ -1,7 +1,27 @@
|
|
1 |
|
2 |
-
|
3 |
|
4 |
-
|
5 |
-
This yields a dichotomous language model that seamlessly integrates property prediction with property-driven conditional generation. For details see the [arXiv preprint](https://arxiv.org/abs/2202.01338), the [development code](https://github.com/IBM/regression-transformer) and the [GT4SD endpoint](https://github.com/GT4SD/gt4sd-core) for inference.
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
+
# MoLeR (MOlecule-LEvel Representation)
|
3 |
|
4 |
+
<img src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="800">
|
|
|
5 |
|
6 |
+
### Model card
|
7 |
+
|
8 |
+
*Model Details*: MoLeR is a graph-based molecular generative model that can be conditioned (primed) on scaffolds. The model decorates scaffolds with realistic structural motifs.
|
9 |
+
*Developers*: Krzysztof Maziarz and co-authors from Microsoft Research and Novartis (full reference at bottom).
|
10 |
+
*Distributors*: Developers' code wrapped and distributed by the GT4SD Team (2023) from IBM Research.
|
11 |
+
*Model date*: Released around March 2022.
|
12 |
+
*Model version*: Model provided by the original authors, see their [GitHub repository](https://github.com/microsoft/molecule-generation).
|
13 |
+
*Model type*: An encoder-decoder-based GNN for molecular generation.
|
14 |
+
*Information about training algorithms, parameters, fairness constraints or other applied approaches, and features*: Trained by the original authors with the default parameters provided [on GitHub](https://github.com/microsoft/molecule-generation).
|
15 |
+
*Paper or other resource for more information*: Learning to Extend Molecular Scaffolds with Structural Motifs (ICLR 2022).
|
16 |
+
*License*: MIT
|
17 |
+
*Where to send questions or comments about the model*: Open an issue on the original authors' [GitHub repository](https://github.com/microsoft/molecule-generation).
|
18 |
+
*Intended Use. Use cases that were envisioned during development*: Chemical research, in particular drug discovery.
|
19 |
+
*Primary intended uses/users*: Researchers and computational chemists using the model for model comparison or research exploration purposes.
|
20 |
+
*Out-of-scope use cases*: Production-level inference, producing molecules with harmful properties.
|
21 |
+
*Factors*: Not applicable.
|
22 |
+
*Metrics*: Validation loss on decoding correct molecules. Evaluated on several downstream tasks.
|
23 |
+
*Datasets*: 1.5M drug-like molecules from the GuacaMol benchmark. Fine-tuning on 20 molecular optimization tasks from GuacaMol.
|
24 |
+
*Ethical Considerations*: Unclear, please consult with original authors in case of questions.
|
25 |
+
*Caveats and Recommendations*: Unclear, please consult with original authors in case of questions.
|
26 |
+
|
27 |
+
Model card prototype inspired by [*Mitchell et al. (2019), Proceedings of the Conference on Fairness, Accountability, and Transparency*](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
|
model_cards/examples.csv
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
v0,,1,4,0
|
2 |
v0,CC(=O)NC1=NC2=CC(OCC3=CC=CN(CC4=CC=C(Cl)C=C4)C3=O)=CC=C2N1,1,10,0
|
3 |
-
v0,C12C=CC=NN1C(C#CC1=C(C)C=CC3C(NC4=CC(C(F)(F)F)=CC=C4)=NOC1=3)=CN=2,3,5,5
|
4 |
|
5 |
|
|
|
1 |
v0,,1,4,0
|
2 |
v0,CC(=O)NC1=NC2=CC(OCC3=CC=CN(CC4=CC=C(Cl)C=C4)C3=O)=CC=C2N1,1,10,0
|
3 |
+
v0,C12C=CC=NN1C(C#CC1=C(C)C=CC3C(NC4=CC(C(F)(F)F)=CC=C4)=NOC1=3)=CN=2.CCO,3,5,5
|
4 |
|
5 |
|
utils.py
CHANGED
@@ -14,8 +14,9 @@ logger.addHandler(logging.NullHandler())
|
|
14 |
|
15 |
|
16 |
def draw_grid_generate(
|
17 |
-
|
18 |
-
|
|
|
19 |
size=(140, 200),
|
20 |
) -> str:
|
21 |
"""
|
@@ -32,11 +33,14 @@ def draw_grid_generate(
|
|
32 |
|
33 |
result = defaultdict(list)
|
34 |
result.update(
|
35 |
-
{
|
|
|
|
|
|
|
|
|
36 |
)
|
37 |
|
38 |
result_df = pd.DataFrame(result)
|
39 |
-
print("RESTULT", result_df)
|
40 |
obj = mols2grid.display(
|
41 |
result_df,
|
42 |
tooltip=list(result.keys()),
|
|
|
14 |
|
15 |
|
16 |
def draw_grid_generate(
|
17 |
+
seeds: List[str],
|
18 |
+
samples: List[str],
|
19 |
+
n_cols: int = 3,
|
20 |
size=(140, 200),
|
21 |
) -> str:
|
22 |
"""
|
|
|
33 |
|
34 |
result = defaultdict(list)
|
35 |
result.update(
|
36 |
+
{
|
37 |
+
"SMILES": seeds + samples,
|
38 |
+
"Name": [f"Seed_{i}" for i in range(len(seeds))]
|
39 |
+
+ [f"Generated_{i}" for i in range(len(samples))],
|
40 |
+
},
|
41 |
)
|
42 |
|
43 |
result_df = pd.DataFrame(result)
|
|
|
44 |
obj = mols2grid.display(
|
45 |
result_df,
|
46 |
tooltip=list(result.keys()),
|