jannisborn committed on
Commit
779b3f2
·
unverified ·
1 Parent(s): 82457c0
app.py CHANGED
@@ -32,7 +32,8 @@ def run_inference(
32
  model = MoLeR(configuration=config)
33
  samples = list(model.sample(number_of_samples))
34
 
35
- return draw_grid_generate(samples)
 
36
 
37
 
38
  if __name__ == "__main__":
 
32
  model = MoLeR(configuration=config)
33
  samples = list(model.sample(number_of_samples))
34
 
35
+ seed_mols = [] if scaffolds == "" else scaffolds.split(".")
36
+ return draw_grid_generate(seed_mols, samples)
37
 
38
 
39
  if __name__ == "__main__":
model_cards/article.md CHANGED
@@ -1,4 +1,4 @@
1
- # Usage instructions -- MoLeR
2
 
3
  ## Parameters
4
 
@@ -6,6 +6,7 @@
6
  Which model checkpoint to use (trained on different datasets).
7
 
8
  ### Scaffolds
 
9
 
10
  ### Number of samples:
11
  How many samples should be generated (between 1 and 50).
 
1
+ # MoLeR -- Documentation
2
 
3
  ## Parameters
4
 
 
6
  Which model checkpoint to use (trained on different datasets).
7
 
8
  ### Scaffolds
9
+ One or multiple scaffolds (or seed molecules), provided as '.'-separated SMILES. If empty, no scaffolds are used.
10
 
11
  ### Number of samples:
12
  How many samples should be generated (between 1 and 50).
model_cards/description.md CHANGED
@@ -1,7 +1,27 @@
1
 
2
- ### Concurrent sequence regression and generation for molecular language modeling
3
 
4
- The RT is a multitask Transformer that reformulates regression as a conditional sequence modeling task.
5
- This yields a dichotomous language model that seamlessly integrates property prediction with property-driven conditional generation. For details see the [arXiv preprint](https://arxiv.org/abs/2202.01338), the [development code](https://github.com/IBM/regression-transformer) and the [GT4SD endpoint](https://github.com/GT4SD/gt4sd-core) for inference.
6
 
7
- Each `algorithm_version` refers to one trained model. Each model can be used for **two tasks**, either to *predict* one (or multiple) properties of a molecule or to *generate* a molecule (given a seed molecule and a property constraint).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ # MoLeR (MOlecule-LEvel Representation)
3
 
4
+ <img src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="800">
 
5
 
6
+ ### Model card
7
+
8
+ *Model Details*: MoLeR is a graph-based molecular generative model that can be conditioned (primed) on scaffolds. The model decorates scaffolds with realistic structural motifs.
9
+ *Developers*: Krzysztof Maziarz and co-authors from Microsoft Research and Novartis (full reference at bottom).
10
+ *Distributors*: Developers' code wrapped and distributed by the GT4SD Team (2023) from IBM Research.
11
+ *Model date*: Released around March 2022.
12
+ *Model version*: Model provided by the original authors, see their [GitHub repository](https://github.com/microsoft/molecule-generation).
13
+ *Model type*: An encoder-decoder-based GNN for molecular generation.
14
+ *Information about training algorithms, parameters, fairness constraints or other applied approaches, and features*: Trained by the original authors with the default parameters provided [on GitHub](https://github.com/microsoft/molecule-generation).
15
+ *Paper or other resource for more information*: Learning to Extend Molecular Scaffolds with Structural Motifs (ICLR 2022).
16
+ *License*: MIT
17
+ *Where to send questions or comments about the model*: Open an issue on the original authors' [GitHub repository](https://github.com/microsoft/molecule-generation).
18
+ *Intended Use. Use cases that were envisioned during development*: Chemical research, in particular drug discovery.
19
+ *Primary intended uses/users*: Researchers and computational chemists using the model for model comparison or research exploration purposes.
20
+ *Out-of-scope use cases*: Production-level inference, producing molecules with harmful properties.
21
+ *Factors*: Not applicable.
22
+ *Metrics*: Validation loss on decoding correct molecules. Evaluated on several downstream tasks.
23
+ *Datasets*: 1.5M drug-like molecules from GuacaMol benchmark. Finetuning on 20 molecular optimization tasks from GuacaMol.
24
+ *Ethical Considerations*: Unclear; please consult the original authors in case of questions.
25
+ *Caveats and Recommendations*: Unclear; please consult the original authors in case of questions.
26
+
27
+ Model card prototype inspired by [*Mitchell et al. (2019), Proceedings of the Conference on Fairness, Accountability, and Transparency*](https://dl.acm.org/doi/abs/10.1145/3287560.3287596)
model_cards/examples.csv CHANGED
@@ -1,5 +1,5 @@
1
  v0,,1,4,0
2
  v0,CC(=O)NC1=NC2=CC(OCC3=CC=CN(CC4=CC=C(Cl)C=C4)C3=O)=CC=C2N1,1,10,0
3
- v0,C12C=CC=NN1C(C#CC1=C(C)C=CC3C(NC4=CC(C(F)(F)F)=CC=C4)=NOC1=3)=CN=2,3,5,5
4
 
5
 
 
1
  v0,,1,4,0
2
  v0,CC(=O)NC1=NC2=CC(OCC3=CC=CN(CC4=CC=C(Cl)C=C4)C3=O)=CC=C2N1,1,10,0
3
+ v0,C12C=CC=NN1C(C#CC1=C(C)C=CC3C(NC4=CC(C(F)(F)F)=CC=C4)=NOC1=3)=CN=2.CCO,3,5,5
4
 
5
 
utils.py CHANGED
@@ -14,8 +14,9 @@ logger.addHandler(logging.NullHandler())
14
 
15
 
16
  def draw_grid_generate(
17
- samples: List[Tuple[str]],
18
- n_cols: int = 5,
 
19
  size=(140, 200),
20
  ) -> str:
21
  """
@@ -32,11 +33,14 @@ def draw_grid_generate(
32
 
33
  result = defaultdict(list)
34
  result.update(
35
- {"SMILES": samples, "Name": [f"sample_{i}" for i in range(len(samples))]}
 
 
 
 
36
  )
37
 
38
  result_df = pd.DataFrame(result)
39
- print("RESTULT", result_df)
40
  obj = mols2grid.display(
41
  result_df,
42
  tooltip=list(result.keys()),
 
14
 
15
 
16
  def draw_grid_generate(
17
+ seeds: List[str],
18
+ samples: List[str],
19
+ n_cols: int = 3,
20
  size=(140, 200),
21
  ) -> str:
22
  """
 
33
 
34
  result = defaultdict(list)
35
  result.update(
36
+ {
37
+ "SMILES": seeds + samples,
38
+ "Name": [f"Seed_{i}" for i in range(len(seeds))]
39
+ + [f"Generated_{i}" for i in range(len(samples))],
40
+ },
41
  )
42
 
43
  result_df = pd.DataFrame(result)
 
44
  obj = mols2grid.display(
45
  result_df,
46
  tooltip=list(result.keys()),