ipd committed on
Commit
45bbb54
1 Parent(s): d135347

update app.py

Browse files
Files changed (2) hide show
  1. app.py +21 -13
  2. models/fm4m.py +14 -7
app.py CHANGED
@@ -79,7 +79,7 @@ smiles_image_mapping = {
79
  "Mol 5": {"smiles": "C=CCS[C@@H](C)CC(=O)OCC", "image": "img/img5.png"} # Example SMILES for chloroethane
80
  }
81
 
82
- datasets = ["","BACE", "ESOL", "Custom Dataset"]
83
 
84
  models_enabled = ["SELFIES-TED", "MHG-GED", "MolFormer", "SMI-TED"]
85
 
@@ -115,12 +115,26 @@ def evaluate_and_log(models, dataset, task_type, eval_output):
115
  return log_df
116
 
117
 
118
- log_df = pd.read_csv('log.csv', index_col=0)
 
 
 
 
 
 
 
 
 
 
119
 
120
 
121
  # Load images for selection
122
  def load_image(path):
123
- return Image.open(smiles_image_mapping[path]["image"])# Image.1open(path)
 
 
 
 
124
 
125
 
126
  # Function to handle image selection
@@ -199,6 +213,7 @@ def generate_canonical(smiles):
199
  latent_vec, mask = encode([selfie])
200
  gen_mol = None
201
  for i in range(5, 51):
 
202
  noise = i / 10
203
  perturbed_latent = perturb_latent(latent_vec, noise_scale=noise)
204
  gen = generate(perturbed_latent, mask)
@@ -207,6 +222,7 @@ def generate_canonical(smiles):
207
 
208
  if gen_mol:
209
  # Calculate properties for ref and gen molecules
 
210
  ref_properties = calculate_properties(smiles)
211
  gen_properties = calculate_properties(gen_mol)
212
  tanimoto_similarity = calculate_tanimoto(smiles, gen_mol)
@@ -221,6 +237,7 @@ def generate_canonical(smiles):
221
  df = pd.DataFrame(data)
222
 
223
  # Display molecule image of canonical smiles
 
224
  mol_image = smiles_to_image(gen_mol)
225
 
226
  return df, gen_mol, mol_image
@@ -393,6 +410,7 @@ def display_plot(plot_type):
393
 
394
  # Predefined dataset paths (these should be adjusted to your file paths)
395
  predefined_datasets = {
 
396
  "BACE": f"./data/bace/train.csv, ./data/bace/test.csv, smiles, Class",
397
  "ESOL": f"./data/esol/train.csv, ./data/esol/test.csv, smiles, prop",
398
  }
@@ -426,16 +444,6 @@ def handle_dataset_selection(selected_dataset):
426
  return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(
427
  visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
428
  else:
429
- #[dataset_name, train_file, train_display, test_file, test_display, predefined_display,
430
- # input_column_selector, output_column_selector]
431
-
432
-
433
-
434
- # Load the predefined dataset from its local path
435
- #return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(
436
- # visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
437
- #return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(
438
- # visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
439
  return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(
440
  visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
441
 
 
79
  "Mol 5": {"smiles": "C=CCS[C@@H](C)CC(=O)OCC", "image": "img/img5.png"} # Example SMILES for chloroethane
80
  }
81
 
82
+ datasets = [" ", "BACE", "ESOL", "Load Custom Dataset"]
83
 
84
  models_enabled = ["SELFIES-TED", "MHG-GED", "MolFormer", "SMI-TED"]
85
 
 
115
  return log_df
116
 
117
 
118
+ try:
119
+ log_df = pd.read_csv('log.csv', index_col=0)
120
+ except:
121
+ log_df = pd.DataFrame({"":[],
122
+ 'Selected Models': [],
123
+ 'Dataset': [],
124
+ 'Task': [],
125
+ 'Result': []
126
+ })
127
+ csv_file_path = 'log.csv'
128
+ log_df.to_csv(csv_file_path, index=False)
129
 
130
 
131
  # Load images for selection
132
  def load_image(path):
133
+ try:
134
+ return Image.open(smiles_image_mapping[path]["image"])# Image.1open(path)
135
+ except:
136
+ pass
137
+
138
 
139
 
140
  # Function to handle image selection
 
213
  latent_vec, mask = encode([selfie])
214
  gen_mol = None
215
  for i in range(5, 51):
216
+ print("Searching Latent space")
217
  noise = i / 10
218
  perturbed_latent = perturb_latent(latent_vec, noise_scale=noise)
219
  gen = generate(perturbed_latent, mask)
 
222
 
223
  if gen_mol:
224
  # Calculate properties for ref and gen molecules
225
+ print("calculating properties")
226
  ref_properties = calculate_properties(smiles)
227
  gen_properties = calculate_properties(gen_mol)
228
  tanimoto_similarity = calculate_tanimoto(smiles, gen_mol)
 
237
  df = pd.DataFrame(data)
238
 
239
  # Display molecule image of canonical smiles
240
+ print("Getting image")
241
  mol_image = smiles_to_image(gen_mol)
242
 
243
  return df, gen_mol, mol_image
 
410
 
411
  # Predefined dataset paths (these should be adjusted to your file paths)
412
  predefined_datasets = {
413
+ " ": " ",
414
  "BACE": f"./data/bace/train.csv, ./data/bace/test.csv, smiles, Class",
415
  "ESOL": f"./data/esol/train.csv, ./data/esol/test.csv, smiles, prop",
416
  }
 
444
  return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(
445
  visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
446
  else:
 
 
 
 
 
 
 
 
 
 
447
  return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(
448
  visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
449
 
models/fm4m.py CHANGED
@@ -308,7 +308,8 @@ def single_modal(model,dataset, downstream_model,params):
308
  verbose=False)
309
  n_samples = np.minimum(1000, len(x_batch))
310
  features_umap = reducer.fit_transform(x_batch[:n_samples])
311
- x = y_batch.values[:n_samples]
 
312
  index_0 = [index for index in range(len(x)) if x[index] == 0]
313
  index_1 = [index for index in range(len(x)) if x[index] == 1]
314
 
@@ -340,7 +341,8 @@ def single_modal(model,dataset, downstream_model,params):
340
  reducer = umap.UMAP(metric='euclidean', n_neighbors= 10, n_components=2, low_memory=True, min_dist=0.1, verbose=False)
341
  n_samples = np.minimum(1000,len(x_batch))
342
  features_umap = reducer.fit_transform(x_batch[:n_samples])
343
- x = y_batch.values[:n_samples]
 
344
  index_0 = [index for index in range(len(x)) if x[index] == 0]
345
  index_1 = [index for index in range(len(x)) if x[index] == 1]
346
 
@@ -371,7 +373,8 @@ def single_modal(model,dataset, downstream_model,params):
371
  verbose=False)
372
  n_samples = np.minimum(1000, len(x_batch))
373
  features_umap = reducer.fit_transform(x_batch[:n_samples])
374
- x = y_batch.values[:n_samples]
 
375
  #index_0 = [index for index in range(len(x)) if x[index] == 0]
376
  #index_1 = [index for index in range(len(x)) if x[index] == 1]
377
 
@@ -398,7 +401,8 @@ def single_modal(model,dataset, downstream_model,params):
398
  verbose=False)
399
  n_samples = np.minimum(1000, len(x_batch))
400
  features_umap = reducer.fit_transform(x_batch[:n_samples])
401
- x = y_batch.values[:n_samples]
 
402
  # index_0 = [index for index in range(len(x)) if x[index] == 0]
403
  # index_1 = [index for index in range(len(x)) if x[index] == 1]
404
 
@@ -426,7 +430,8 @@ def single_modal(model,dataset, downstream_model,params):
426
  verbose=False)
427
  n_samples = np.minimum(1000, len(x_batch))
428
  features_umap = reducer.fit_transform(x_batch[:n_samples])
429
- x = y_batch.values[:n_samples]
 
430
  # index_0 = [index for index in range(len(x)) if x[index] == 0]
431
  # index_1 = [index for index in range(len(x)) if x[index] == 1]
432
 
@@ -454,7 +459,8 @@ def single_modal(model,dataset, downstream_model,params):
454
  verbose=False)
455
  n_samples = np.minimum(1000, len(x_batch))
456
  features_umap = reducer.fit_transform(x_batch[:n_samples])
457
- x = y_batch.values[:n_samples]
 
458
  # index_0 = [index for index in range(len(x)) if x[index] == 0]
459
  # index_1 = [index for index in range(len(x)) if x[index] == 1]
460
 
@@ -546,7 +552,8 @@ def multi_modal(model_list,dataset, downstream_model,params):
546
  features_umap = reducer.fit_transform(x_batch[:n_samples])
547
 
548
  if "Classifier" in downstream_model:
549
- x = y_batch.values[:n_samples]
 
550
  index_0 = [index for index in range(len(x)) if x[index] == 0]
551
  index_1 = [index for index in range(len(x)) if x[index] == 1]
552
 
 
308
  verbose=False)
309
  n_samples = np.minimum(1000, len(x_batch))
310
  features_umap = reducer.fit_transform(x_batch[:n_samples])
311
+ try:x = y_batch.values[:n_samples]
312
+ except:x = y_batch[:n_samples]
313
  index_0 = [index for index in range(len(x)) if x[index] == 0]
314
  index_1 = [index for index in range(len(x)) if x[index] == 1]
315
 
 
341
  reducer = umap.UMAP(metric='euclidean', n_neighbors= 10, n_components=2, low_memory=True, min_dist=0.1, verbose=False)
342
  n_samples = np.minimum(1000,len(x_batch))
343
  features_umap = reducer.fit_transform(x_batch[:n_samples])
344
+ try:x = y_batch.values[:n_samples]
345
+ except:x = y_batch[:n_samples]
346
  index_0 = [index for index in range(len(x)) if x[index] == 0]
347
  index_1 = [index for index in range(len(x)) if x[index] == 1]
348
 
 
373
  verbose=False)
374
  n_samples = np.minimum(1000, len(x_batch))
375
  features_umap = reducer.fit_transform(x_batch[:n_samples])
376
+ try:x = y_batch.values[:n_samples]
377
+ except:x = y_batch[:n_samples]
378
  #index_0 = [index for index in range(len(x)) if x[index] == 0]
379
  #index_1 = [index for index in range(len(x)) if x[index] == 1]
380
 
 
401
  verbose=False)
402
  n_samples = np.minimum(1000, len(x_batch))
403
  features_umap = reducer.fit_transform(x_batch[:n_samples])
404
+ try:x = y_batch.values[:n_samples]
405
+ except:x = y_batch[:n_samples]
406
  # index_0 = [index for index in range(len(x)) if x[index] == 0]
407
  # index_1 = [index for index in range(len(x)) if x[index] == 1]
408
 
 
430
  verbose=False)
431
  n_samples = np.minimum(1000, len(x_batch))
432
  features_umap = reducer.fit_transform(x_batch[:n_samples])
433
+ try:x = y_batch.values[:n_samples]
434
+ except:x = y_batch[:n_samples]
435
  # index_0 = [index for index in range(len(x)) if x[index] == 0]
436
  # index_1 = [index for index in range(len(x)) if x[index] == 1]
437
 
 
459
  verbose=False)
460
  n_samples = np.minimum(1000, len(x_batch))
461
  features_umap = reducer.fit_transform(x_batch[:n_samples])
462
+ try:x = y_batch.values[:n_samples]
463
+ except:x = y_batch[:n_samples]
464
  # index_0 = [index for index in range(len(x)) if x[index] == 0]
465
  # index_1 = [index for index in range(len(x)) if x[index] == 1]
466
 
 
552
  features_umap = reducer.fit_transform(x_batch[:n_samples])
553
 
554
  if "Classifier" in downstream_model:
555
+ try:x = y_batch.values[:n_samples]
556
+ except: x = y_batch[:n_samples]
557
  index_0 = [index for index in range(len(x)) if x[index] == 0]
558
  index_1 = [index for index in range(len(x)) if x[index] == 1]
559