BioMike committed on
Commit
018e5d9
1 Parent(s): b605b7a

Delete utils

Browse files
Files changed (4) hide show
  1. utils/__init__.py +0 -3
  2. utils/login.py +0 -18
  3. utils/main_model.py +0 -52
  4. utils/rdkit_utils.py +0 -39
utils/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from .main_model import ChemicalConverter
2
- from .rdkit_utils import validate_smiles2iupac, plot_mol
3
- from .login import login
 
 
 
 
utils/login.py DELETED
@@ -1,18 +0,0 @@
1
- import hashlib
2
-
3
def hash_password(access_code: str) -> str:
    """Return the SHA-256 hex digest of *access_code*.

    The code is encoded as UTF-8 before hashing. The digest is unsalted, so
    the same access code always yields the same digest; this is what allows
    the result to be compared against a list of stored digests.

    Args:
        access_code: The plain-text access code to hash.

    Returns:
        The lowercase hexadecimal SHA-256 digest (64 characters).
    """
    return hashlib.sha256(access_code.encode("utf-8")).hexdigest()
8
-
9
-
10
def login(access_code) -> bool:
    """Check an access code against the digests stored in ``hashed_codes.txt``.

    Spaces are removed from the code, the result is hashed with
    ``hash_password``, and the digest is searched for in every line of
    ``hashed_codes.txt`` (resolved relative to the current working
    directory).

    Args:
        access_code: The code entered by the user. ``None`` and any other
            non-string value are rejected without touching the file.

    Returns:
        ``True`` if some line of the file contains the digest, otherwise
        ``False``.

    Raises:
        OSError: If ``hashed_codes.txt`` cannot be opened.
    """
    # Only strings can be hashed; treat everything else (including None) as
    # a failed login instead of crashing on ``.replace``.
    if not isinstance(access_code, str):
        return False

    hashed_code = hash_password(access_code.replace(" ", ""))

    # Substring match per line is kept on purpose: the exact layout of the
    # file (bare digests vs. digests with extra text) is not known here.
    with open("hashed_codes.txt", encoding="utf-8") as file:
        return any(hashed_code in line for line in file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/main_model.py DELETED
@@ -1,52 +0,0 @@
1
- from modeling import MT5ForConditionalGeneration
2
- from transformers import AutoTokenizer
3
- import os
4
-
5
-
6
class ChemicalConverter:
    """Convert between SMILES strings and IUPAC names with a seq2seq model.

    The direction is selected by ``mode``: ``"SMILES2IUPAC"`` converts SMILES
    to IUPAC names; any other value selects the IUPAC-to-SMILES checkpoint.
    """

    # Checkpoints passed to ``from_pretrained`` for each direction.
    _SMILES2IUPAC_CHECKPOINT = "knowledgator/SMILES2IUPAC-canonical-base"
    _IUPAC2SMILES_CHECKPOINT = "knowledgator/IUPAC2SMILES-canonical-small"

    # Upper bound on the number of tokens generated per conversion.
    _MAX_NEW_TOKENS = 156

    def __init__(self, mode: str):
        """Load the model for *mode* together with both tokenizers.

        Args:
            mode: ``"SMILES2IUPAC"`` for SMILES-to-IUPAC conversion; every
                other value falls back to the IUPAC-to-SMILES checkpoint.
        """
        self.mode = mode

        if mode == "SMILES2IUPAC":
            model_path = self._SMILES2IUPAC_CHECKPOINT
        else:
            model_path = self._IUPAC2SMILES_CHECKPOINT

        self.model = MT5ForConditionalGeneration.from_pretrained(model_path)
        # Both tokenizers are always needed: one encodes the input, the other
        # decodes the generated ids.
        self.smiles_tokenizer = AutoTokenizer.from_pretrained("knowledgator/SMILES-FAST-TOKENIZER")
        self.iupac_tokenizer = AutoTokenizer.from_pretrained("knowledgator/IUPAC-FAST-TOKENIZER")
        self.smiles_max_len = 128
        self.iupac_max_len = 156

    def convert(self, input):
        """Convert one SMILES string or IUPAC name, depending on ``self.mode``.

        Args:
            input: The text to convert. Spaces are removed before
                tokenization. (The parameter name shadows the builtin but is
                kept so existing keyword callers keep working.)

        Returns:
            The decoded text of the first generated sequence.
        """
        input = input.replace(" ", "")

        # Pick the encoder-side tokenizer, the decoder-side tokenizer and the
        # input length limit for the configured direction.
        if self.mode == "SMILES2IUPAC":
            tokenizer = self.smiles_tokenizer
            reverse_tokenizer = self.iupac_tokenizer
            max_length = self.smiles_max_len
        else:
            tokenizer = self.iupac_tokenizer
            reverse_tokenizer = self.smiles_tokenizer
            max_length = self.iupac_max_len

        encoding = tokenizer(
            input,
            return_tensors='pt',
            padding="max_length",
            truncation=True,
            max_length=max_length,
        )
        # Move the inputs to whichever device the model lives on.
        device = self.model.device
        encoding = {key: value.to(device) for key, value in encoding.items()}

        # Single-beam generation of one sequence.
        generated = self.model.generate(
            input_ids=encoding['input_ids'],
            attention_mask=encoding['attention_mask'],
            max_new_tokens=self._MAX_NEW_TOKENS,
            num_beams=1,
            num_return_sequences=1,
        )

        decoded = [
            reverse_tokenizer.decode(ids, skip_special_tokens=True)
            for ids in generated
        ]
        return decoded[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/rdkit_utils.py DELETED
@@ -1,39 +0,0 @@
1
- from rdkit import DataStructs, Chem
2
- from rdkit.Chem import AllChem
3
- from rdkit.Chem import Draw
4
- from PIL import Image
5
- import io
6
- from .main_model import ChemicalConverter
7
-
8
# Lazily created IUPAC->SMILES converter, shared by all validation calls so
# the model and tokenizers are loaded once rather than on every call.
_IUPAC2SMILES_CONVERTER = None


def _get_iupac2smiles_converter():
    """Return the shared IUPAC-to-SMILES converter, creating it on first use."""
    global _IUPAC2SMILES_CONVERTER
    if _IUPAC2SMILES_CONVERTER is None:
        _IUPAC2SMILES_CONVERTER = ChemicalConverter(mode="IUPAC2SMILES")
    return _IUPAC2SMILES_CONVERTER


def validate_smiles2iupac(input_smiles, predicted_iupac):
    """Score a predicted IUPAC name by converting it back to SMILES.

    The predicted name is converted back to SMILES, both SMILES strings are
    parsed with RDKit, and the Tanimoto similarity of their RDKit
    fingerprints is returned.

    Args:
        input_smiles: The SMILES string the IUPAC name was predicted from.
        predicted_iupac: The IUPAC name to validate.

    Returns:
        The Tanimoto similarity of the two fingerprints, or ``None`` if
        either SMILES string cannot be parsed by RDKit.
    """
    converter = _get_iupac2smiles_converter()
    predicted_smiles = converter.convert(predicted_iupac)

    # NOTE(review): the first six characters of the converter output are
    # dropped; presumably the model emits a fixed prefix -- confirm.
    ms = [Chem.MolFromSmiles(input_smiles), Chem.MolFromSmiles(predicted_smiles[6:])]

    # MolFromSmiles returns None when a string cannot be parsed.
    if any(mol is None for mol in ms):
        return None

    fpgen = AllChem.GetRDKitFPGenerator()
    fps = [fpgen.GetFingerprint(mol) for mol in ms]

    return DataStructs.TanimotoSimilarity(fps[0], fps[1])
21
-
22
def plot_mol(smiles, *, mol_size=(185, 185), canvas_size=(890, 185)):
    """Draw a molecule and centre it on a white canvas.

    Args:
        smiles: SMILES string of the molecule to draw.
        mol_size: ``(width, height)`` in pixels of the molecule drawing.
        canvas_size: ``(width, height)`` in pixels of the returned image.

    Returns:
        A new RGB ``PIL.Image.Image`` of ``canvas_size`` with the molecule
        drawing pasted in the centre.
    """
    mol_size = tuple(mol_size)
    canvas_size = tuple(canvas_size)

    # Convert the SMILES string to an RDKit molecule object.
    # NOTE(review): MolFromSmiles returns None for unparsable input, which is
    # passed straight to MolToImage -- confirm callers validate beforehand.
    mol = Chem.MolFromSmiles(smiles)

    # Render the molecule at its own size.
    img = Draw.MolToImage(mol, size=mol_size)

    # Blank white canvas of the final size (890x185 pixels by default).
    final_img = Image.new('RGB', canvas_size, 'white')

    # Offsets that centre the drawing on the canvas; with the default sizes
    # the vertical offset is zero.
    left = (canvas_size[0] - mol_size[0]) // 2
    top = (canvas_size[1] - mol_size[1]) // 2

    final_img.paste(img, (left, top))

    return final_img