Spaces:
Build error
Build error
File size: 3,306 Bytes
f274d93 77cbf82 f274d93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import os
from urllib.parse import quote

import requests
from langchain import LLMMathChain, SerpAPIWrapper
from langchain import OpenAI
from langchain.agents import Tool, tool
from rdkit import Chem
@tool
def query2smiles(text):
    '''This function queries the one given molecule name and returns a SMILES string from the record'''
    # NOTE(review): @tool presumably uses the docstring above as the tool
    # description handed to the agent, so its wording is left untouched.
    #
    # Looks up `text` (a compound name) in PubChem's PUG REST service and
    # returns the IsomericSMILES of the first matching record. On any lookup
    # failure a human-readable message is returned instead of raising.
    # Salt stripping is not implemented: the SMILES is returned as reported.
    try:
        # Percent-encode the name so characters such as '/', '#' or '?' cannot
        # alter the structure of the request URL.
        url = (
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
            + quote(text, safe='')
            + '/property/IsomericSMILES/JSON'
        )
        # A timeout keeps the tool from hanging forever on a stalled connection.
        response = requests.get(url, timeout=10)
        data = response.json()
        return data['PropertyTable']['Properties'][0]['IsomericSMILES']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network failure, non-JSON body, a response without the expected
        # PropertyTable layout, or a non-string `text`.
        return f"Could not find the SMILES for {text}"
@tool
def smiles2IUPAC(text):
    '''This function queries the one given smiles name and returns a IUPAC name from the record'''
    # NOTE(review): @tool presumably uses the docstring above as the tool
    # description handed to the agent, so its wording is left untouched.
    #
    # Looks up `text` (a SMILES string) in PubChem's PUG REST service and
    # returns the IUPACName of the first matching record. On any lookup
    # failure a human-readable message is returned instead of raising.
    try:
        # SMILES routinely contain '/', '#', '+' and '%', which break a URL
        # when placed in the path. Passing the SMILES as the `smiles` URL
        # argument lets requests percent-encode it safely.
        response = requests.get(
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/property/IUPACName/JSON',
            params={'smiles': text},
            timeout=10,  # avoid hanging forever on a stalled connection
        )
        data = response.json()
        return data["PropertyTable"]["Properties"][0]["IUPACName"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network failure, non-JSON body, or a response without the expected
        # PropertyTable layout (e.g. a PubChem fault document).
        return f"Could not find the IUPAC name for {text}"
@tool
def formula2IUPAC(text):
    '''This function queries the one given chemical formula and returns a material name from the record.'''
    # NOTE(review): @tool presumably uses the docstring above as the tool
    # description handed to the agent, so its wording is left untouched.
    #
    # Looks up `text` (a molecular formula) in PubChem's PUG REST service and
    # returns the IUPACName of the first record in the response. On any lookup
    # failure a human-readable message is returned instead of raising.
    #
    # NOTE(review): PubChem's plain `formula` search may answer asynchronously
    # (a ListKey instead of a PropertyTable), in which case this falls through
    # to the failure message; `fastformula` may be the synchronous variant.
    # Confirm against the PUG REST documentation before switching.
    try:
        # Percent-encode the formula so characters such as '+' or '/' cannot
        # alter the structure of the request URL.
        url = (
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/formula/'
            + quote(text, safe='')
            + '/property/IUPACName/JSON'
        )
        # A timeout keeps the tool from hanging forever on a stalled connection.
        response = requests.get(url, timeout=10)
        properties = response.json()["PropertyTable"]["Properties"]
        return properties[0]["IUPACName"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network failure, non-JSON body, a response without the expected
        # PropertyTable layout, or a non-string `text`.
        return f"Could not find the IUPAC name for {text}"
@tool
def name2formula(text):
    '''This function queries the one given material name and returns a chemical formula from the record.'''
    # NOTE(review): @tool presumably uses the docstring above as the tool
    # description handed to the agent, so its wording is left untouched.
    #
    # Looks up `text` (a compound name) in PubChem's PUG REST service and
    # returns the MolecularFormula of the first matching record. On any lookup
    # failure a human-readable message is returned instead of raising.
    try:
        # Percent-encode the name so characters such as '/', '#' or '?' cannot
        # alter the structure of the request URL.
        url = (
            'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/'
            + quote(text, safe='')
            + '/property/MolecularFormula/JSON'
        )
        # A timeout keeps the tool from hanging forever on a stalled connection.
        response = requests.get(url, timeout=10)
        properties = response.json()["PropertyTable"]["Properties"]
        return properties[0]["MolecularFormula"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network failure, non-JSON body, a response without the expected
        # PropertyTable layout, or a non-string `text`.
        return f"Could not find the molecular formula for {text}"
@tool
def canonicalizeSMILES(smiles):
    '''Given a smiles representation, this function returns a canonicalized version of the same smiles.
    It's better to search for molecules in its canonicalized form'''
    # NOTE(review): @tool presumably uses the docstring above as the tool
    # description handed to the agent, so its wording is left untouched.
    #
    # Parses `smiles` with RDKit and writes it back out, which yields RDKit's
    # canonical SMILES for the molecule.
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        # RDKit signals an unparsable SMILES by returning None; passing that
        # to MolToSmiles would raise an opaque error, so report it as a
        # message like the other tools in this file do.
        return f"Could not canonicalize the SMILES {smiles}"
    return Chem.MolToSmiles(molecule)
@tool
def web_search(keywords, search_engine="google"):
    '''Useful to do a simple google search.
    Use this tool to find general information from websites.
    Use keywords for your search.
    '''
    # The API key is read from the SERP_API_KEY environment variable on every
    # call; a fresh wrapper is built each time and used for a single query.
    # NOTE(review): confirm that SerpAPIWrapper's `search_engine` argument
    # really selects the engine by name for the installed langchain version.
    api_key = os.getenv("SERP_API_KEY")
    searcher = SerpAPIWrapper(
        serpapi_api_key=api_key,
        search_engine=search_engine,
    )
    answer = searcher.run(keywords)
    return answer
@tool
def LLM_predict(prompt):
    ''' This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion'''
    # Sampling configuration for the completion model, gathered in one place.
    completion_settings = dict(
        model_name='text-ada-001',  # TODO: Maybe change to gpt-4 when ready
        temperature=0.7,
        n=1,
        best_of=5,
        top_p=1.0,
        stop=["\n\n", "###", "#", "##"],
    )
    model = OpenAI(**completion_settings)

    # A single prompt is submitted, so the text of the first generation of the
    # first (only) prompt is what gets returned.
    result = model.generate([prompt])
    first_generation = result.generations[0][0]
    return first_generation.text
# Tools from this module gathered into one list. formula2IUPAC and
# name2formula are commented out and therefore not part of the list.
common_tools = [
    query2smiles,
    smiles2IUPAC,
    # formula2IUPAC,
    # name2formula,
    canonicalizeSMILES,
    web_search,
    LLM_predict,
]