"""LangChain tool suites that query the Materials Project (MP) API and build
few-shot prompts so an LLM can predict material properties.

``MAPITools`` holds the shared machinery; ``MAPI_class_tools`` handles binary
(classification) properties and ``MAPI_reg_tools`` numeric (regression) ones.
"""

import os
import re

import langchain
import openai
import pandas as pd
import requests
from emmet.core.summary import HasProps
from gpt_index import GPTIndexMemory, GPTListIndex
from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, agents
from langchain.agents import Tool, initialize_agent, tool
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts.example_selector import (
    MaxMarginalRelevanceExampleSelector,
    SemanticSimilarityExampleSelector,
)
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores import FAISS, Chroma
from mp_api.client import MPRester
from rdkit import Chem


class MAPITools:
    """Base class bundling MP-API search and LLM few-shot prediction tools.

    Subclasses must set ``self.prop`` (the MP summary field name) and
    ``self.prop_name`` (human-readable property name), and implement
    ``check_prop_by_formula`` and ``create_context_prompt``.
    """

    def __init__(self):
        self.model = 'text-ada-001'  # maybe change to gpt-4 when ready
        self.k = 10  # number of few-shot examples picked by the example selector

    def get_material_atoms(self, formula):
        """Receives a material formula and returns the atom symbols present in
        it separated by comma (e.g. ``"Fe2O3"`` -> ``"Fe,O"``)."""
        # An element symbol is a capital letter plus optional lowercase letters,
        # optionally followed by a count (the count is not needed here).
        matches = re.findall(r"([A-Z][a-z]*)(\d*)", formula)
        return ",".join(symbol for symbol, _count in matches)

    def check_prop_by_formula(self, formula):
        """Look the formula up in the MP API and report the property value."""
        raise NotImplementedError('Should be implemented in children classes')

    def search_similars_by_atom(self, atoms):
        """This function receives a string with the atoms separated by comma as
        input and returns a list of similar materials (MP summary documents
        containing ``formula_pretty`` and ``self.prop``)."""
        atoms = atoms.replace(" ", "")
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(
                elements=atoms.split(','),
                fields=["formula_pretty", self.prop],
            )
        return docs

    def create_context_prompt(self, formula):
        """Build a few-shot prompt for the property of ``formula``."""
        raise NotImplementedError('Should be implemented in children classes')

    def LLM_predict(self, prompt):
        """This function receives a prompt generated with context by the
        create_context_prompt tool and requests a completion from a language
        model. Then returns the completion text."""
        llm = OpenAI(
            model_name=self.model,
            temperature=0.7,
            n=1,
            best_of=5,
            top_p=1.0,
            # "###" matches the terminator used in the few-shot examples.
            stop=["\n\n", "###", "#", "##"],
        )
        return llm.generate([prompt]).generations[0][0].text

    def get_tools(self):
        """Return the LangChain ``Tool`` wrappers exposed by this helper."""
        # The context-prompt description depends on whether this instance does
        # classification (MAPI_class_tools) or regression (MAPI_reg_tools).
        if isinstance(self, MAPI_class_tools):
            context_description = (
                f"This function received a material formula as input and create a prompt "
                f"to be inputed in the LLM_predict tool to predict if the material is {self.prop_name}."
            )
        else:
            context_description = (
                f"This function received a material formula as input and create a prompt "
                f"to be inputed in the LLM_predict tool to predict the {self.prop_name} of a material."
            )
        return [
            Tool(
                name="Get atoms in material",
                func=self.get_material_atoms,
                description=(
                    "Receives a material formula and returns the atoms symbols "
                    "present in it separated by comma."
                ),
            ),
            Tool(
                name=f"Checks if material is {self.prop_name} by formula",
                func=self.check_prop_by_formula,
                description=(
                    f"This functions searches in the material project's API for the "
                    f"formula and returns if it is {self.prop_name} or not."
                ),
            ),
            # "Search similar materials by atom" tool intentionally disabled:
            # it returns raw MP documents, which are not agent-friendly output.
            Tool(
                name=f"Create {self.prop_name} context to LLM search",
                func=self.create_context_prompt,
                description=context_description,
            ),
            Tool(
                # BUGFIX: original tool name had a typo ("LLM predictiom").
                name="LLM prediction",
                func=self.LLM_predict,
                description=(
                    "This function receives a prompt generate with context by the "
                    "create_context_prompt tool and request a completion to a "
                    "language model. Then returns the completion"
                ),
            ),
        ]


class MAPI_class_tools(MAPITools):
    """Tools for binary (classification) material properties.

    Args:
        prop: MP summary field name (e.g. ``"is_stable"``).
        prop_name: human-readable property name used in prompts/tool names.
        p_label: label reported when the property value is truthy.
        n_label: label reported when the property value is falsy.
    """

    def __init__(self, prop, prop_name, p_label, n_label):
        super().__init__()
        self.prop = prop
        self.prop_name = prop_name
        self.p_label = p_label
        self.n_label = n_label

    def check_prop_by_formula(self, formula):
        """This function searches in the material project's API for the formula
        and returns the positive/negative label for the property, or an error
        message when no exact match is found.

        NOTE: originally written as an f-string, which Python does not treat
        as a docstring.
        """
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(
                formula=formula, fields=["formula_pretty", self.prop]
            )
        # Only trust an exact formula match on the first hit.
        if docs and docs[0].formula_pretty == formula:
            return self.p_label if docs[0].dict()[self.prop] else self.n_label
        return f"Could not find any material while searching {formula}"

    def create_context_prompt(self, formula):
        """This function received a material formula as input and create a
        prompt to be inputed in the LLM_predict tool to predict whether the
        formula has the property (few-shot, MMR-selected examples)."""
        elements = self.get_material_atoms(formula)
        similars = self.search_similars_by_atom(elements)
        labeled = [
            {
                'formula': doc.formula_pretty,
                'prop': self.p_label if doc.dict()[self.prop] else self.n_label,
            }
            for doc in similars
        ]
        # Deduplicate example rows before handing them to the selector.
        examples = pd.DataFrame(labeled).drop_duplicates().to_dict(orient="records")
        # Max-marginal-relevance: relevant to the query yet mutually diverse.
        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
            examples,
            OpenAIEmbeddings(),
            FAISS,
            k=self.k,
        )
        prefix = (
            f'You are a bot who can predict if a material is {self.prop_name}.\n'
            f'Given this list of known materials and the information if they are {self.p_label} or {self.n_label}, \n'
            f'you need to answer the question if the last material is {self.prop_name}:'
        )
        # {{formula}}/{{prop}} stay as template variables; "###" terminates an
        # answer (it is also an LLM stop token in LLM_predict).
        example_prompt = PromptTemplate(
            input_variables=["formula", "prop"],
            template=f"Is {{formula}} a {self.prop_name} material?@@@\n{{prop}}###",
        )
        suffix = f"Is {{formula}} a {self.prop_name} material?@@@\n"
        prompt = FewShotPromptTemplate(
            example_prompt=example_prompt,
            example_selector=example_selector,
            prefix=prefix,
            suffix=suffix,
            input_variables=["formula"],
        )
        return prompt.format(formula=formula)


class MAPI_reg_tools(MAPITools):
    """Tools for numeric (regression) material properties.

    TODO: deal with units.

    Args:
        prop: MP summary field name (e.g. ``"band_gap"``).
        prop_name: human-readable property name used in prompts/tool names.
    """

    def __init__(self, prop, prop_name):
        super().__init__()
        self.prop = prop
        self.prop_name = prop_name

    def check_prop_by_formula(self, formula):
        """This function searches in the material project's API for the formula
        and returns the recorded property value, or a message when the value is
        missing or the material is not found."""
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(
                formula=formula, fields=["formula_pretty", self.prop]
            )
        if docs and docs[0].formula_pretty == formula:
            value = docs[0].dict()[self.prop]
            # BUGFIX: the None check used to live in an unreachable elif branch
            # (the matching `if` returned first), so a matching record with a
            # missing value silently returned None.
            if value is None:
                return f"There is no record of {self.prop_name} for {formula}"
            return value
        return f"Could not find any material while searching {formula}"

    def create_context_prompt(self, formula):
        """This function received a material formula as input and create a
        prompt to be inputed in the LLM_predict tool to predict the numeric
        value of the property for the material.

        NOTE: originally written as an f-string, which Python does not treat
        as a docstring.
        """
        elements = self.get_material_atoms(formula)
        similars = self.search_similars_by_atom(elements)
        labeled = [
            {
                'formula': doc.formula_pretty,
                # BUGFIX: format spec was "2f" (min-width 2, i.e. 6 decimals);
                # the intended spec is ".2f" (2 decimal places).
                'prop': f"{doc.dict()[self.prop]:.2f}"
                if doc.dict()[self.prop] is not None
                else None,
            }
            for doc in similars
        ]
        # Drop duplicates and materials with no recorded value.
        examples = (
            pd.DataFrame(labeled).drop_duplicates().dropna().to_dict(orient="records")
        )
        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
            examples,
            OpenAIEmbeddings(),
            FAISS,
            k=self.k,
        )
        # BUGFIX: implicit concatenation glued ":" directly to "The answer...";
        # also fixed the garbled "answer the what is" phrasing.
        prefix = (
            f'You are a bot who can predict the {self.prop_name} of a material .\n'
            f'Given this list of known materials and the measurement of their {self.prop_name}, \n'
            f'you need to answer what is the {self.prop_name} of the material:\n'
            'The answer should be numeric and finish with ###'
        )
        example_prompt = PromptTemplate(
            input_variables=["formula", "prop"],
            template=f"What is the {self.prop_name} for {{formula}}?@@@\n{{prop}}###",
        )
        suffix = f"What is the {self.prop_name} for {{formula}}?@@@\n"
        prompt = FewShotPromptTemplate(
            example_prompt=example_prompt,
            example_selector=example_selector,
            prefix=prefix,
            suffix=suffix,
            input_variables=["formula"],
        )
        return prompt.format(formula=formula)