from pydantic import NoneStr import os from langchain.chains.question_answering import load_qa_chain from langchain.document_loaders import UnstructuredFileLoader from langchain.embeddings.openai import OpenAIEmbeddings from langchain.llms import OpenAI from langchain.text_splitter import CharacterTextSplitter from langchain.vectorstores import FAISS import gradio as gr import openai class ChemicalIdentifier: def __init__(self): openai.api_key = os.getenv["OPENAI_API_KEY"] def get_empty_state(self): """ Create empty Knowledge base""" return {"knowledge_base": None} def create_knowledge_base(self,docs): """Create a knowledge base from the given documents. Args: docs (List[str]): List of documents. Returns: FAISS: Knowledge base built from the documents. """ # Initialize a CharacterTextSplitter to split the documents into chunks # Each chunk has a maximum length of 500 characters # There is no overlap between the chunks text_splitter = CharacterTextSplitter( separator="\n", chunk_size=500, chunk_overlap=0, length_function=len ) # Split the documents into chunks using the text_splitter chunks = text_splitter.split_documents(docs) # Initialize an OpenAIEmbeddings model to compute embeddings of the chunks embeddings = OpenAIEmbeddings() # Build a knowledge base using FAISS from the chunks and their embeddings knowledge_base = FAISS.from_documents(chunks, embeddings) # Return the resulting knowledge base return knowledge_base def upload_file(self, file_obj): """Upload a file and create a knowledge base from its contents. Args: file_obj (file-like object): The file to upload. Returns: tuple: A tuple containing the file name and the knowledge base. """ try: # Initialize an UnstructuredFileLoader to load the contents of the file # The loader uses a "fast" strategy for efficient loading loader = UnstructuredFileLoader(file_obj.name, strategy="fast") # Load the contents of the file using the loader docs = loader.load() # Create a knowledge base from the loaded documents using the create_knowledge_base() method knowledge_base = self.create_knowledge_base(docs) except: # If an error occurs during file loading return file name and an empty string return file_obj.name, "" # Return a tuple containing the file name and the knowledge base return file_obj.name, {"knowledge_base": knowledge_base} def answer_question(self, state): """Answer a question based on the current knowledge base. Args: state (dict): The current state containing the knowledge base. Returns: str: The answer to the question. """ try: # Retrieve the knowledge base from the state dictionary knowledge_base = state["knowledge_base"] # Set the question for which we want to find the answer question = "Identify the chemical capabilities" # Perform a similarity search on the knowledge base to retrieve relevant documents docs = knowledge_base.similarity_search(question) # Initialize an OpenAI language model for question answering llm = OpenAI(temperature=0.4) # Load a question-answering chain using the language model chain = load_qa_chain(llm, chain_type="stuff") # Run the question-answering chain on the input documents and question response = chain.run(input_documents=docs, question=question) # Return the response as the answer to the question return response except: # If an error occurs, return a default error message return "Please upload Proper Document" def gradio_interface(self): """Create the Gradio interface for the Chemical Identifier.""" with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo: state = gr.State(self.get_empty_state()) gr.HTML(""" """) with gr.Column(elem_id="col-container"): gr.HTML( """