Spaces:

syngent
/

Chemical_Identifier

Runtime error

App Files Files Community

robertselvam committed on Jul 12, 2023

Commit

3f7ab5b

1 Parent(s): 6e68a4a

Upload app.py

Browse files

Files changed (1) hide show

app.py +167 -0

app.py ADDED Viewed

	@@ -0,0 +1,167 @@

+from pydantic import NoneStr
+import os
+from langchain.chains.question_answering import load_qa_chain
+from langchain.document_loaders import UnstructuredFileLoader
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.llms import OpenAI
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+import gradio as gr
+import openai
class ChemicalIdentifier:
    """Gradio app that reports the chemical capabilities described in an uploaded document.

    An uploaded file is split into chunks, embedded with OpenAI embeddings and
    indexed with FAISS.  The "Analyse" button then runs a LangChain
    question-answering chain over the chunks most similar to the question.
    """

    # Question put to the QA chain when the caller does not supply one.
    DEFAULT_QUESTION = "Identify the chemical capabilities"
    # Text shown in the answer box whenever no answer could be produced.
    ERROR_MESSAGE = "Please upload Proper Document"

    # Static markup for the page header.  Whitespace inside the literals is not
    # significant to the rendered HTML.
    _LOGOS_HTML = (
        '<img class="leftimage" align="left" src="https://templates.images.credential.net/1612472097627370951721412474196.png" alt="Image" width="210" height="210">\n'
        '          <img class="rightimage" align="right" src="https://logos-download.com/wp-content/uploads/2016/06/Syngenta_logo.png" alt="Image" width="150" height="140">'
    )
    _RULE_HTML = '<hr style="border-top: 5px solid white;">'
    _TITLE_HTML = (
        "<br>\n"
        '                  <h1 style="text-align:center;">\n'
        "                      Syngenta Chemical Identifier\n"
        "                    </h1> "
    )

    def __init__(self, openai_api_key=None):
        """Make an OpenAI API key available through ``OPENAI_API_KEY``.

        Args:
            openai_api_key (str | None): Key to export as ``OPENAI_API_KEY``.
                When omitted, a value already present in the environment is
                left untouched.
        """
        # The key must come from the caller or the environment (for example a
        # deployment secret); credentials are never stored in source code.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        elif not os.environ.get("OPENAI_API_KEY"):
            self._logger().warning(
                "OPENAI_API_KEY is not set; OpenAI requests will fail until it is provided."
            )

    @staticmethod
    def _logger():
        """Return the logger used by this class."""
        # Imported here so the class does not rely on a module-level import.
        import logging

        return logging.getLogger(__name__)

    def get_empty_state(self):
        """Return the session state used before any document has been indexed."""
        return {"knowledge_base": None}

    def create_knowledge_base(self, docs, chunk_size=500, chunk_overlap=0):
        """Build a FAISS index from loaded documents.

        Args:
            docs (list): Document objects as returned by a LangChain loader's
                ``load()``.
            chunk_size (int): Target chunk length in characters.
            chunk_overlap (int): Number of characters shared by adjacent chunks.

        Returns:
            FAISS: Vector store built from the document chunks.

        Raises:
            ValueError: If there is nothing to index.
        """
        if not docs:
            raise ValueError("No documents were provided to index.")
        # Split on newlines and measure chunk length in characters.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )
        chunks = text_splitter.split_documents(docs)
        if not chunks:
            # Fail with a clear message instead of an obscure error from the
            # vector store when no text could be extracted.
            raise ValueError("The documents contain no text to index.")
        embeddings = OpenAIEmbeddings()
        return FAISS.from_documents(chunks, embeddings)

    def upload_file(self, file_obj):
        """Load an uploaded file and build a knowledge base from its contents.

        Args:
            file_obj (file-like object): Uploaded file; its ``name`` attribute
                is used as the path to load.

        Returns:
            tuple: ``(file name, state)``.  ``state`` is
            ``{"knowledge_base": <FAISS>}`` on success and the empty state when
            the file could not be loaded or indexed.
        """
        file_name = getattr(file_obj, "name", None)
        try:
            # The "fast" strategy is passed through to the unstructured loader.
            loader = UnstructuredFileLoader(file_name, strategy="fast")
            docs = loader.load()
            knowledge_base = self.create_knowledge_base(docs)
        except Exception:
            # Keep the UI responsive, but record why indexing failed and hand
            # back a state with the same shape as every other state.
            self._logger().exception("Could not build a knowledge base from %r", file_name)
            return file_name, self.get_empty_state()
        return file_name, {"knowledge_base": knowledge_base}

    def answer_question(self, state, question=None):
        """Answer a question from the knowledge base held in ``state``.

        Args:
            state (dict): Session state holding the ``"knowledge_base"`` entry.
            question (str | None): Question to ask; defaults to
                ``DEFAULT_QUESTION``.

        Returns:
            str: The chain's answer, or ``ERROR_MESSAGE`` when no knowledge
            base is available or answering fails.
        """
        question = question or self.DEFAULT_QUESTION
        knowledge_base = state.get("knowledge_base") if isinstance(state, dict) else None
        if knowledge_base is None:
            # Nothing has been indexed yet (or the upload failed).
            return self.ERROR_MESSAGE
        try:
            # Retrieve the chunks most similar to the question and let the
            # "stuff" chain pass them to the model in a single prompt.
            docs = knowledge_base.similarity_search(question)
            llm = OpenAI(temperature=0.4)
            chain = load_qa_chain(llm, chain_type="stuff")
            return chain.run(input_documents=docs, question=question)
        except Exception:
            self._logger().exception("Question answering failed")
            return self.ERROR_MESSAGE

    def gradio_interface(self, share=True):
        """Build the Gradio interface for the Chemical Identifier and launch it.

        Args:
            share (bool): Forwarded to ``launch(share=...)``.
        """
        with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
            state = gr.State(self.get_empty_state())
            gr.HTML(self._LOGOS_HTML)
            with gr.Column(elem_id="col-container"):
                gr.HTML(self._RULE_HTML)
                gr.HTML(self._TITLE_HTML)
                gr.HTML(self._RULE_HTML)
                gr.Markdown("**Upload your file**")
                with gr.Row(elem_id="row-flex"):
                    # Integer scales in a 9:1 ratio: the file display takes
                    # most of the row, the browse button the remainder.
                    with gr.Column(scale=9, min_width=160):
                        file_output = gr.File(elem_classes="filenameshow")
                    with gr.Column(scale=1, min_width=160):
                        upload_button = gr.UploadButton(
                            "Browse File",
                            file_types=[".txt", ".pdf", ".doc", ".docx"],
                            elem_classes="filenameshow",
                        )
                with gr.Row():
                    with gr.Column(scale=1, min_width=0):
                        analyse_btn = gr.Button(value="Analyse")
                with gr.Row():
                    with gr.Column(scale=1, min_width=0):
                        answer = gr.Textbox(
                            value="",
                            label="Answer Box :",
                            show_label=True,
                            placeholder="",
                            lines=5,
                        )
            # Uploading refreshes the file display and the session state;
            # "Analyse" reads the state and fills the answer box.
            upload_button.upload(self.upload_file, upload_button, [file_output, state])
            analyse_btn.click(self.answer_question, [state], [answer])
        demo.queue().launch(share=share)
if __name__ == "__main__":
    # Script entry point: create the app and start its Gradio interface.
    ChemicalIdentifier().gradio_interface()