robertselvam's picture
Update app.py
e7a5bf7
raw
history blame
6.24 kB
import logging
import os

import gradio as gr
import openai
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# NOTE(review): NoneStr appears unused in this file and is not available in
# pydantic v2 -- confirm and consider dropping this import.
from pydantic import NoneStr
class ChemicalIdentifier:
    """Gradio app that asks a chemical-capability question about an uploaded file.

    An uploaded document is loaded, split into chunks, embedded with OpenAI
    embeddings and indexed in a FAISS store. Pressing "Analyse" retrieves the
    chunks most similar to the question and feeds them to an OpenAI LLM
    through a LangChain "stuff" question-answering chain.
    """

    # Question asked when the caller does not supply one.
    DEFAULT_QUESTION = "Identify the chemical capabilities"
    # Message shown in the answer box whenever no answer can be produced.
    ERROR_MESSAGE = "Please upload Proper Document"

    _log = logging.getLogger(__name__)

    def __init__(self):
        """Configure the OpenAI client from the OPENAI_API_KEY environment variable."""
        # os.getenv is a function: it has to be called. Subscripting it
        # (os.getenv[...]) raises TypeError as soon as the class is instantiated.
        openai.api_key = os.getenv("OPENAI_API_KEY")

    def get_empty_state(self):
        """Return the session state used before any document has been indexed."""
        return {"knowledge_base": None}

    def create_knowledge_base(self, docs):
        """Create a knowledge base from the given documents.

        Args:
            docs: Documents as returned by a LangChain loader's ``load()``
                (this is what ``upload_file`` passes in).

        Returns:
            FAISS: Vector store built from the document chunks.
        """
        # Split on newlines, aiming for chunks of about 500 characters with
        # no overlap between consecutive chunks.
        text_splitter = CharacterTextSplitter(
            separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
        )
        chunks = text_splitter.split_documents(docs)
        # Embed every chunk and index the vectors for similarity search.
        embeddings = OpenAIEmbeddings()
        return FAISS.from_documents(chunks, embeddings)

    def upload_file(self, file_obj):
        """Load an uploaded file and build a knowledge base from its contents.

        Args:
            file_obj: Uploaded-file object; only its ``name`` attribute (the
                path handed to the loader) is read.

        Returns:
            tuple: ``(file name, state dict)``. When loading or indexing fails
            the state is the empty state, so downstream code always receives
            a dict of the same shape.
        """
        # Read the name once, defensively, so the error path below cannot
        # raise a second time if the callback is handed None.
        file_name = getattr(file_obj, "name", None)
        try:
            # "fast" is the loader strategy the app has always requested.
            loader = UnstructuredFileLoader(file_name, strategy="fast")
            docs = loader.load()
            knowledge_base = self.create_knowledge_base(docs)
        except Exception:
            # Best-effort by design: keep the UI alive, but record why the
            # document could not be indexed instead of hiding the error.
            self._log.exception("Could not build a knowledge base from %r", file_name)
            return file_name, self.get_empty_state()
        return file_name, {"knowledge_base": knowledge_base}

    def answer_question(self, state, question=DEFAULT_QUESTION):
        """Answer a question from the knowledge base held in ``state``.

        Args:
            state: Session state; expected to be a dict with a
                ``"knowledge_base"`` entry. Anything else is treated as
                "no document uploaded".
            question: Question to ask. Defaults to ``DEFAULT_QUESTION``.

        Returns:
            str: The chain's answer, or ``ERROR_MESSAGE`` when there is no
            knowledge base or the lookup / LLM call fails.
        """
        knowledge_base = (
            state.get("knowledge_base") if isinstance(state, dict) else None
        )
        if knowledge_base is None:
            # Nothing indexed yet (or the upload failed): no need to go
            # through an exception to find that out.
            return self.ERROR_MESSAGE
        try:
            # Retrieve the chunks most similar to the question ...
            docs = knowledge_base.similarity_search(question)
            # ... and let the LLM answer from them with a "stuff" chain.
            llm = OpenAI(temperature=0.4)
            chain = load_qa_chain(llm, chain_type="stuff")
            return chain.run(input_documents=docs, question=question)
        except Exception:
            self._log.exception("Question answering failed")
            return self.ERROR_MESSAGE

    def gradio_interface(self):
        """Create the Gradio interface for the Chemical Identifier and launch it."""
        separator_html = """<hr style="border-top: 5px solid white;">"""
        with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
            # Per-session state holding the knowledge base of the uploaded file.
            state = gr.State(self.get_empty_state())
            gr.HTML(
                '<img class="leftimage" align="left" src="https://templates.images.credential.net/1612472097627370951721412474196.png" alt="Image" width="210" height="210">\n'
                '<img class="rightimage" align="right" src="https://logos-download.com/wp-content/uploads/2016/06/Syngenta_logo.png" alt="Image" width="150" height="140">'
            )
            with gr.Column(elem_id="col-container"):
                gr.HTML(separator_html)
                gr.HTML(
                    '<br>\n'
                    '<h1 style="text-align:center;">\n'
                    'Syngenta Chemical Identifier\n'
                    '</h1> '
                )
                gr.HTML(separator_html)
                gr.Markdown("**Upload your file**")
                with gr.Row(elem_id="row-flex"):
                    with gr.Column(scale=0.90, min_width=160):
                        file_output = gr.File(elem_classes="filenameshow")
                    with gr.Column(scale=0.10, min_width=160):
                        upload_button = gr.UploadButton(
                            "Browse File",
                            file_types=[".txt", ".pdf", ".doc", ".docx"],
                            elem_classes="filenameshow",
                        )
                with gr.Row():
                    with gr.Column(scale=1, min_width=0):
                        analyse_btn = gr.Button(value="Analyse")
                with gr.Row():
                    with gr.Column(scale=1, min_width=0):
                        answer = gr.Textbox(
                            value="",
                            label='Answer Box :',
                            show_label=True,
                            placeholder="",
                            lines=5,
                        )
            # Uploading indexes the file and stores the result in the session
            # state; "Analyse" answers from that state.
            upload_button.upload(self.upload_file, upload_button, [file_output, state])
            analyse_btn.click(self.answer_question, [state], [answer])
        demo.queue().launch()
if __name__ == "__main__":
    # Run as a script: build the app object and start the Gradio UI.
    ChemicalIdentifier().gradio_interface()