Spaces:

syngent
/

Chemical_Identifier

Runtime error

App Files Files Community

Chemical_Identifier / app.py

robertselvam

Update app.py

81949da over 1 year ago

raw

history blame contribute delete

6.23 kB

	import logging
	import os

	import gradio as gr
	import openai
	from langchain.chains.question_answering import load_qa_chain
	from langchain.document_loaders import UnstructuredFileLoader
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.llms import OpenAI
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.vectorstores import FAISS
	# NOTE(review): NoneStr is not referenced anywhere in the visible code and
	# pydantic v2 no longer exports it -- presumably safe to drop; confirm first.
	from pydantic import NoneStr


	class ChemicalIdentifier:
	    """Gradio app that indexes an uploaded document and queries an LLM about it.

	    An uploaded file is loaded, split into chunks, embedded with OpenAI
	    embeddings and stored in a FAISS index (the "knowledge base"). The
	    "Analyse" button then runs one fixed question against that index.
	    """

	    # Fixed prompt used for both the similarity search and the QA chain.
	    _QUESTION = "Identify the chemical capabilities"

	    # Shown in the answer box whenever no usable knowledge base is available
	    # or the question-answering call fails.
	    _NO_DOCUMENT_MESSAGE = "Please upload Proper Document"

	    _logger = logging.getLogger(__name__)

	    def __init__(self):
	        """Configure the OpenAI client from the OPENAI_API_KEY env variable."""
	        openai.api_key = os.getenv("OPENAI_API_KEY")

	    def get_empty_state(self):
	        """Return a fresh session state that holds no knowledge base."""
	        return {"knowledge_base": None}

	    def create_knowledge_base(self, docs):
	        """Create a FAISS knowledge base from the given documents.

	        Args:
	            docs: Loaded documents, as produced by the loader's ``load()``
	                call in ``upload_file``.

	        Returns:
	            FAISS: Index built from the document chunks and their embeddings.
	        """
	        # Split on newlines into chunks of at most 500 characters, no overlap.
	        text_splitter = CharacterTextSplitter(
	            separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
	        )
	        chunks = text_splitter.split_documents(docs)

	        # Embed every chunk with OpenAI embeddings and index them in FAISS.
	        embeddings = OpenAIEmbeddings()
	        return FAISS.from_documents(chunks, embeddings)

	    def upload_file(self, file_obj):
	        """Load an uploaded file and build a knowledge base from its contents.

	        Args:
	            file_obj (file-like object): Uploaded file; only ``.name`` is read.

	        Returns:
	            tuple: ``(file name, state dict)``. On any loading or indexing
	            failure the state is the empty state, so the state always has
	            the same ``{"knowledge_base": ...}`` shape.
	        """
	        file_name = getattr(file_obj, "name", None)
	        if file_name is None:
	            # Nothing was uploaded; there is no path to load.
	            return None, self.get_empty_state()

	        try:
	            # The "fast" strategy is passed through to the unstructured loader.
	            loader = UnstructuredFileLoader(file_name, strategy="fast")
	            docs = loader.load()
	            knowledge_base = self.create_knowledge_base(docs)
	        except Exception:
	            # UI-handler boundary: keep the app alive but record the cause.
	            self._logger.exception(
	                "Failed to build a knowledge base from %r", file_name
	            )
	            return file_name, self.get_empty_state()

	        return file_name, {"knowledge_base": knowledge_base}

	    def answer_question(self, state):
	        """Answer the fixed chemical question from the current knowledge base.

	        Args:
	            state (dict): Session state containing the ``"knowledge_base"`` key.

	        Returns:
	            str: The model's answer, or the "upload a proper document" message
	            when there is no knowledge base or the query fails.
	        """
	        # Tolerate a missing or malformed state instead of relying on a
	        # caught exception to detect it.
	        knowledge_base = (
	            state.get("knowledge_base") if isinstance(state, dict) else None
	        )
	        if knowledge_base is None:
	            return self._NO_DOCUMENT_MESSAGE

	        try:
	            # Retrieve the chunks most similar to the question.
	            docs = knowledge_base.similarity_search(self._QUESTION)

	            # "stuff" chain: all retrieved chunks go into a single prompt.
	            llm = OpenAI(temperature=0.4)
	            chain = load_qa_chain(llm, chain_type="stuff")
	            return chain.run(input_documents=docs, question=self._QUESTION)
	        except Exception:
	            # UI-handler boundary: log the real error, show the generic text.
	            self._logger.exception("Question answering failed")
	            return self._NO_DOCUMENT_MESSAGE

	    def gradio_interface(self):
	        """Build the Gradio UI for the Chemical Identifier and launch it."""
	        # NOTE(review): the layout nesting below was reconstructed from a
	        # whitespace-flattened copy of this file -- confirm against the UI.
	        with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
	            state = gr.State(self.get_empty_state())
	            gr.HTML("""<img class="leftimage" align="left" src="https://templates.images.credential.net/1612472097627370951721412474196.png" alt="Image" width="210" height="210">
	            <img class="rightimage" align="right" src="https://logos-download.com/wp-content/uploads/2016/06/Syngenta_logo.png" alt="Image" width="150" height="140">""")
	            with gr.Column(elem_id="col-container"):
	                gr.HTML(
	                    """<hr style="border-top: 5px solid white;">"""
	                )
	                gr.HTML(
	                    """<br>
	                    <h1 style="text-align:center;">
	                    Syngenta Chemical Identifier
	                    </h1> """
	                )
	                gr.HTML(
	                    """<hr style="border-top: 5px solid white;">"""
	                )

	                gr.Markdown("Upload your file")
	                with gr.Row(elem_id="row-flex"):
	                    with gr.Column(scale=0.90, min_width=160):
	                        file_output = gr.File(elem_classes="heightfit")
	                    with gr.Column(scale=0.10, min_width=160):
	                        upload_button = gr.UploadButton(
	                            "Browse File",
	                            file_types=[".txt", ".pdf", ".doc", ".docx"],
	                            elem_classes="heightfit",
	                        )

	                with gr.Row():
	                    with gr.Column(scale=1, min_width=0):
	                        analyse_btn = gr.Button(value="Analyse")
	                with gr.Row():
	                    with gr.Column(scale=1, min_width=0):
	                        answer = gr.Textbox(
	                            value="",
	                            label='Chemicals :',
	                            show_label=True,
	                            placeholder="",
	                            lines=5,
	                        )

	            # Uploading fills the file widget and replaces the session state.
	            upload_button.upload(
	                self.upload_file, upload_button, [file_output, state]
	            )

	            # "Analyse" answers the fixed question from the stored state.
	            analyse_btn.click(self.answer_question, [state], [answer])

	        demo.queue().launch()

	# Script entry point: build the app object and launch its Gradio UI.
	if __name__ == "__main__":
	    ChemicalIdentifier().gradio_interface()