robertselvam committed on
Commit
3f7ab5b
1 Parent(s): 6e68a4a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -0
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os

import gradio as gr
import openai
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from pydantic import NoneStr
11
+
12
+
13
class ChemicalIdentifier:
    """Gradio app that asks an LLM a fixed question about an uploaded document.

    The uploaded file is loaded, split into chunks, embedded with
    ``OpenAIEmbeddings`` and indexed in a FAISS store.  Pressing "Analyse"
    runs a similarity search for the question against that store and feeds
    the hits to a "stuff" question-answering chain.
    """

    # Question sent to the QA chain when the caller does not supply one.
    DEFAULT_QUESTION = "Identify the chemical capabilities"

    # Text shown in the answer box whenever no answer can be produced.
    ERROR_MESSAGE = "Please upload Proper Document"

    _log = logging.getLogger(__name__)

    def __init__(self, openai_api_key=None):
        """Make the OpenAI API key available to the OpenAI/LangChain clients.

        Args:
            openai_api_key (str, optional): Key to export as the
                ``OPENAI_API_KEY`` environment variable.  When omitted, the
                variable is expected to be set already (for example as a
                deployment secret).
        """
        # SECURITY: never embed the key in source.  Any key that has been
        # committed to a repository must be treated as leaked and rotated.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        elif not os.environ.get("OPENAI_API_KEY"):
            # Do not fail here: the UI can still start, and the failure is
            # reported when an embedding/LLM call is actually attempted.
            self._log.warning(
                "OPENAI_API_KEY is not set; embedding and LLM calls will fail."
            )

    def get_empty_state(self):
        """Return the initial session state: a dict with no knowledge base."""
        return {"knowledge_base": None}

    def create_knowledge_base(self, docs, chunk_size=500, chunk_overlap=0):
        """Create a FAISS knowledge base from the given documents.

        Args:
            docs: Documents as returned by a LangChain loader's ``load()``.
            chunk_size (int): ``chunk_size`` passed to the text splitter
                (measured with ``len``).  Defaults to 500.
            chunk_overlap (int): ``chunk_overlap`` passed to the text
                splitter.  Defaults to 0 (no overlap).

        Returns:
            FAISS: Knowledge base built from the document chunks.
        """
        # Split on newlines so that chunks follow the document's line breaks.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )
        chunks = text_splitter.split_documents(docs)

        # Embed every chunk with OpenAI and index the vectors in FAISS.
        embeddings = OpenAIEmbeddings()
        return FAISS.from_documents(chunks, embeddings)

    def upload_file(self, file_obj):
        """Load an uploaded file and build a knowledge base from its contents.

        Args:
            file_obj (file-like object): The uploaded file; only its ``name``
                attribute (the path on disk) is used.

        Returns:
            tuple: ``(file name, state dict)``.  On success the state holds
            the new knowledge base; on any failure it is the empty state, so
            a later "Analyse" click reports the problem instead of reusing a
            stale knowledge base.
        """
        file_name = file_obj.name
        try:
            loader = UnstructuredFileLoader(file_name, strategy="fast")
            docs = loader.load()
            knowledge_base = self.create_knowledge_base(docs)
        except Exception:
            # Keep the UI responsive but leave a trace of what went wrong.
            self._log.exception(
                "Could not build a knowledge base from %s", file_name
            )
            return file_name, self.get_empty_state()

        return file_name, {"knowledge_base": knowledge_base}

    def answer_question(self, state, question=None):
        """Answer a question based on the current knowledge base.

        Args:
            state (dict): The current state containing the knowledge base
                under the ``"knowledge_base"`` key.
            question (str, optional): Question to ask.  Defaults to
                ``DEFAULT_QUESTION``.

        Returns:
            str: The chain's answer, or ``ERROR_MESSAGE`` when there is no
            usable knowledge base or the search/LLM call fails.
        """
        # Tolerate a missing or malformed state (e.g. nothing uploaded yet).
        knowledge_base = (
            state.get("knowledge_base") if isinstance(state, dict) else None
        )
        if knowledge_base is None:
            return self.ERROR_MESSAGE

        question = question or self.DEFAULT_QUESTION
        try:
            # Retrieve the chunks most similar to the question ...
            docs = knowledge_base.similarity_search(question)

            # ... and let the LLM answer using only those chunks as context.
            llm = OpenAI(temperature=0.4)
            chain = load_qa_chain(llm, chain_type="stuff")
            return chain.run(input_documents=docs, question=question)
        except Exception:
            self._log.exception("Question answering failed")
            return self.ERROR_MESSAGE

    def gradio_interface(self):
        """Create the Gradio interface for the Chemical Identifier and launch it."""
        with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
            # Per-session state carrying the knowledge base between events.
            state = gr.State(self.get_empty_state())
            gr.HTML("""<img class="leftimage" align="left" src="https://templates.images.credential.net/1612472097627370951721412474196.png" alt="Image" width="210" height="210">
            <img class="rightimage" align="right" src="https://logos-download.com/wp-content/uploads/2016/06/Syngenta_logo.png" alt="Image" width="150" height="140">""")
            with gr.Column(elem_id="col-container"):
                gr.HTML(
                    """<hr style="border-top: 5px solid white;">"""
                )
                gr.HTML(
                    """<br>
                    <h1 style="text-align:center;">
                    Syngenta Chemical Identifier
                    </h1> """
                )
                gr.HTML(
                    """<hr style="border-top: 5px solid white;">"""
                )

                gr.Markdown("**Upload your file**")
                with gr.Row(elem_id="row-flex"):
                    with gr.Column(scale=0.90, min_width=160):
                        file_output = gr.File(elem_classes="filenameshow")
                    with gr.Column(scale=0.10, min_width=160):
                        upload_button = gr.UploadButton(
                            "Browse File",
                            file_types=[".txt", ".pdf", ".doc", ".docx"],
                            elem_classes="filenameshow",
                        )

                with gr.Row():
                    with gr.Column(scale=1, min_width=0):
                        analyse_btn = gr.Button(value="Analyse")
                with gr.Row():
                    with gr.Column(scale=1, min_width=0):
                        answer = gr.Textbox(
                            value="",
                            label='Answer Box :',
                            show_label=True,
                            placeholder="",
                            lines=5,
                        )

            # Uploading rebuilds the knowledge base and shows the file name.
            upload_button.upload(
                self.upload_file, upload_button, [file_output, state]
            )

            # "Analyse" asks the default question against the stored state.
            analyse_btn.click(self.answer_question, [state], [answer])

        demo.queue().launch(share=True)
164
+
165
if __name__ == "__main__":
    # Script entry point: build the app object and start serving the UI.
    ChemicalIdentifier().gradio_interface()