Spaces:
Runtime error
Runtime error
robertselvam
committed on
Commit
•
3f7ab5b
1
Parent(s):
6e68a4a
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import NoneStr
|
2 |
+
import os
|
3 |
+
from langchain.chains.question_answering import load_qa_chain
|
4 |
+
from langchain.document_loaders import UnstructuredFileLoader
|
5 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
6 |
+
from langchain.llms import OpenAI
|
7 |
+
from langchain.text_splitter import CharacterTextSplitter
|
8 |
+
from langchain.vectorstores import FAISS
|
9 |
+
import gradio as gr
|
10 |
+
import openai
|
11 |
+
|
12 |
+
|
13 |
+
class ChemicalIdentifier:
    """Gradio app that indexes an uploaded document and queries an LLM about it.

    Workflow: the user uploads a file, the file is split into chunks and
    embedded into a FAISS index (the "knowledge base"), and pressing
    "Analyse" runs a fixed question against that index through a
    question-answering chain.

    SECURITY NOTE: an OpenAI API key used to be hard-coded in ``__init__``.
    Because it was committed to version control it must be treated as
    compromised and revoked. The key is now taken from the
    ``OPENAI_API_KEY`` environment variable (or the optional constructor
    argument) and is never stored in source.
    """

    # Question sent to the QA chain when the caller does not supply one.
    DEFAULT_QUESTION = "Identify the chemical capabilities"

    # Message shown in the answer box whenever no answer can be produced.
    ERROR_MESSAGE = "Please upload Proper Document"

    def __init__(self, openai_api_key=None):
        """Configure the OpenAI credentials used by the embedding and LLM calls.

        Args:
            openai_api_key (str, optional): API key to export as
                ``OPENAI_API_KEY``. When omitted, an already-set
                ``OPENAI_API_KEY`` environment variable is used as-is.
        """
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        elif not os.environ.get("OPENAI_API_KEY"):
            # Do not crash here: the UI can still be built, and the failure
            # is reported in the log instead of being hidden.
            import logging

            logging.getLogger(__name__).warning(
                "OPENAI_API_KEY is not set; uploads and analysis will fail "
                "until it is provided via the environment."
            )

    def get_empty_state(self):
        """Return the initial session state, which has no knowledge base yet."""
        return {"knowledge_base": None}

    def create_knowledge_base(self, docs):
        """Create a knowledge base from the given documents.

        Args:
            docs (list): Documents as returned by the file loader's
                ``load()`` call.

        Returns:
            FAISS: Knowledge base built from the documents.

        Raises:
            ValueError: If splitting the documents yields no chunks.
        """
        # Split on newlines into chunks of at most 500 characters with no
        # overlap between consecutive chunks.
        text_splitter = CharacterTextSplitter(
            separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
        )
        chunks = text_splitter.split_documents(docs)

        # An empty chunk list cannot produce a useful index; fail with a
        # clear message instead of an obscure error further down.
        if not chunks:
            raise ValueError("No text could be extracted from the document")

        # Embed every chunk and build the FAISS index from the embeddings.
        embeddings = OpenAIEmbeddings()
        return FAISS.from_documents(chunks, embeddings)

    def upload_file(self, file_obj):
        """Upload a file and create a knowledge base from its contents.

        Args:
            file_obj (file-like object): The uploaded file; only its
                ``name`` attribute (the path on disk) is read.

        Returns:
            tuple: ``(file name, state dict)``. On failure the state is the
            empty state (``{"knowledge_base": None}``) so the session state
            always keeps the same shape.
        """
        if file_obj is None:
            return None, self.get_empty_state()

        file_name = file_obj.name
        try:
            # The "fast" strategy is passed through to the unstructured loader.
            loader = UnstructuredFileLoader(file_name, strategy="fast")
            docs = loader.load()
            knowledge_base = self.create_knowledge_base(docs)
        except Exception:
            # Keep the UI responsive, but record why the upload failed
            # instead of swallowing the error silently.
            import logging

            logging.getLogger(__name__).exception(
                "Failed to build a knowledge base from %s", file_name
            )
            return file_name, self.get_empty_state()

        return file_name, {"knowledge_base": knowledge_base}

    def answer_question(self, state, question=None):
        """Answer a question based on the current knowledge base.

        Args:
            state (dict): The current state containing the knowledge base
                under the ``"knowledge_base"`` key.
            question (str, optional): Question to ask. Defaults to
                ``DEFAULT_QUESTION``.

        Returns:
            str: The answer to the question, or ``ERROR_MESSAGE`` when no
            knowledge base is available or the query fails.
        """
        if question is None:
            question = self.DEFAULT_QUESTION

        # Tolerate a missing or malformed state rather than relying on an
        # exception to detect it.
        knowledge_base = (
            state.get("knowledge_base") if isinstance(state, dict) else None
        )
        if knowledge_base is None:
            return self.ERROR_MESSAGE

        try:
            # Retrieve the chunks most similar to the question.
            docs = knowledge_base.similarity_search(question)

            # "stuff" puts all retrieved chunks into a single prompt.
            llm = OpenAI(temperature=0.4)
            chain = load_qa_chain(llm, chain_type="stuff")

            return chain.run(input_documents=docs, question=question)
        except Exception:
            import logging

            logging.getLogger(__name__).exception("Question answering failed")
            return self.ERROR_MESSAGE

    def gradio_interface(self):
        """Create the Gradio interface for the Chemical Identifier and launch it."""
        with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
            # Per-session state holding the knowledge base of the uploaded file.
            state = gr.State(self.get_empty_state())
            gr.HTML("""<img class="leftimage" align="left" src="https://templates.images.credential.net/1612472097627370951721412474196.png" alt="Image" width="210" height="210">
            <img class="rightimage" align="right" src="https://logos-download.com/wp-content/uploads/2016/06/Syngenta_logo.png" alt="Image" width="150" height="140">""")
            with gr.Column(elem_id="col-container"):
                gr.HTML(
                    """<hr style="border-top: 5px solid white;">"""
                )
                gr.HTML(
                    """<br>
                    <h1 style="text-align:center;">
                    Syngenta Chemical Identifier
                    </h1> """
                )
                gr.HTML(
                    """<hr style="border-top: 5px solid white;">"""
                )

            gr.Markdown("**Upload your file**")
            with gr.Row(elem_id="row-flex"):
                with gr.Column(scale=0.90, min_width=160):
                    file_output = gr.File(elem_classes="filenameshow")
                with gr.Column(scale=0.10, min_width=160):
                    upload_button = gr.UploadButton(
                        "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"],
                        elem_classes="filenameshow")

            with gr.Row():
                with gr.Column(scale=1, min_width=0):
                    analyse_btn = gr.Button(value="Analyse")
            with gr.Row():
                with gr.Column(scale=1, min_width=0):
                    answer = gr.Textbox(value="", label='Answer Box :', show_label=True, placeholder="", lines=5)

            # Uploading builds the knowledge base and stores it in the state.
            upload_button.upload(self.upload_file, upload_button, [file_output, state])

            # Analysing runs the default question against the stored state.
            analyse_btn.click(self.answer_question, [state], [answer])

        demo.queue().launch(share=True)
|
164 |
+
|
165 |
+
# Script entry point: build the app object and start the Gradio UI.
if __name__ == "__main__":
    app = ChemicalIdentifier()
    app.gradio_interface()
|