OuroborosM committed on
Commit
e5e6ba2
·
1 Parent(s): 692c112

Add application file

Browse files
Files changed (2) hide show
  1. baby.py +201 -0
  2. requirements.txt +5 -0
baby.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import os
3
+ from langchain.vectorstores import Chroma
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.chat_models import AzureChatOpenAI
7
+ from langchain.document_loaders import DirectoryLoader
8
+ from langchain.chains import RetrievalQA
9
+ from langchain.vectorstores import Pinecone
10
+ import pinecone
11
+ from pinecone.core.client.configuration import Configuration as OpenApiConfiguration
12
+ import gradio as gr
13
+ import time
14
+
15
+ # socks.set_default_proxy(socks.SOCKS5, "http://u477827:4rfgt54r@http.internetpsa.inetpsa.com", 80)
16
+ # socket.socket = socks.socksocket
17
+
18
+
19
+
20
+ # with open('2.txt') as f:
21
+ # state_of_the_union = f.read()
22
+ # text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
23
+ # texts = text_splitter.create_documents([state_of_the_union])
24
+ # print(texts[0])
25
+ # print(texts[1])
26
+
27
# Azure OpenAI endpoint settings, published through the process environment.
# None of these three values is a secret, so they stay in the source.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_BASE"] = "https://stla-baby.openai.azure.com/"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"

# SECURITY: the API key and a proxy URL with an embedded user/password used to
# be hard-coded here (and repeated in commented-out code). Secrets must not be
# committed: provide OPENAI_API_KEY -- and OPENAI_PROXY when a proxy is
# required -- through the deployment environment (for example the hosting
# platform's secret store). The values that were committed earlier should be
# treated as compromised and rotated.
39
+
40
+
41
# Chat model bound to the "Chattester" deployment, created with temperature=0.
chat = AzureChatOpenAI(deployment_name="Chattester", temperature=0)

# Embedding client bound to the "model_embedding" deployment.
embeddings = OpenAIEmbeddings(deployment="model_embedding")
47
+
48
+
49
# Pinecone client setup: start from a copy of the default OpenAPI
# configuration so a proxy can be attached to it.
openapi_config = OpenApiConfiguration.get_default_copy()

# SECURITY: the proxy URL (with embedded user/password) and the Pinecone API
# key used to be hard-coded here. They are now taken from the environment so
# that no credential lives in source control; the previously committed values
# should be rotated.
# PINECONE_PROXY wins; OPENAI_PROXY is the fallback so one proxy setting can
# serve both clients. No proxy is configured when neither is set.
_proxy_url = os.environ.get("PINECONE_PROXY") or os.environ.get("OPENAI_PROXY")
if _proxy_url:
    openapi_config.proxy = _proxy_url

pinecone.init(
    # Raises KeyError at start-up when the key has not been provided.
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ.get("PINECONE_ENVIRONMENT", "asia-southeast1-gcp-free"),
    openapi_config=openapi_config,
)
index_name = 'stla-baby'
index = pinecone.Index(index_name)
61
+ # index.delete(delete_all=True, namespace='')
62
+ # print(pinecone.whoami())
63
+ # print(index.describe_index_stats())
64
+
65
+
66
+
67
# Module-level vector stores. A ``global`` statement is a no-op at module
# scope, so plain assignments are used; functions that rebind these names
# declare ``global`` themselves.

# Chroma store opened from the local 'db' directory.
vectordb = Chroma(persist_directory='db', embedding_function=embeddings)
# Store backed by the already existing Pinecone index named by index_name.
vectordb_p = Pinecone.from_existing_index(index_name, embeddings)
71
+
72
+ # loader = DirectoryLoader('./documents', glob='**/*.txt')
73
+ # documents = loader.load()
74
+ # text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=200)
75
+ # split_docs = text_splitter.split_documents(documents)
76
+ # print(split_docs)
77
+ # vectordb = Chroma.from_documents(split_docs, embeddings, persist_directory='db')
78
+
79
+
80
+
81
+ # question = "what is LCDV ?"
82
+ # rr = vectordb.similarity_search(query=question, k=4)
83
+ # vectordb.similarity_search(question)
84
+ # print(type(rr))
85
+ # print(rr)
86
def chathmi(message, history):
    """Answer one chat message for the chat interface.

    Args:
        message: The text the user just submitted.
        history: The conversation so far as passed in by the caller. It is
            only printed for debugging; it does not influence the answer.

    Returns:
        Whatever ``QAQuery_p`` returns for ``message``.
    """
    print(message)
    # The former placeholder assignment ("I don't know") was overwritten
    # unconditionally and the fixed 0.3 s sleep only delayed every reply, so
    # both were removed.
    response = QAQuery_p(message)
    print(history)
    return response
93
+
94
+ # chatbot = gr.Chatbot().style(color_map =("blue", "pink"))
95
+ # chatbot = gr.Chatbot(color_map =("blue", "pink"))
96
+
97
# Chat UI object: every submitted message is handled by chathmi.
demo = gr.ChatInterface(chathmi, title="STLA BABY - YOUR FRIENDLY GUIDE")
101
+
102
+ # demo = gr.Interface(
103
+ # chathmi,
104
+ # ["text", "state"],
105
+ # [chatbot, "state"],
106
+ # allow_flagging="never",
107
+ # )
108
+
109
def CreatDb_P():
    """Rebuild the Pinecone index from the ``.txt`` files under ``./documents``.

    The files are loaded, split into chunks (chunk_size=500, chunk_overlap=200),
    the existing vectors in the default namespace are deleted, and the chunks
    are uploaded again. The module-level ``vectordb_p`` is rebound to the new
    store.

    When no chunks are produced the index is left untouched, so an empty or
    missing documents folder cannot wipe the existing data.
    """
    global vectordb_p
    index_name = 'stla-baby'
    loader = DirectoryLoader('./documents', glob='**/*.txt')
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=200)
    split_docs = text_splitter.split_documents(documents)
    print(split_docs)
    if not split_docs:
        # Guard: deleting first and uploading nothing would empty the index.
        print("No documents found under ./documents; Pinecone index left unchanged")
        return
    # One handle, built from index_name, is used for both the wipe and the
    # stats so the function does not mix a literal, a local and a global.
    target_index = pinecone.Index(index_name)
    target_index.delete(delete_all=True, namespace='')
    vectordb_p = Pinecone.from_documents(split_docs, embeddings, index_name=index_name)
    print("Pinecone Updated Done")
    print(target_index.describe_index_stats())
121
+
122
def QAQuery_p(question: str):
    """Answer ``question`` with a retrieval QA chain over the Pinecone store.

    A retriever is taken from the module-level ``vectordb_p`` with ``k`` set
    to 3, a "stuff" RetrievalQA chain is built on the module-level ``chat``
    model, and the question, answer and source documents are printed.

    Returns:
        The ``'result'`` entry of the chain output.
    """
    doc_retriever = vectordb_p.as_retriever()
    doc_retriever.search_kwargs['k'] = 3

    qa_chain = RetrievalQA.from_chain_type(
        llm=chat,
        chain_type="stuff",
        retriever=doc_retriever,
        return_source_documents=True,
    )
    outcome = qa_chain({"query": question})

    divider = "-" * 20
    print(divider)
    print("Question:", question)
    answer = outcome['result']
    print("Answer:", answer)
    print(divider)
    print("Source:", outcome['source_documents'])
    return answer
143
+
144
def CreatDb():
    """Build the local Chroma store from the ``.txt`` files under ``./documents``.

    The files are loaded, split into chunks (chunk_size=500, chunk_overlap=200),
    embedded into a Chroma store persisted in the 'db' directory, and the
    module-level ``vectordb`` is rebound to that store.
    """
    global vectordb
    raw_docs = DirectoryLoader('./documents', glob='**/*.txt').load()
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=200)
    chunks = splitter.split_documents(raw_docs)
    print(chunks)
    vectordb = Chroma.from_documents(chunks, embeddings, persist_directory='db')
    vectordb.persist()
153
+
154
def QAQuery(question: str):
    """Answer ``question`` with a retrieval QA chain over the Chroma store.

    A retriever is taken from the module-level ``vectordb`` with ``k`` set to
    3, a "stuff" RetrievalQA chain is built on the module-level ``chat``
    model, and the question, answer and source documents are printed.

    Returns:
        The ``'result'`` entry of the chain output. The function used to
        return nothing, unlike ``QAQuery_p``; callers that ignore the return
        value are unaffected.
    """
    retriever = vectordb.as_retriever()
    retriever.search_kwargs['k'] = 3

    qa = RetrievalQA.from_chain_type(
        llm=chat,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    res = qa({"query": question})

    print("-" * 20)
    print("Question:", question)
    print("Answer:", res['result'])
    print("-" * 20)
    print("Source:", res['source_documents'])
    return res['result']
172
+
173
+
174
+ # Used to complete content
175
def completeText(Text):
    """Request a completion for ``Text`` and print the prompt followed by it.

    The request goes to the "Chattester" deployment with temperature 0. The
    printed line is the prompt, the first choice's text and a trailing period.
    Nothing is returned.
    """
    completion = openai.Completion.create(
        deployment_id="Chattester",
        prompt=Text,
        temperature=0,
    )
    generated = completion['choices'][0]['text']
    print(f"{Text}{generated}.")
181
+
182
+ # Used to chat
183
def chatText(Text):
    """Send ``Text`` as a single user turn and print the assistant's reply.

    The conversation consists of a fixed system message plus the user text
    and is sent to the "Chattester" deployment. The reply content of the
    first choice is printed surrounded by blank lines. Nothing is returned.
    """
    deployment_id = "Chattester"
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": Text},
    ]
    response = openai.ChatCompletion.create(
        messages=conversation,
        deployment_id=deployment_id,
    )
    reply = response["choices"][0]["message"]["content"]
    print("\n" + reply + "\n")
191
+
192
if __name__ == '__main__':
    # Manual maintenance / smoke-test calls; uncomment the one that is needed:
    # chatText("what is AI?")
    # CreatDb()
    # QAQuery("what is COFOR ?")
    # CreatDb_P()
    # QAQuery_p("what is GST ?")

    # Enable the queue on the chat UI, then launch it.
    demo.queue().launch()
200
+
201
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ chromadb
2
+ langchain
3
+ openai
4
+ gradio
5
+ pinecone-client