dataroadmap committed on
Commit
b379823
1 Parent(s): f680743

updated BAM models

Browse files
Files changed (1) hide show
  1. app.py +315 -0
app.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import datetime
3
+ import openai
4
+ import uuid
5
+ import gradio as gr
6
+ from langchain.embeddings import OpenAIEmbeddings
7
+ from langchain.vectorstores import Chroma
8
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ from langchain.chains import RetrievalQA
11
+ from langchain.embeddings import SentenceTransformerEmbeddings
12
+
13
+ import os
14
+ from langchain.chat_models import ChatOpenAI
15
+ from langchain import OpenAI
16
+ from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
17
+ from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
18
+
19
+ from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
20
+ from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
21
+ from ibm_watson_machine_learning.foundation_models import Model
22
+ from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
23
+ from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
24
+
25
+ import genai
26
+
27
+ from collections import deque
28
+ import re
29
+ from bs4 import BeautifulSoup
30
+ import requests
31
+ from urllib.parse import urlparse
32
+ import mimetypes
33
+ from pathlib import Path
34
+ import tiktoken
35
+ from ttyd_functions import *
36
+ from ttyd_consts import *
37
+
38
###############################################################################################

# Module initialization: load environment and select the app "mode" object.
# Mode objects (mode_arslan, mode_nustian, mode_general) come from ttyd_consts.
load_dotenv()
TTYD_MODE = os.getenv("TTYD_MODE",'')


# select the mode when starting container - mode options are in ttyd_consts.py
if TTYD_MODE.split('_')[0]=='personalBot':
    mode = mode_arslan
    # For a personal bot other than the default one, personalize title and
    # welcome message from the name embedded in TTYD_MODE ("personalBot_<user>").
    if TTYD_MODE!='personalBot_Arslan':
        user = TTYD_MODE.split('_')[1]
        mode.title='## Talk to '+user
        mode.welcomeMsg= welcomeMsgUser(user)

elif os.getenv("TTYD_MODE",'')=='nustian':
    mode = mode_nustian
else:
    mode = mode_general


if mode.type!='userInputDocs':
    # local vector store as opposed to gradio state vector store, used when the user is not uploading the docs
    # NOTE(review): this runs at import time and may do network/disk I/O — confirm acceptable at container start.
    vsDict_hard = localData_vecStore(getPersonalBotApiKey(), inputDir=mode.inputDir, file_list=mode.file_list, url_list=mode.url_list, gGrUrl=mode.gDriveFolder)
61
+
62
+ ###############################################################################################
63
+
64
+ # Gradio
65
+
66
+ ###############################################################################################
67
+
68
def setOaiApiKey(creds):
    """Validate an OpenAI API key; on success lock the credential widgets.

    Returns the output tuple wired to ``credComps_op``:
    status message, updates for each credential component, and the creds dict.
    On failure, shows a warning and leaves every component unchanged.
    """
    creds = getOaiCreds(creds)
    try:
        # Cheap probe call — raises if the key is invalid.
        openai.Model.list(api_key=creds.get('oai_key', 'Null'))
    except Exception as e:
        gr.Warning(str(e))
        return [x.update() for x in credComps_op]
    # Key accepted: disable all credential inputs/buttons and persist creds in state.
    locked = [x.update(interactive=False) for x in credComps_btn_tb]
    return ('OpenAI credentials accepted.', *locked, creds)
77
+
78
def setBamApiKey(creds):
    """Validate BAM credentials and refresh the model dropdown with BAM models.

    On success: lock the credential widgets, store creds in state, and update
    the model dropdown choices. On failure: warn and leave everything as-is.
    """
    creds = getBamCreds(creds)
    try:
        # Listing available models doubles as the credential check.
        bam_models = sorted(m.id for m in genai.Model.models(credentials=creds['bam_creds']))
    except Exception as e:
        gr.Warning(str(e))
        return (*[x.update() for x in credComps_op], model_dd.update())
    locked = [x.update(interactive=False) for x in credComps_btn_tb]
    dd_update = model_dd.update(choices=getModelChoices(openAi_models, ModelTypes, bam_models))
    return ('BAM credentials accepted.', *locked, creds, dd_update)
88
+
89
def setWxApiKey(key, p_id):
    """Validate Watsonx credentials (API key + project id); lock widgets on success.

    Returns the output tuple wired to ``credComps_op``.
    """
    creds = getWxCreds(key, p_id)
    try:
        # Instantiating a small model fails fast on bad credentials/project id.
        Model(model_id='google/flan-ul2', credentials=creds['credentials'], project_id=creds['project_id'])
    except Exception as e:
        gr.Warning(str(e))
        return [x.update() for x in credComps_op]
    locked = [x.update(interactive=False) for x in credComps_btn_tb]
    return ('Watsonx credentials accepted.', *locked, creds)
98
+
99
+
100
# convert user uploaded data to vectorstore
def uiData_vecStore(userFiles, userUrls, api_key_st, vsDict_st=None, progress=gr.Progress()):
    """Ingest user-uploaded files and URLs into the session vector store.

    Args:
        userFiles: gradio file object(s) from the upload widget, or None.
        userUrls: comma-separated URL string from the textbox (may be empty).
        api_key_st: session credentials dict (used to pick the embedding func).
        vsDict_st: existing vector-store dict from session state; a fresh dict
            is created when None. (BUG FIX: the default used to be a mutable
            ``{}`` shared across calls — any mutation would leak between users.)
        progress: gradio progress tracker.

    Returns:
        (vsDict_st, status_string, *component updates) matching the outputs of
        the data_ingest_btn.click listener.
    """
    if vsDict_st is None:
        vsDict_st = {}
    opComponents = [data_ingest_btn, upload_fb, urls_tb, initChatbot_btn]
    # parse user data
    file_paths = []
    documents = []
    if userFiles is not None:
        if not isinstance(userFiles, list):
            userFiles = [userFiles]
        file_paths = [file.name for file in userFiles]
    userUrls = [x.strip() for x in userUrls.split(",")] if userUrls else []
    # create documents
    documents = data_ingestion(file_list=file_paths, url_list=userUrls, prog=progress)
    if documents:
        # uploaded temp files are no longer needed once parsed into documents
        for file in file_paths:
            os.remove(file)
    else:
        # NOTE(review): gr.Error is an exception class and must be *raised* to
        # display in the UI; calling it has no visible effect — confirm intent.
        gr.Error('No documents found')
        return {}, '', *[x.update() for x in opComponents]
    # Splitting and Chunks
    docs = split_docs(documents)
    # Embeddings
    try:
        embeddings = getEmbeddingFunc(api_key_st)
    except Exception as e:
        gr.Error(str(e))
        return {}, '', *[x.update() for x in opComponents]

    progress(0.5, 'Creating Vector Database')
    vsDict_st = getVsDict(embeddings, docs, vsDict_st)
    # get sources from metadata
    src_str = getSourcesFromMetadata(vsDict_st['chromaClient'].get()['metadatas'])
    src_str = str(src_str[1]) + ' source document(s) successfully loaded in vector store.'+'\n\n' + src_str[0]

    progress(1, 'Data loaded')
    # Freeze the ingestion widgets and enable chatbot initialization.
    return vsDict_st, src_str, *[x.update(interactive=False) for x in [data_ingest_btn, upload_fb]], urls_tb.update(interactive=False, placeholder=''), initChatbot_btn.update(interactive=True)
135
+
136
# initialize chatbot function sets the QA Chain, and also sets/updates any other components to start chatting. updateQaChain function only updates QA chain and will be called whenever Adv Settings are updated.
def initializeChatbot(temp, k, modelNameDD, stdlQs, api_key_st, vsDict_st, progress=gr.Progress()):
    """Build the QA chain and flip the UI into chat mode.

    Delegates chain construction to updateQaChain, then enables the chat
    button, switches to the Chatbot tab, and posts the welcome message.
    """
    progress(0.1, waitText_initialize)
    qa_chain_st, model_dd_update = updateQaChain(temp, k, modelNameDD, stdlQs, api_key_st, vsDict_st)
    progress(0.5, waitText_initialize)
    # welcome message: mode-specific if configured, otherwise the default
    welMsg = mode.welcomeMsg if mode.welcomeMsg else welcomeMsgDefault
    print('Chatbot initialized at ', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    return (qa_chain_st,
            model_dd_update,
            btn.update(interactive=True),
            initChatbot_btn.update('Chatbot ready. Now visit the chatbot Tab.', interactive=False),
            status_tb.update(),
            gr.Tabs.update(selected='cb'),
            chatbot.update(value=[('', welMsg)]))
151
+
152
# just update the QA Chain, no updates to any UI
def updateQaChain(temp, k, modelNameDD, stdlQs, api_key_st, vsDict_st):
    """(Re)build the retrieval QA chain from the current advanced settings.

    Args:
        temp: sampling temperature for the LLM.
        k: number of documents to retrieve from the vector store.
        modelNameDD: model-dropdown value; coerced to the backend's default
            model if it does not belong to the credentialed backend.
        stdlQs: standalone-question radio index (0 = no history, 1/2 = conversational).
        api_key_st: session credentials dict with a 'service' key.
        vsDict_st: session vector-store dict; falls back to the module-level
            vsDict_hard when empty and docs are not user-supplied.

    Returns:
        (qa_chain, model_dd update) — the dropdown update reflects any model coercion.

    Raises:
        Exception: when credentials are missing or name an unknown service.
    """
    # if we are not adding data from ui, then use vsDict_hard as vectorstore
    if vsDict_st == {} and mode.type != 'userInputDocs':
        vsDict_st = vsDict_hard

    # Pick the LLM backend from the stored credentials; coerce the dropdown
    # value to that backend's default model when it doesn't match.
    if api_key_st['service'] == 'openai':
        if not 'openai' in modelNameDD:
            modelNameDD = changeModel(modelNameDD, OaiDefaultModel)
        llm = getOaiLlm(temp, modelNameDD, api_key_st)
    elif api_key_st['service'] == 'watsonx':
        if not 'watsonx' in modelNameDD:
            modelNameDD = changeModel(modelNameDD, WxDefaultModel)
        llm = getWxLlm(temp, modelNameDD, api_key_st)
    elif api_key_st['service'] == 'bam':
        if not 'bam' in modelNameDD:
            modelNameDD = changeModel(modelNameDD, BamDefaultModel)
        llm = getBamLlm(temp, modelNameDD, api_key_st)
    else:
        raise Exception('Error: Invalid or None Credentials')

    # Llama-2 chat models need their special prompt template; others use the
    # chain's default prompt.
    if 'meta-llama/llama-2' in modelNameDD:
        prompt = promptLlama
    else:
        prompt = None

    # Now create QA Chain using the LLM
    if stdlQs == 0:  # 0th index i.e. first option: answer without chat history
        qa_chain_st = RetrievalQA.from_llm(
            llm=llm,
            retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
            return_source_documents=True,
            prompt=prompt,
            input_key='question', output_key='answer'  # to align with ConversationalRetrievalChain for downstream functions
        )
    else:
        rephQs = False if stdlQs == 1 else True
        qa_chain_st = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
            rephrase_question=rephQs,
            return_source_documents=True,
            return_generated_question=True,
            # BUG FIX: this was hard-coded to promptLlama, forcing the Llama
            # prompt template onto every model. Use the model-appropriate
            # prompt, and omit the kwarg entirely when there is none so the
            # chain falls back to its default prompt.
            combine_docs_chain_kwargs={'prompt': prompt} if prompt else {}
        )

    return qa_chain_st, model_dd.update(value=modelNameDD)
200
+
201
+
202
def respond(message, chat_history, qa_chain):
    """Run the QA chain on one user turn, yielding a character-by-character stream.

    Yields (cleared textbox, chat history, source references, send-button update)
    tuples; the final yield commits the full answer to history and re-enables
    the send button.
    """
    history_tuples = [tuple(pair) for pair in chat_history]
    result = qa_chain({'question': message, "chat_history": history_tuples})
    src_docs = getSourcesFromMetadata([doc.metadata for doc in result["source_documents"]], sourceOnly=False)[0]
    # simulated streaming: grow the shown answer one character at a time while
    # the send button stays disabled
    partial = ""
    for ch in result['answer']:
        partial += ch
        yield "", chat_history + [(message, partial)], src_docs, btn.update('Please wait...', interactive=False)

    chat_history.append((message, result['answer']))
    yield "", chat_history, src_docs, btn.update('Send Message', interactive=True)
213
+
214
#####################################################################################################

# Gradio UI layout and event wiring. Component creation order matters: the
# handlers above close over these module-level component variables.
with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray', neutral_hue='blue'), css="footer {visibility: hidden}") as demo:

    # Initialize state variables - stored in this browser session - these can only be used within input or output of .click/.submit etc, not as a python var coz they are not stored in backend, only as a frontend gradio component
    # but if you initialize it with a default value, that value will be stored in backend and accessible across all users. You can also change it with statear.value='newValue'
    qa_state = gr.State()  # active QA chain for this session
    api_key_state = gr.State(getPersonalBotApiKey() if mode.type=='personalBot' else {}) # can be string (OpenAI) or dict (WX)
    chromaVS_state = gr.State({})  # session vector-store dict


    # Setup the Gradio Layout
    gr.Markdown(mode.title)
    with gr.Tabs() as tabs:
        with gr.Tab('Initialization', id='init'):
            # Credential entry: one column per backend (OpenAI / Watsonx / BAM)
            with gr.Row():
                with gr.Column():
                    oaiKey_tb = gr.Textbox(label="OpenAI API Key", type='password'\
                            , info='You can find OpenAI API key at https://platform.openai.com/account/api-keys')
                    oaiKey_btn = gr.Button("Submit OpenAI API Key")
                with gr.Column():
                    with gr.Row():
                        wxKey_tb = gr.Textbox(label="Watsonx API Key", type='password'\
                                , info='You can find IBM Cloud API Key at Manage > Access (IAM) > API keys on https://cloud.ibm.com/iam/overview')
                        wxPid_tb = gr.Textbox(label="Watsonx Project ID"\
                                , info='You can find Project ID at Project -> Manage -> General -> Details on https://dataplatform.cloud.ibm.com/wx/home')
                    wxKey_btn = gr.Button("Submit Watsonx Credentials")
                with gr.Column():
                    bamKey_tb = gr.Textbox(label="BAM API Key", type='password'\
                            , info='Internal IBMers only')
                    bamKey_btn = gr.Button("Submit BAM API Key")
            # Data ingestion row: hidden for modes that pre-load their own docs
            with gr.Row(visible=mode.uiAddDataVis):
                upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv', '.ppt', '.pptx'])
                urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
                        , info=url_tb_info\
                        , placeholder=url_tb_ph)
                data_ingest_btn = gr.Button("Load Data")
            status_tb = gr.TextArea(label='Status Info')
            initChatbot_btn = gr.Button("Initialize Chatbot", variant="primary", interactive=False)

            # Component groups reused by the credential handlers above:
            # credComps_btn_tb = everything to lock once a key is accepted;
            # credComps_op = full output list (status + widgets + state).
            credComps_btn_tb = [oaiKey_tb, oaiKey_btn, bamKey_tb, bamKey_btn, wxKey_tb, wxPid_tb, wxKey_btn]
            credComps_op = [status_tb] + credComps_btn_tb + [api_key_state]

        with gr.Tab('Chatbot', id='cb'):
            with gr.Row():
                chatbot = gr.Chatbot(label="Chat History", scale=2, avatar_images=(user_avatar, bot_avatar))
                srcDocs = gr.TextArea(label="References")
            msg = gr.Textbox(label="User Input",placeholder="Type your questions here")
            with gr.Row():
                btn = gr.Button("Send Message", interactive=False, variant="primary")
                clear = gr.ClearButton(components=[msg, chatbot, srcDocs], value="Clear chat history")
            with gr.Accordion("Advance Settings - click to expand", open=False):
                with gr.Row():
                    with gr.Column():
                        temp_sld = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.7, label="Temperature", info='Sampling temperature to use when calling LLM. Defaults to 0.7')
                        k_sld = gr.Slider(minimum=1, maximum=10, step=1, value=mode.k, label="K", info='Number of relavant documents to return from Vector Store. Defaults to 4')
                        model_dd = gr.Dropdown(label='Model Name'\
                                , choices=getModelChoices(openAi_models, ModelTypes, bam_models_old), allow_custom_value=True\
                                , info=model_dd_info)
                        stdlQs_rb = gr.Radio(label='Standalone Question', info=stdlQs_rb_info\
                                , type='index', value=stdlQs_rb_choices[1]\
                                , choices=stdlQs_rb_choices)

    ### Setup the Gradio Event Listeners

    # OpenAI API button
    oaiKey_btn_args = {'fn':setOaiApiKey, 'inputs':[oaiKey_tb], 'outputs':credComps_op}
    oaiKey_btn.click(**oaiKey_btn_args)
    oaiKey_tb.submit(**oaiKey_btn_args)

    # BAM API button
    bamKey_btn_args = {'fn':setBamApiKey, 'inputs':[bamKey_tb], 'outputs':credComps_op+[model_dd]}
    bamKey_btn.click(**bamKey_btn_args)
    bamKey_tb.submit(**bamKey_btn_args)

    # Watsonx Creds button
    wxKey_btn_args = {'fn':setWxApiKey, 'inputs':[wxKey_tb, wxPid_tb], 'outputs':credComps_op}
    wxKey_btn.click(**wxKey_btn_args)

    # Data Ingest Button
    data_ingest_event = data_ingest_btn.click(uiData_vecStore, [upload_fb, urls_tb, api_key_state, chromaVS_state], [chromaVS_state, status_tb, data_ingest_btn, upload_fb, urls_tb, initChatbot_btn])

    # Adv Settings: rebuild the QA chain whenever any setting changes
    advSet_args = {'fn':updateQaChain, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state, model_dd]}
    temp_sld.release(**advSet_args)
    k_sld.release(**advSet_args)
    model_dd.change(**advSet_args)
    stdlQs_rb.change(**advSet_args)

    # Initialize button
    initCb_args = {'fn':initializeChatbot, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state, model_dd, btn, initChatbot_btn, status_tb, tabs, chatbot]}
    if mode.type=='personalBot':
        demo.load(**initCb_args) # load Chatbot UI directly on startup
    initChatbot_btn.click(**initCb_args)

    # Chatbot submit button
    chat_btn_args = {'fn':respond, 'inputs':[msg, chatbot, qa_state], 'outputs':[msg, chatbot, srcDocs, btn]}
    btn.click(**chat_btn_args)
    msg.submit(**chat_btn_args)

# Queue enables generator (streaming) handlers; launch the app.
demo.queue(concurrency_count=10)
demo.launch(show_error=True)