Change Liao committed on
Commit
a9bbb0b
·
1 Parent(s): ebf3e17

Update session key and related endpoint information

Browse files
Files changed (1) hide show
  1. app.py +87 -85
app.py CHANGED
@@ -59,15 +59,16 @@ import pandas as pd
59
  #os env
60
  os.environ["OPENAI_API_TYPE"] = "azure"
61
  os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
62
- os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
63
- os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
64
  os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
65
 
66
- global_deployment_id = "CivetGPT"
67
- global_model_name = "gpt-35-turbo"
68
 
69
  #chroma settings
70
  chroma_api_impl = "HH_Azure_Openai"
 
71
  root_file_path = "./data/" #其實是data 存放的位置
72
  hr_source_path = "hr_source"
73
  ks_source_path = "ks_source"
@@ -79,7 +80,6 @@ persist_db = "persist_db"
79
  hr_collection_name = "hr_db"
80
  chroma_db_impl="localdb+langchain"
81
  tmp_collection="tmp_collection"
82
- davinci = "text-davinci-003"
83
 
84
  #global text setting
85
  inputText = "問題(按q 或Ctrl + c跳出): "
@@ -124,7 +124,9 @@ def get_openaiembeddings():
124
  return OpenAIEmbeddings(
125
  deployment="CivetGPT_embedding",
126
  model="text-embedding-ada-002",
127
- #embed_batch_size=1
 
 
128
  chunk_size=1
129
  )
130
 
@@ -263,7 +265,8 @@ def local_vector_search(question_str,chat_history, collection_name = hr_collecti
263
  chat_llm = AzureChatOpenAI(
264
  deployment_name = global_deployment_id,
265
  model_name= global_model_name,
266
- temperature = 0.2)
 
267
 
268
  prompt = PromptTemplate(
269
  template=get_prompt_template_string(),
@@ -289,24 +292,20 @@ def local_vector_search(question_str,chat_history, collection_name = hr_collecti
289
  description='Useful for when you need to answer questions about math.'
290
  )
291
 
292
- search = SerpAPIWrapper()
293
- search_tool = Tool(
294
- name="Search",
295
- func=search.run,
296
- description="當你需要回答一般問題時,非常有用; 不可以用來回答任何跟鴻海有關的問題.",
297
- )
298
- tools=[math_tool,km_tool, search_tool]
299
  agent=initialize_agent(
300
- agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
301
  tools=tools,
302
  llm=chat_llm,
303
  verbose=True,
304
  memory=memory,
305
  max_iterations=30,
306
  )
307
- result=km_chain(question_str)
308
 
 
309
  #result=agent.run(question_str)
 
310
  return result["answer"]
311
 
312
  def make_markdown_table(array):
@@ -323,9 +322,6 @@ def get_hr_files():
323
  else:
324
  return make_markdown_table(files)
325
 
326
- def get_be_csv_files():
327
- return make_markdown_table(glob.glob(root_file_path+believe_source_path+"/*.csv"))
328
-
329
  def update_hr_km(files):
330
  file_paths = [file.name for file in files]
331
  dest_file_path=root_file_path+hr_source_path
@@ -339,20 +335,6 @@ def update_hr_km(files):
339
  save_sqlite(sqlite_key, [Path(file_path).name for file_path in file_paths])
340
  return get_hr_files()
341
 
342
- def update_be_csv_km(files):
343
- file_paths = [file.name for file in files]
344
- dest_file_path=root_file_path+believe_source_path
345
- if not os.path.exists(dest_file_path):
346
- os.makedirs(dest_file_path)
347
-
348
- for file in file_paths:
349
- shutil.copy(file, dest_file_path)
350
-
351
- if files == None:
352
- return ''
353
- else:
354
- return make_markdown_table(files)
355
-
356
  def clear_all_collection(collection_name):
357
  pass
358
 
@@ -372,9 +354,6 @@ def clear_hr_datas():
372
  delete_sql(sqlite_key)
373
  return get_hr_files()
374
 
375
- def clear_be_csv_datas():
376
- all_files_under_diretory(root_file_path+believe_source_path)
377
-
378
  def num_of_collection(collection_name):
379
  client = get_chroma_client(collection_name)
380
  number = client.get_collection(collection_name).count()
@@ -552,6 +531,7 @@ def kh_update_km(files):
552
 
553
  return num_of_collection(tmp_collection)
554
 
 
555
  class Logger:
556
  def __init__(self, filename):
557
  self.terminal = sys.stdout
@@ -568,6 +548,7 @@ class Logger:
568
  def isatty(self):
569
  return False
570
 
 
571
  def read_logs():
572
  sys.stdout.flush()
573
  ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
@@ -575,33 +556,6 @@ def read_logs():
575
  with open("output.log", "r", encoding='UTF-8') as f:
576
  return ansi_escape.sub('', f.read())
577
 
578
- def pandas_analysis(prompt_str, message, chat_history):
579
- dir_path = f"{root_file_path}{believe_source_path}/*.csv"
580
- res = glob.glob(dir_path)
581
- df = pd.concat((pd.read_csv(f, encoding='unicode_escape') for f in res), ignore_index=True)
582
- local_deploy_id= "text-davinci-003"
583
- local_model_name = "text-davinci-003"
584
- llm = AzureOpenAI(
585
- deployment_name=local_deploy_id,
586
- model_name=local_model_name,
587
- max_tokens=2000,
588
- temperature=0,
589
- )
590
-
591
- be_agent = create_pandas_dataframe_agent(
592
- llm,
593
- df,
594
- prefix="Remove any ` from the Action Input",
595
- max_iterations=30,
596
- return_intermediate_steps=False,
597
- max_execution_time=60,
598
- handle_parsing_errors="Check your output and make sure it conforms!",
599
- verbose=True)
600
- new_str = prompt_str.format(message=message, chat_history=chat_history)
601
- print(new_str)
602
- answer = be_agent.run(new_str)
603
- chat_history.append((message, answer))
604
- return '', chat_history
605
 
606
  def lunch_style(demo, logs=gr.Text()):
607
  sys.stdout = Logger("output.log")
@@ -626,6 +580,7 @@ def lunch_style(demo, logs=gr.Text()):
626
  def gradio_run():
627
  print("User Login")
628
  with gr.Blocks(theme='bethecloud/storj_theme') as demo:
 
629
  with gr.Row():
630
  gr.Markdown("# HH Azure Openai Demo")
631
  #Header section
@@ -718,7 +673,7 @@ def gradio_run():
718
  cleanDataBtn = gr.Button(value="刪除所有知識以及檔案")
719
  cleanDataBtn.click(clear_hr_datas,outputs=file_list)
720
 
721
- with gr.Column(scale=1):
722
  with gr.Row():
723
  with gr.Column():
724
  tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
@@ -733,10 +688,16 @@ def gradio_run():
733
  audio_html = gr.HTML(htm_audio, visible=False)
734
  with gr.Column():
735
  isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
736
-
 
 
 
 
 
 
737
 
738
  with gr.Row():
739
- chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=600)
740
  with gr.Row():
741
  with gr.Column(scale=5):
742
  msg = gr.Textbox(
@@ -749,7 +710,6 @@ def gradio_run():
749
  def respond(message, chat_history):
750
  vector_search_message = local_vector_search(message, chat_history)
751
  chat_history.append((message, vector_search_message))
752
- print("vector_search:"+vector_search_message)
753
  if isAudio.value is False:
754
  print("isAudio is False")
755
  return '', chat_history, htm_video, ''
@@ -776,17 +736,59 @@ def gradio_run():
776
  with gr.Column(scale=1):
777
  youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
778
  youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
779
- with gr.Tab("統計助手"):
 
 
 
 
 
 
780
  with gr.Row():
781
  gr.Markdown("""
782
  ### 使用方式
783
- 已經讀取所有提供的csv 資料, 可以詢問資料任何問題(Talk to data)
784
- 建議先詢問欄位後, 後續再構思其他問題
 
785
  """)
786
-
 
787
  with gr.Row():
788
  with gr.Column():
789
- tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=700)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
  with gr.Row():
791
  with gr.Column(scale=5):
792
  tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
@@ -794,30 +796,30 @@ def gradio_run():
794
  tmp_clear = gr.Button("清除對話")
795
  with gr.Column():
796
  prompt_textbox = gr.Textbox("""
797
- 你是一位資料科學家,提供給你的資料是2023年每一週的click 次數.
798
  有下列定義:
799
 
800
- 1. 欄位 23W01 代表2023年的第一個星期; 23W02 代表2023年的第二個星期
801
 
802
  請以中文回答我下面的問題:{message}
803
  """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
804
- be_csv_file_list=gr.Textbox(get_be_csv_files, label="CSV Files", placeholder="沒有任何檔案存在", max_lines=10, lines=10)
805
- upload_button = gr.UploadButton("上傳統計資料(.csv格式)",file_types=[".csv"],file_count="multiple")
806
- upload_button.upload(update_be_csv_km, inputs=upload_button, outputs=be_csv_file_list).then(get_be_csv_files,outputs=be_csv_file_list)
807
-
808
- cleanDataBtn = gr.Button(value="刪除所有csv 資料")
809
- cleanDataBtn.click(clear_be_csv_datas, outputs=be_csv_file_list).then(get_be_csv_files, outputs=be_csv_file_list)
810
- tmp_msg.submit(pandas_analysis, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
811
  tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
812
  with gr.Row():
813
  gr.Examples([
814
- '你有什麼欄位?'
 
 
 
815
  ], label="訊息範例", inputs=tmp_msg)
816
- with gr.Row():
817
- console = gr.Textbox(lines=11, label="Console", max_lines=11)
818
  demo.queue(concurrency_count=10)
819
  lunch_style(demo,console)
820
 
 
 
 
 
821
  gradio_run()
822
 
823
-
 
59
  #os env
60
  os.environ["OPENAI_API_TYPE"] = "azure"
61
  os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
62
+ os.environ["OPENAI_API_BASE"] = "https://hh-azure-openai-poc.openai.azure.com/"
63
+ os.environ["OPENAI_API_KEY"] = "41dd3ccda6a2489db375f3fe2a440953"
64
  os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
65
 
66
+ global_deployment_id = "gpt-35-turbo-16k"
67
+ global_model_name = "gpt-35-turbo-16k"
68
 
69
  #chroma settings
70
  chroma_api_impl = "HH_Azure_Openai"
71
+ #root_file_path = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\"
72
  root_file_path = "./data/" #其實是data 存放的位置
73
  hr_source_path = "hr_source"
74
  ks_source_path = "ks_source"
 
80
  hr_collection_name = "hr_db"
81
  chroma_db_impl="localdb+langchain"
82
  tmp_collection="tmp_collection"
 
83
 
84
  #global text setting
85
  inputText = "問題(按q 或Ctrl + c跳出): "
 
124
  return OpenAIEmbeddings(
125
  deployment="CivetGPT_embedding",
126
  model="text-embedding-ada-002",
127
+ openai_api_base="https://civet-project-001.openai.azure.com/",
128
+ openai_api_type="azure",
129
+ openai_api_key = "0e3e5b666818488fa1b5cb4e4238ffa7",
130
  chunk_size=1
131
  )
132
 
 
265
  chat_llm = AzureChatOpenAI(
266
  deployment_name = global_deployment_id,
267
  model_name= global_model_name,
268
+ temperature = 0.0)
269
+
270
 
271
  prompt = PromptTemplate(
272
  template=get_prompt_template_string(),
 
292
  description='Useful for when you need to answer questions about math.'
293
  )
294
 
295
+ tools=[math_tool,km_tool]
 
 
 
 
 
 
296
  agent=initialize_agent(
297
+ agent=AgentType.OPENAI_FUNCTIONS,
298
  tools=tools,
299
  llm=chat_llm,
300
  verbose=True,
301
  memory=memory,
302
  max_iterations=30,
303
  )
304
+ print("query string:"+question_str)
305
 
306
+ result=km_chain(question_str)
307
  #result=agent.run(question_str)
308
+ print(result)
309
  return result["answer"]
310
 
311
  def make_markdown_table(array):
 
322
  else:
323
  return make_markdown_table(files)
324
 
 
 
 
325
  def update_hr_km(files):
326
  file_paths = [file.name for file in files]
327
  dest_file_path=root_file_path+hr_source_path
 
335
  save_sqlite(sqlite_key, [Path(file_path).name for file_path in file_paths])
336
  return get_hr_files()
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  def clear_all_collection(collection_name):
339
  pass
340
 
 
354
  delete_sql(sqlite_key)
355
  return get_hr_files()
356
 
 
 
 
357
  def num_of_collection(collection_name):
358
  client = get_chroma_client(collection_name)
359
  number = client.get_collection(collection_name).count()
 
531
 
532
  return num_of_collection(tmp_collection)
533
 
534
+
535
  class Logger:
536
  def __init__(self, filename):
537
  self.terminal = sys.stdout
 
548
  def isatty(self):
549
  return False
550
 
551
+
552
  def read_logs():
553
  sys.stdout.flush()
554
  ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
 
556
  with open("output.log", "r", encoding='UTF-8') as f:
557
  return ansi_escape.sub('', f.read())
558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  def lunch_style(demo, logs=gr.Text()):
561
  sys.stdout = Logger("output.log")
 
580
  def gradio_run():
581
  print("User Login")
582
  with gr.Blocks(theme='bethecloud/storj_theme') as demo:
583
+
584
  with gr.Row():
585
  gr.Markdown("# HH Azure Openai Demo")
586
  #Header section
 
673
  cleanDataBtn = gr.Button(value="刪除所有知識以及檔案")
674
  cleanDataBtn.click(clear_hr_datas,outputs=file_list)
675
 
676
+ with gr.Column(scale=3):
677
  with gr.Row():
678
  with gr.Column():
679
  tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
 
688
  audio_html = gr.HTML(htm_audio, visible=False)
689
  with gr.Column():
690
  isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
691
+ gr.Markdown("""
692
+ ### AI 虛擬客服:
693
+ * 這是一個實驗性質的AI 客服
694
+ * 講話超過15秒就不會產生,正在要求放寬限制
695
+ * 想要放誰的頭像都可以, 要放董事長也可以.
696
+ * 訂閱制(有效時間 6/13~7/13)
697
+ """)
698
 
699
  with gr.Row():
700
+ chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
701
  with gr.Row():
702
  with gr.Column(scale=5):
703
  msg = gr.Textbox(
 
710
  def respond(message, chat_history):
711
  vector_search_message = local_vector_search(message, chat_history)
712
  chat_history.append((message, vector_search_message))
 
713
  if isAudio.value is False:
714
  print("isAudio is False")
715
  return '', chat_history, htm_video, ''
 
736
  with gr.Column(scale=1):
737
  youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
738
  youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
739
+
740
+ with gr.Tab("相信人員統計助手"):
741
+
742
+ mypath = root_file_path + believe_source_path
743
+ onlyfiles = os.listdir(mypath)
744
+ df = pd.concat((pd.read_csv(os.path.join(mypath, filename),encoding = "ISO-8859-1") for filename in onlyfiles))
745
+
746
  with gr.Row():
747
  gr.Markdown("""
748
  ### 使用方式
749
+ 資料裡有 `相信` 的active user 資料,
750
+ 右方己經有先算出平均每個問題花費多少, 隨意詢問算法AI 即可算出多少費用.
751
+ 若要改費用, 請在右方prompt 更改數字
752
  """)
753
+ invField = gr.Textbox(visible=False)
754
+ gr.Examples(onlyfiles, label="資料庫檔案", inputs=invField, examples_per_page=4)
755
  with gr.Row():
756
  with gr.Column():
757
+ llm = AzureOpenAI(
758
+ deployment_name=global_deployment_id,
759
+ model_name=global_model_name,
760
+ max_tokens=2000,
761
+ temperature=0,
762
+ )
763
+ be_agent = create_pandas_dataframe_agent(
764
+ llm,
765
+ df,
766
+ max_iterations=30,
767
+ return_intermediate_steps=False,
768
+ max_execution_time=60,
769
+ handle_parsing_errors="Check your output and make sure it conforms!",
770
+ verbose=True)
771
+ def tmp_respond(prompt_str, message, chat_history):
772
+ new_str = prompt_str.format(message=message, chat_history=chat_history)
773
+ answer = be_agent.run(new_str)
774
+ chat_history.append((message, answer))
775
+ """
776
+ try:
777
+ new_str = prompt_str.format(message=message, chat_history=chat_history)
778
+ answer = be_agent.run(new_str)
779
+ chat_history.append((message, answer))
780
+ except Exception as e:
781
+ response = str(e)
782
+ print(f"Got error!{response}")
783
+ if not response.startswith("Could not parse LLM output: `"):
784
+ raise e
785
+ answer = response.removeprefix("Could not parse LLM output: `").removesuffix("`")
786
+ print("answer:"+answer)
787
+ chat_history.append((message, answer))
788
+ """
789
+ return '', chat_history
790
+
791
+ tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
792
  with gr.Row():
793
  with gr.Column(scale=5):
794
  tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
 
796
  tmp_clear = gr.Button("清除對話")
797
  with gr.Column():
798
  prompt_textbox = gr.Textbox("""
799
+ 你是一位專業資料科學家,提供給你的是研究列表.
800
  有下列定義:
801
 
802
+ 1.Title是研究報告的標題
803
 
804
  請以中文回答我下面的問題:{message}
805
  """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
806
+ console = gr.Textbox(lines=11, label="Console", max_lines=11)
807
+ tmp_msg.submit(tmp_respond, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
 
 
 
 
 
808
  tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
809
  with gr.Row():
810
  gr.Examples([
811
+ '你有什麼欄位?',
812
+ '資料裡有屬於台灣(TW)的員工有多少位?',
813
+ '全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
814
+ '如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
815
  ], label="訊息範例", inputs=tmp_msg)
816
+
 
817
  demo.queue(concurrency_count=10)
818
  lunch_style(demo,console)
819
 
820
+ def test():
821
+ mypath = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\ks_source_files"
822
+ onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
823
+ print(onlyfiles)
824
  gradio_run()
825