Change Liao committed on
Commit
a9bbb0b
·
1 Parent(s): ebf3e17

Update session key and related endpoint information

Browse files
Files changed (1) hide show
  1. app.py +87 -85
app.py CHANGED
@@ -59,15 +59,16 @@ import pandas as pd
59
  #os env
60
  os.environ["OPENAI_API_TYPE"] = "azure"
61
  os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
62
- os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
63
- os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
64
  os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
65
 
66
- global_deployment_id = "CivetGPT"
67
- global_model_name = "gpt-35-turbo"
68
 
69
  #chroma settings
70
  chroma_api_impl = "HH_Azure_Openai"
 
71
  root_file_path = "./data/" #其實是data 存放的位置
72
  hr_source_path = "hr_source"
73
  ks_source_path = "ks_source"
@@ -79,7 +80,6 @@ persist_db = "persist_db"
79
  hr_collection_name = "hr_db"
80
  chroma_db_impl="localdb+langchain"
81
  tmp_collection="tmp_collection"
82
- davinci = "text-davinci-003"
83
 
84
  #global text setting
85
  inputText = "問題(按q 或Ctrl + c跳出): "
@@ -124,7 +124,9 @@ def get_openaiembeddings():
124
  return OpenAIEmbeddings(
125
  deployment="CivetGPT_embedding",
126
  model="text-embedding-ada-002",
127
- #embed_batch_size=1
 
 
128
  chunk_size=1
129
  )
130
 
@@ -263,7 +265,8 @@ def local_vector_search(question_str,chat_history, collection_name = hr_collecti
263
  chat_llm = AzureChatOpenAI(
264
  deployment_name = global_deployment_id,
265
  model_name= global_model_name,
266
- temperature = 0.2)
 
267
 
268
  prompt = PromptTemplate(
269
  template=get_prompt_template_string(),
@@ -289,24 +292,20 @@ def local_vector_search(question_str,chat_history, collection_name = hr_collecti
289
  description='Useful for when you need to answer questions about math.'
290
  )
291
 
292
- search = SerpAPIWrapper()
293
- search_tool = Tool(
294
- name="Search",
295
- func=search.run,
296
- description="當你需要回答一般問題時,非常有用; 不可以用來回答任何跟鴻海有關的問題.",
297
- )
298
- tools=[math_tool,km_tool, search_tool]
299
  agent=initialize_agent(
300
- agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
301
  tools=tools,
302
  llm=chat_llm,
303
  verbose=True,
304
  memory=memory,
305
  max_iterations=30,
306
  )
307
- result=km_chain(question_str)
308
 
 
309
  #result=agent.run(question_str)
 
310
  return result["answer"]
311
 
312
  def make_markdown_table(array):
@@ -323,9 +322,6 @@ def get_hr_files():
323
  else:
324
  return make_markdown_table(files)
325
 
326
- def get_be_csv_files():
327
- return make_markdown_table(glob.glob(root_file_path+believe_source_path+"/*.csv"))
328
-
329
  def update_hr_km(files):
330
  file_paths = [file.name for file in files]
331
  dest_file_path=root_file_path+hr_source_path
@@ -339,20 +335,6 @@ def update_hr_km(files):
339
  save_sqlite(sqlite_key, [Path(file_path).name for file_path in file_paths])
340
  return get_hr_files()
341
 
342
- def update_be_csv_km(files):
343
- file_paths = [file.name for file in files]
344
- dest_file_path=root_file_path+believe_source_path
345
- if not os.path.exists(dest_file_path):
346
- os.makedirs(dest_file_path)
347
-
348
- for file in file_paths:
349
- shutil.copy(file, dest_file_path)
350
-
351
- if files == None:
352
- return ''
353
- else:
354
- return make_markdown_table(files)
355
-
356
  def clear_all_collection(collection_name):
357
  pass
358
 
@@ -372,9 +354,6 @@ def clear_hr_datas():
372
  delete_sql(sqlite_key)
373
  return get_hr_files()
374
 
375
- def clear_be_csv_datas():
376
- all_files_under_diretory(root_file_path+believe_source_path)
377
-
378
  def num_of_collection(collection_name):
379
  client = get_chroma_client(collection_name)
380
  number = client.get_collection(collection_name).count()
@@ -552,6 +531,7 @@ def kh_update_km(files):
552
 
553
  return num_of_collection(tmp_collection)
554
 
 
555
  class Logger:
556
  def __init__(self, filename):
557
  self.terminal = sys.stdout
@@ -568,6 +548,7 @@ class Logger:
568
  def isatty(self):
569
  return False
570
 
 
571
  def read_logs():
572
  sys.stdout.flush()
573
  ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
@@ -575,33 +556,6 @@ def read_logs():
575
  with open("output.log", "r", encoding='UTF-8') as f:
576
  return ansi_escape.sub('', f.read())
577
 
578
- def pandas_analysis(prompt_str, message, chat_history):
579
- dir_path = f"{root_file_path}{believe_source_path}/*.csv"
580
- res = glob.glob(dir_path)
581
- df = pd.concat((pd.read_csv(f, encoding='unicode_escape') for f in res), ignore_index=True)
582
- local_deploy_id= "text-davinci-003"
583
- local_model_name = "text-davinci-003"
584
- llm = AzureOpenAI(
585
- deployment_name=local_deploy_id,
586
- model_name=local_model_name,
587
- max_tokens=2000,
588
- temperature=0,
589
- )
590
-
591
- be_agent = create_pandas_dataframe_agent(
592
- llm,
593
- df,
594
- prefix="Remove any ` from the Action Input",
595
- max_iterations=30,
596
- return_intermediate_steps=False,
597
- max_execution_time=60,
598
- handle_parsing_errors="Check your output and make sure it conforms!",
599
- verbose=True)
600
- new_str = prompt_str.format(message=message, chat_history=chat_history)
601
- print(new_str)
602
- answer = be_agent.run(new_str)
603
- chat_history.append((message, answer))
604
- return '', chat_history
605
 
606
  def lunch_style(demo, logs=gr.Text()):
607
  sys.stdout = Logger("output.log")
@@ -626,6 +580,7 @@ def lunch_style(demo, logs=gr.Text()):
626
  def gradio_run():
627
  print("User Login")
628
  with gr.Blocks(theme='bethecloud/storj_theme') as demo:
 
629
  with gr.Row():
630
  gr.Markdown("# HH Azure Openai Demo")
631
  #Header section
@@ -718,7 +673,7 @@ def gradio_run():
718
  cleanDataBtn = gr.Button(value="刪除所有知識以及檔案")
719
  cleanDataBtn.click(clear_hr_datas,outputs=file_list)
720
 
721
- with gr.Column(scale=1):
722
  with gr.Row():
723
  with gr.Column():
724
  tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
@@ -733,10 +688,16 @@ def gradio_run():
733
  audio_html = gr.HTML(htm_audio, visible=False)
734
  with gr.Column():
735
  isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
736
-
 
 
 
 
 
 
737
 
738
  with gr.Row():
739
- chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=600)
740
  with gr.Row():
741
  with gr.Column(scale=5):
742
  msg = gr.Textbox(
@@ -749,7 +710,6 @@ def gradio_run():
749
  def respond(message, chat_history):
750
  vector_search_message = local_vector_search(message, chat_history)
751
  chat_history.append((message, vector_search_message))
752
- print("vector_search:"+vector_search_message)
753
  if isAudio.value is False:
754
  print("isAudio is False")
755
  return '', chat_history, htm_video, ''
@@ -776,17 +736,59 @@ def gradio_run():
776
  with gr.Column(scale=1):
777
  youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
778
  youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
779
- with gr.Tab("統計助手"):
 
 
 
 
 
 
780
  with gr.Row():
781
  gr.Markdown("""
782
  ### 使用方式
783
- 已經讀取所有提供的csv 資料, 可以詢問資料任何問題(Talk to data)
784
- 建議先詢問欄位後, 後續再構思其他問題
 
785
  """)
786
-
 
787
  with gr.Row():
788
  with gr.Column():
789
- tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=700)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
  with gr.Row():
791
  with gr.Column(scale=5):
792
  tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
@@ -794,30 +796,30 @@ def gradio_run():
794
  tmp_clear = gr.Button("清除對話")
795
  with gr.Column():
796
  prompt_textbox = gr.Textbox("""
797
- 你是一位資料科學家,提供給你的資料是2023年每一週的click 次數.
798
  有下列定義:
799
 
800
- 1. 欄位 23W01 代表2023年的第一個星期; 23W02 代表2023年的第二個星期
801
 
802
  請以中文回答我下面的問題:{message}
803
  """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
804
- be_csv_file_list=gr.Textbox(get_be_csv_files, label="CSV Files", placeholder="沒有任何檔案存在", max_lines=10, lines=10)
805
- upload_button = gr.UploadButton("上傳統計資料(.csv格式)",file_types=[".csv"],file_count="multiple")
806
- upload_button.upload(update_be_csv_km, inputs=upload_button, outputs=be_csv_file_list).then(get_be_csv_files,outputs=be_csv_file_list)
807
-
808
- cleanDataBtn = gr.Button(value="刪除所有csv 資料")
809
- cleanDataBtn.click(clear_be_csv_datas, outputs=be_csv_file_list).then(get_be_csv_files, outputs=be_csv_file_list)
810
- tmp_msg.submit(pandas_analysis, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
811
  tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
812
  with gr.Row():
813
  gr.Examples([
814
- '你有什麼欄位?'
 
 
 
815
  ], label="訊息範例", inputs=tmp_msg)
816
- with gr.Row():
817
- console = gr.Textbox(lines=11, label="Console", max_lines=11)
818
  demo.queue(concurrency_count=10)
819
  lunch_style(demo,console)
820
 
 
 
 
 
821
  gradio_run()
822
 
823
-
 
59
  #os env
60
  os.environ["OPENAI_API_TYPE"] = "azure"
61
  os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
62
+ os.environ["OPENAI_API_BASE"] = "https://hh-azure-openai-poc.openai.azure.com/"
63
+ os.environ["OPENAI_API_KEY"] = "41dd3ccda6a2489db375f3fe2a440953"
64
  os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
65
 
66
+ global_deployment_id = "gpt-35-turbo-16k"
67
+ global_model_name = "gpt-35-turbo-16k"
68
 
69
  #chroma settings
70
  chroma_api_impl = "HH_Azure_Openai"
71
+ #root_file_path = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\"
72
  root_file_path = "./data/" #其實是data 存放的位置
73
  hr_source_path = "hr_source"
74
  ks_source_path = "ks_source"
 
80
  hr_collection_name = "hr_db"
81
  chroma_db_impl="localdb+langchain"
82
  tmp_collection="tmp_collection"
 
83
 
84
  #global text setting
85
  inputText = "問題(按q 或Ctrl + c跳出): "
 
124
  return OpenAIEmbeddings(
125
  deployment="CivetGPT_embedding",
126
  model="text-embedding-ada-002",
127
+ openai_api_base="https://civet-project-001.openai.azure.com/",
128
+ openai_api_type="azure",
129
+ openai_api_key = "0e3e5b666818488fa1b5cb4e4238ffa7",
130
  chunk_size=1
131
  )
132
 
 
265
  chat_llm = AzureChatOpenAI(
266
  deployment_name = global_deployment_id,
267
  model_name= global_model_name,
268
+ temperature = 0.0)
269
+
270
 
271
  prompt = PromptTemplate(
272
  template=get_prompt_template_string(),
 
292
  description='Useful for when you need to answer questions about math.'
293
  )
294
 
295
+ tools=[math_tool,km_tool]
 
 
 
 
 
 
296
  agent=initialize_agent(
297
+ agent=AgentType.OPENAI_FUNCTIONS,
298
  tools=tools,
299
  llm=chat_llm,
300
  verbose=True,
301
  memory=memory,
302
  max_iterations=30,
303
  )
304
+ print("query string:"+question_str)
305
 
306
+ result=km_chain(question_str)
307
  #result=agent.run(question_str)
308
+ print(result)
309
  return result["answer"]
310
 
311
  def make_markdown_table(array):
 
322
  else:
323
  return make_markdown_table(files)
324
 
 
 
 
325
  def update_hr_km(files):
326
  file_paths = [file.name for file in files]
327
  dest_file_path=root_file_path+hr_source_path
 
335
  save_sqlite(sqlite_key, [Path(file_path).name for file_path in file_paths])
336
  return get_hr_files()
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  def clear_all_collection(collection_name):
339
  pass
340
 
 
354
  delete_sql(sqlite_key)
355
  return get_hr_files()
356
 
 
 
 
357
  def num_of_collection(collection_name):
358
  client = get_chroma_client(collection_name)
359
  number = client.get_collection(collection_name).count()
 
531
 
532
  return num_of_collection(tmp_collection)
533
 
534
+
535
  class Logger:
536
  def __init__(self, filename):
537
  self.terminal = sys.stdout
 
548
  def isatty(self):
549
  return False
550
 
551
+
552
  def read_logs():
553
  sys.stdout.flush()
554
  ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
 
556
  with open("output.log", "r", encoding='UTF-8') as f:
557
  return ansi_escape.sub('', f.read())
558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  def lunch_style(demo, logs=gr.Text()):
561
  sys.stdout = Logger("output.log")
 
580
  def gradio_run():
581
  print("User Login")
582
  with gr.Blocks(theme='bethecloud/storj_theme') as demo:
583
+
584
  with gr.Row():
585
  gr.Markdown("# HH Azure Openai Demo")
586
  #Header section
 
673
  cleanDataBtn = gr.Button(value="刪除所有知識以及檔案")
674
  cleanDataBtn.click(clear_hr_datas,outputs=file_list)
675
 
676
+ with gr.Column(scale=3):
677
  with gr.Row():
678
  with gr.Column():
679
  tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
 
688
  audio_html = gr.HTML(htm_audio, visible=False)
689
  with gr.Column():
690
  isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
691
+ gr.Markdown("""
692
+ ### AI 虛擬客服:
693
+ * 這是一個實驗性質的AI 客服
694
+ * 講話超過15秒就不會產生,正在要求放寬限制
695
+ * 想要放誰的頭像都可以, 要放董事長也可以.
696
+ * 訂閱制(有效時間 6/13~7/13)
697
+ """)
698
 
699
  with gr.Row():
700
+ chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
701
  with gr.Row():
702
  with gr.Column(scale=5):
703
  msg = gr.Textbox(
 
710
  def respond(message, chat_history):
711
  vector_search_message = local_vector_search(message, chat_history)
712
  chat_history.append((message, vector_search_message))
 
713
  if isAudio.value is False:
714
  print("isAudio is False")
715
  return '', chat_history, htm_video, ''
 
736
  with gr.Column(scale=1):
737
  youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
738
  youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
739
+
740
+ with gr.Tab("相信人員統計助手"):
741
+
742
+ mypath = root_file_path + believe_source_path
743
+ onlyfiles = os.listdir(mypath)
744
+ df = pd.concat((pd.read_csv(os.path.join(mypath, filename),encoding = "ISO-8859-1") for filename in onlyfiles))
745
+
746
  with gr.Row():
747
  gr.Markdown("""
748
  ### 使用方式
749
+ 資料裡有 `相信` 的active user 資料,
750
+ 右方己經有先算出平均每個問題花費多少, 隨意詢問算法AI 即可算出多少費用.
751
+ 若要改費用, 請在右方prompt 更改數字
752
  """)
753
+ invField = gr.Textbox(visible=False)
754
+ gr.Examples(onlyfiles, label="資料庫檔案", inputs=invField, examples_per_page=4)
755
  with gr.Row():
756
  with gr.Column():
757
+ llm = AzureOpenAI(
758
+ deployment_name=global_deployment_id,
759
+ model_name=global_model_name,
760
+ max_tokens=2000,
761
+ temperature=0,
762
+ )
763
+ be_agent = create_pandas_dataframe_agent(
764
+ llm,
765
+ df,
766
+ max_iterations=30,
767
+ return_intermediate_steps=False,
768
+ max_execution_time=60,
769
+ handle_parsing_errors="Check your output and make sure it conforms!",
770
+ verbose=True)
771
+ def tmp_respond(prompt_str, message, chat_history):
772
+ new_str = prompt_str.format(message=message, chat_history=chat_history)
773
+ answer = be_agent.run(new_str)
774
+ chat_history.append((message, answer))
775
+ """
776
+ try:
777
+ new_str = prompt_str.format(message=message, chat_history=chat_history)
778
+ answer = be_agent.run(new_str)
779
+ chat_history.append((message, answer))
780
+ except Exception as e:
781
+ response = str(e)
782
+ print(f"Got error!{response}")
783
+ if not response.startswith("Could not parse LLM output: `"):
784
+ raise e
785
+ answer = response.removeprefix("Could not parse LLM output: `").removesuffix("`")
786
+ print("answer:"+answer)
787
+ chat_history.append((message, answer))
788
+ """
789
+ return '', chat_history
790
+
791
+ tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
792
  with gr.Row():
793
  with gr.Column(scale=5):
794
  tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
 
796
  tmp_clear = gr.Button("清除對話")
797
  with gr.Column():
798
  prompt_textbox = gr.Textbox("""
799
+ 你是一位專業資料科學家,提供給你的是研究列表.
800
  有下列定義:
801
 
802
+ 1.Title是研究報告的標題
803
 
804
  請以中文回答我下面的問題:{message}
805
  """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
806
+ console = gr.Textbox(lines=11, label="Console", max_lines=11)
807
+ tmp_msg.submit(tmp_respond, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
 
 
 
 
 
808
  tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
809
  with gr.Row():
810
  gr.Examples([
811
+ '你有什麼欄位?',
812
+ '資料裡有屬於台灣(TW)的員工有多少位?',
813
+ '全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
814
+ '如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
815
  ], label="訊息範例", inputs=tmp_msg)
816
+
 
817
  demo.queue(concurrency_count=10)
818
  lunch_style(demo,console)
819
 
820
+ def test():
821
+ mypath = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\ks_source_files"
822
+ onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
823
+ print(onlyfiles)
824
  gradio_run()
825