Spaces:

HH-AI-Org
/

HH-azure-openai-poc

Paused

App Files Community

Change Liao committed on Jul 7, 2023

Commit

65056d9

1 Parent(s): 25be1b2

update requirements.txt

Browse files

Files changed (5) hide show

app.py +142 -99
cache.sqlite3 +0 -0
data/audios/tempfile.mp3 +0 -0
data/videos/tempfile.mp4 +0 -0
requirements.txt +18 -5

app.py CHANGED Viewed

@@ -15,23 +15,34 @@ from sqlitedict import SqliteDict
 import gradio as gr
 from langchain.llms import AzureOpenAI
 from langchain.chat_models import AzureChatOpenAI
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ChatMessageHistory
-from langchain import PromptTemplate
 from langchain.vectorstores import Chroma
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.memory import ConversationBufferMemory
-from langchain.document_loaders import DirectoryLoader
 from langchain.document_loaders import UnstructuredFileLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.chains.summarize import load_summarize_chain
 import clickhouse_connect
 from pathlib import Path
@@ -41,21 +52,25 @@ from langchain.document_loaders import YoutubeLoader
 from azure_utils import AzureVoiceData
 from polly_utils import PollyVoiceData, NEURAL_ENGINE
 from contextlib import closing
 #os env
 os.environ["OPENAI_API_TYPE"] = "azure"
 os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
 os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
 os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
 global_deployment_id = "CivetGPT"
 global_model_name = "gpt-35-turbo"
 #chroma settings
 chroma_api_impl = "HH_Azure_Openai"
-#root_file_path = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\"
 root_file_path = "./data/" #其實是data 存放的位置
 hr_source_path = "hr_source"
 ks_source_path = "ks_source"
 sqlite_name = "cache.sqlite3"
 sqlite_key="stored_files"
@@ -63,6 +78,7 @@ persist_db = "persist_db"
 hr_collection_name = "hr_db"
 chroma_db_impl="localdb+langchain"
 tmp_collection="tmp_collection"
 #global text setting
 inputText = "問題(按q 或Ctrl + c跳出): "
@@ -183,13 +199,13 @@ def get_prompt_summary_string():
 def get_prompt_template_string():
     today = datetime.date.today().strftime("%Y年%m月%d日")
-    template_string = f"我是鴻海的員工, 你是一個超級助理. 今天是{today}".format(today=today)+"""
-請根據歷史對話,針對這次的問題, 形成獨立問題並以中文作回答. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊.
 不論什麼問題, 都以中文回答
 歷史對話: {chat_history}
 這次的問題: {question}
-超級助理:
     """
     return template_string
@@ -238,23 +254,58 @@ def local_vector_search(question_str,chat_history, collection_name = hr_collecti
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, ai_prefix = "AI超級助理")
-    llm = AzureChatOpenAI(
-            deployment_name = global_deployment_id,
-            model_name= global_model_name,
-            temperature = 0.2)
     prompt = PromptTemplate(
         template=get_prompt_template_string(),
         input_variables=["question","chat_history"]
     )
     prompt.format(question=question_str,chat_history=chat_history)
-    chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,
         retriever=vectorstore.as_retriever(),
         memory=memory,
         condense_question_prompt=prompt,
     )
-    result = chain({"question": question_str, "chat_history":chat_history})
     return result["answer"]
 def make_markdown_table(array):
@@ -308,7 +359,6 @@ def num_of_collection(collection_name):
     number = client.get_collection(collection_name).count()
     return f"目前知識卷裡有{number}卷項目"
 def clear_tmp_collection():
     client = get_chroma_client(tmp_collection)
     client.delete_collection(name=tmp_collection)
@@ -481,7 +531,6 @@ def kh_update_km(files):
     return num_of_collection(tmp_collection)
 class Logger:
     def __init__(self, filename):
         self.terminal = sys.stdout
@@ -498,7 +547,6 @@ class Logger:
     def isatty(self):
         return False
 def read_logs():
     sys.stdout.flush()
     ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
@@ -506,6 +554,33 @@ def read_logs():
     with open("output.log", "r", encoding='UTF-8') as f:
         return ansi_escape.sub('', f.read())
 def lunch_style(demo, logs=gr.Text()):
     sys.stdout = Logger("output.log")
@@ -530,7 +605,6 @@ def lunch_style(demo, logs=gr.Text()):
 def gradio_run():
     print("User Login")
     with gr.Blocks(theme='bethecloud/storj_theme') as demo:
         with gr.Row():
             gr.Markdown("# HH Azure Openai Demo")
         #Header section
@@ -636,18 +710,8 @@ def gradio_run():
                             tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
                             htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                             audio_html = gr.HTML(htm_audio, visible=False)
-                            def respond(message, chat_history):
-                                vector_search_message = local_vector_search(message, chat_history)
-                                chat_history.append((message, vector_search_message))
-                                html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
-                                res, new_html_video, video_file_path = do_html_video_speak()
-                                if res.status_code == 200:
-                                    return '', chat_history, new_html_video, ''
-                                else:
-                                    return '', chat_history, htm_video, html_audio
                         with gr.Column():
                             gr.Markdown("""
                             ### AI 虛擬客服:
                             * 這是一個實驗性質的AI 客服
@@ -655,6 +719,7 @@ def gradio_run():
                             * 想要放誰的頭像都可以, 要放董事長也可以.
                             * 訂閱制(有效時間 6/13~7/13)
                             """)
                     with gr.Row():
                         chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
                     with gr.Row():
@@ -665,8 +730,26 @@ def gradio_run():
                             )
                         with gr.Column(scale=1):
                             clear = gr.Button("清除")
-                        msg.submit(respond, [msg, chatbot], [msg, chatbot, video_html, audio_html], queue=True)
-                        clear.click(lambda: None, None, chatbot, queue=False)
         #3rd youtube
         with gr.Tab("Youtube 影片摘要"):
             with gr.Row():
@@ -678,88 +761,48 @@ def gradio_run():
                 with gr.Column(scale=1):
                     youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
             youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
-        with gr.Tab("高雄市政府票証"):
-            from langchain.agents import create_pandas_dataframe_agent
-            import pandas as pd
-            mypath = root_file_path+ks_source_path
-            onlyfiles = os.listdir(mypath)
-            df = pd.concat((pd.read_csv(os.path.join(mypath, filename)) for filename in onlyfiles))
             with gr.Row():
                 gr.Markdown("""
-                ### 使用方式
-                這是一個使用高雄公車票證資料, 運用AI協助決策的工具.
-                如果有出現error, 請重新刷新頁面. 有error 就代表運算到最後token 數量超出azure openai 上限了, 這部份還在想辦法調整中.
-                """)
                 invField = gr.Textbox(visible=False)
-                gr.Examples(onlyfiles, label="資料庫檔案", inputs=invField, examples_per_page=4)
             with gr.Row():
                 with gr.Column():
-                    davinci="text-davinci-003"
-                    llm = AzureOpenAI(
-                        deployment_name=davinci,
-                        model_name=davinci,
-                        max_tokens=2000,
-                        temperature=0,
-                    )
-                    agent=create_pandas_dataframe_agent(
-                        llm,
-                        df,
-                        max_iterations=30,
-                        return_intermediate_steps=False,
-                        verbose=True
-                    )
-                    def tmp_respond(prompt_str,message, chat_history):
-                        try:
-                            new_str=prompt_str.format(message=message, chat_history=chat_history)
-                            answer=agent.run(new_str)
-                            chat_history.append((message, answer))
-                        except Exception as e:
-                            response = str(e)
-                            print(f"Got error!{response}")
-                            if not response.startswith("Could not parse LLM output: `"):
-                                raise e
-                            answer = response.removeprefix("Could not parse LLM output: `").removesuffix("`")
-                            chat_history.append((message, answer))
-                        return '', chat_history
                     tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
                     with gr.Row():
                         with gr.Column(scale=5):
-                            tmp_msg = gr.Textbox(
-                                        show_label=False,
-                                        placeholder="輸入你的問題",
-                                    )
                         with gr.Column(scale=1):
-                                tmp_clear = gr.Button("清除對話")
                 with gr.Column():
-                    prompt_textbox=gr.Textbox("""
-你是一位專業的資料科學家,有下列定義:
-1.每個票卡序號代表一名乘客
-2.原始票價視為花費或是消費
-3.轉乘次數: 一名乘客在同一天有任意兩筆紀錄,其中一筆出下車站的資料等於另一筆進上車站的資料,其出下車站代表的車站的轉乘次數就要增加1.
-歷史訊息是 {chat_history}
 請以中文回答我下面的問題:{message}
-                    """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數", interactive=True, max_lines=10)
-                    console=gr.Textbox(lines=11, label="Console",max_lines=11)
-                tmp_msg.submit(tmp_respond, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot], queue=True)
                 tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
             with gr.Row():
                 gr.Examples([
-                    '你有哪些業者?',
-                    '0001站轉乘旅客所佔比例',
-                    '高雄捷運的2022年7月份運輸量與2022年6月份相比, 增減如何?',
-                    '請給我2022年6月至2022年7月之間, 轉乘數量最高排名前五名的車站?',
-                    '0001站 在2022年9月份轉乘數量是未知. 請依2022年7月份到2022年8月份的趨勢, 請以月份做為時間單位, 做出一個數學模型. 用此數學模型來預測 0001站 在2022年9月份的轉乘數量會多少, 增減如何?'
-                ], label="訊息範例",inputs=tmp_msg)
         demo.queue(concurrency_count=10)
         lunch_style(demo,console)
-def test():
-    mypath = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\ks_source_files"
-    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
-    print(onlyfiles)
 gradio_run()

 import gradio as gr
+from langchain import PromptTemplate
+from langchain.agents import Tool
+from langchain.agents import initialize_agent
+from langchain.agents import AgentType
+from langchain.chains import LLMMathChain
+from langchain import SerpAPIWrapper
+from langchain.chains import ConversationalRetrievalChain
+from langchain.chains.summarize import load_summarize_chain
 from langchain.llms import AzureOpenAI
 from langchain.chat_models import AzureChatOpenAI
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.memory import ChatMessageHistory
+from langchain.memory import ConversationBufferMemory
 from langchain.vectorstores import Chroma
 from langchain.text_splitter import CharacterTextSplitter
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import DirectoryLoader
 from langchain.document_loaders import UnstructuredFileLoader
 import clickhouse_connect
 from pathlib import Path
 from azure_utils import AzureVoiceData
 from polly_utils import PollyVoiceData, NEURAL_ENGINE
 from contextlib import closing
+from langchain.agents import create_pandas_dataframe_agent
+import pandas as pd
 #os env
 os.environ["OPENAI_API_TYPE"] = "azure"
 os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
 os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
 os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
+os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
 global_deployment_id = "CivetGPT"
 global_model_name = "gpt-35-turbo"
 #chroma settings
 chroma_api_impl = "HH_Azure_Openai"
 root_file_path = "./data/" #其實是data 存放的位置
 hr_source_path = "hr_source"
 ks_source_path = "ks_source"
+believe_source_path = 'be_source'
 sqlite_name = "cache.sqlite3"
 sqlite_key="stored_files"
 hr_collection_name = "hr_db"
 chroma_db_impl="localdb+langchain"
 tmp_collection="tmp_collection"
+davinci = "text-davinci-003"
 #global text setting
 inputText = "問題(按q 或Ctrl + c跳出): "
 def get_prompt_template_string():
     today = datetime.date.today().strftime("%Y年%m月%d日")
+    template_string = f"我是鴻海(等同Foxconn)的員工, 你是一個鴻海的人資專家. 今天是{today}".format(today=today)+"""
+請根據歷史對話,針對這次的問題, 形成獨立問題. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊.
 不論什麼問題, 都以中文回答
 歷史對話: {chat_history}
 這次的問題: {question}
+人資專家:
     """
     return template_string
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, ai_prefix = "AI超級助理")
+    llm = AzureOpenAI(
+        deployment_name = global_deployment_id,
+        model_name= global_model_name,
+        temperature = 0.0)
+    chat_llm = AzureChatOpenAI(
+        deployment_name = global_deployment_id,
+        model_name= global_model_name,
+        temperature = 0.2)
     prompt = PromptTemplate(
         template=get_prompt_template_string(),
         input_variables=["question","chat_history"]
     )
     prompt.format(question=question_str,chat_history=chat_history)
+    km_chain = ConversationalRetrievalChain.from_llm(
+        llm=chat_llm,
         retriever=vectorstore.as_retriever(),
         memory=memory,
         condense_question_prompt=prompt,
     )
+    km_tool = Tool(
+        name='Knowledge Base',
+        func=km_chain.run,
+        description='一個非常有用的工具, 當要查詢任何公司政策以及鴻海相關資料都使用這個工具'
+    )
+    math_math = LLMMathChain(llm=llm,verbose=True)
+    math_tool = Tool(
+        name='Calculator',
+        func=math_math.run,
+        description='Useful for when you need to answer questions about math.'
+    )
+    search = SerpAPIWrapper()
+    search_tool = Tool(
+        name="Search",
+        func=search.run,
+        description="當你需要回答一般問題時,非常有用; 不可以用來回答任何跟鴻海有關的問題.",
+    )
+    tools=[math_tool,km_tool, search_tool]
+    agent=initialize_agent(
+        agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
+        tools=tools,
+        llm=chat_llm,
+        verbose=True,
+        memory=memory,
+        max_iterations=30,
+    )
+    result=km_chain(question_str)
+    #result=agent.run(question_str)
     return result["answer"]
 def make_markdown_table(array):
     number = client.get_collection(collection_name).count()
     return f"目前知識卷裡有{number}卷項目"
 def clear_tmp_collection():
     client = get_chroma_client(tmp_collection)
     client.delete_collection(name=tmp_collection)
     return num_of_collection(tmp_collection)
 class Logger:
     def __init__(self, filename):
         self.terminal = sys.stdout
     def isatty(self):
         return False
 def read_logs():
     sys.stdout.flush()
     ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
     with open("output.log", "r", encoding='UTF-8') as f:
         return ansi_escape.sub('', f.read())
+def pandas_analysis(prompt_str, message, chat_history):
+    dir_path = f"{root_file_path}{believe_source_path}/*.csv"
+    res = glob.glob(dir_path)
+    df = pd.concat(map(pd.read_csv, res))
+    local_deploy_id= "text-davinci-003"
+    local_model_name = "text-davinci-003"
+    llm = AzureOpenAI(
+        deployment_name=local_deploy_id,
+        model_name=local_model_name,
+        max_tokens=2000,
+        temperature=0,
+    )
+    be_agent = create_pandas_dataframe_agent(
+        llm,
+        df,
+        prefix="Remove any ` from the Action Input",
+        max_iterations=30,
+        return_intermediate_steps=False,
+        max_execution_time=60,
+        handle_parsing_errors="Check your output and make sure it conforms!",
+        verbose=True)
+    new_str = prompt_str.format(message=message, chat_history=chat_history)
+    print(new_str)
+    answer = be_agent.run(new_str)
+    chat_history.append((message, answer))
+    return '', chat_history
 def lunch_style(demo, logs=gr.Text()):
     sys.stdout = Logger("output.log")
 def gradio_run():
     print("User Login")
     with gr.Blocks(theme='bethecloud/storj_theme') as demo:
         with gr.Row():
             gr.Markdown("# HH Azure Openai Demo")
         #Header section
                             tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
                             htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                             audio_html = gr.HTML(htm_audio, visible=False)
                         with gr.Column():
+                            isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
                             gr.Markdown("""
                             ### AI 虛擬客服:
                             * 這是一個實驗性質的AI 客服
                             * 想要放誰的頭像都可以, 要放董事長也可以.
                             * 訂閱制(有效時間 6/13~7/13)
                             """)
                     with gr.Row():
                         chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
                     with gr.Row():
                             )
                         with gr.Column(scale=1):
                             clear = gr.Button("清除")
+                    def respond(message, chat_history):
+                        vector_search_message = local_vector_search(message, chat_history)
+                        chat_history.append((message, vector_search_message))
+                        print("vector_search:"+vector_search_message)
+                        if isAudio.value is False:
+                            print("isAudio is False")
+                            return '', chat_history, htm_video, ''
+                        else:
+                            print("isAudio is True")
+                            html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
+                            res, new_html_video, video_file_path = do_html_video_speak()
+                            if res.status_code == 200:
+                                return '', chat_history, new_html_video, ''
+                            else:
+                                return '', chat_history, htm_video, html_audio
+                    msg.submit(respond, [msg, chatbot], [msg, chatbot, video_html, audio_html], queue=True)
+                    clear.click(lambda: None, None, chatbot, queue=False)
         #3rd youtube
         with gr.Tab("Youtube 影片摘要"):
             with gr.Row():
                 with gr.Column(scale=1):
                     youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
             youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
+        with gr.Tab("統計助手"):
             with gr.Row():
                 gr.Markdown("""
+### 使用方式
+已經讀取所有提供的csv 資料, 可以詢問資料任何問題(Talk to data)
+建議先詢問欄位後, 後續再構思其他問題
+                        """)
                 invField = gr.Textbox(visible=False)
+                dir_path = f"{root_file_path}{believe_source_path}/*.csv"
+                res = glob.glob(dir_path)
+                gr.Examples(res, label="資料庫檔案", inputs=invField, examples_per_page=4)
             with gr.Row():
                 with gr.Column():
                     tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
                     with gr.Row():
                         with gr.Column(scale=5):
+                            tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
                         with gr.Column(scale=1):
+                            tmp_clear = gr.Button("清除對話")
                 with gr.Column():
+                    prompt_textbox = gr.Textbox("""
+你是一位專業HR專家以及資料科學家,提供給你的資料是現有的人員資料表.
+有下列定義:
+1.
 請以中文回答我下面的問題:{message}
+                            """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
+                    console = gr.Textbox(lines=11, label="Console", max_lines=11)
+                tmp_msg.submit(pandas_analysis, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
                 tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
             with gr.Row():
                 gr.Examples([
+                    '你有什麼欄位?',
+                    '資料裡有屬於台灣(TW)的員工有多少位?',
+                    '全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
+                    '如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
+                    ], label="訊息範例", inputs=tmp_msg)
         demo.queue(concurrency_count=10)
         lunch_style(demo,console)
 gradio_run()

cache.sqlite3 CHANGED Viewed

Binary files a/cache.sqlite3 and b/cache.sqlite3 differ

data/audios/tempfile.mp3 CHANGED Viewed

Binary files a/data/audios/tempfile.mp3 and b/data/audios/tempfile.mp3 differ

data/videos/tempfile.mp4 CHANGED Viewed

Binary files a/data/videos/tempfile.mp4 and b/data/videos/tempfile.mp4 differ

requirements.txt CHANGED Viewed

@@ -3,13 +3,16 @@ aiohttp==3.8.4
 aiosignal==1.3.1
 altair==5.0.1
 anyio==3.7.0
-argilla==1.9.0
 argon2-cffi==21.3.0
 argon2-cffi-bindings==21.2.0
 arrow==1.2.3
 asttokens==2.2.1
 async-timeout==4.0.2
 attrs==23.1.0
 backcall==0.2.0
 backoff==2.2.1
 beautifulsoup4==4.12.2
@@ -50,6 +53,7 @@ fonttools==4.39.4
 fqdn==1.5.1
 frozenlist==1.3.3
 fsspec==2023.6.0
 gradio==3.34.0
 gradio_client==0.2.6
 greenlet==2.0.2
@@ -69,17 +73,21 @@ jedi==0.18.2
 Jinja2==3.1.2
 jmespath==1.0.1
 joblib==1.2.0
 jsonpointer==2.3
 jsonschema==4.17.3
 jupyter-events==0.6.3
 jupyter_client==8.2.0
 jupyter_core==5.3.1
 jupyter_server==2.6.0
 jupyter_server_terminals==0.4.4
 jupyterlab-pygments==0.2.2
 kiwisolver==1.4.4
-langchain==0.0.200
-langchainplus-sdk==0.0.10
 linkify-it-py==2.0.2
 lxml==4.9.2
 lz4==4.3.2
@@ -115,9 +123,10 @@ openpyxl==3.1.2
 orjson==3.9.1
 overrides==7.3.1
 packaging==23.1
-pandas==1.5.3
 pandocfilters==1.5.0
 parso==0.8.3
 pdf2image==1.16.3
 pdfminer.six==20221105
 pickleshare==0.7.5
@@ -148,6 +157,8 @@ python-multipart==0.0.6
 python-pptx==0.6.21
 pytube==15.0.0
 pytz==2023.3
 PyYAML==6.0
 pyzmq==25.1.0
 regex==2023.6.3
@@ -169,6 +180,7 @@ SQLAlchemy==2.0.16
 sqlitedict==2.1.0
 stack-data==0.6.2
 starlette==0.27.0
 sympy==1.12
 tabulate==0.9.0
 tenacity==8.2.2
@@ -177,6 +189,7 @@ threadpoolctl==3.1.0
 tiktoken==0.4.0
 tinycss2==1.2.1
 tokenizers==0.13.3
 toolz==0.12.0
 tornado==6.3.2
 tqdm==4.65.0
@@ -188,7 +201,7 @@ tzdata==2023.3
 uc-micro-py==1.0.2
 unstructured==0.7.3
 uri-template==1.2.0
-urllib3
 uvicorn==0.22.0
 virtualenv==20.23.0
 watchfiles==0.19.0

 aiosignal==1.3.1
 altair==5.0.1
 anyio==3.7.0
+argilla==1.10.0
 argon2-cffi==21.3.0
 argon2-cffi-bindings==21.2.0
 arrow==1.2.3
+astor==0.8.1
 asttokens==2.2.1
+async-lru==2.0.2
 async-timeout==4.0.2
 attrs==23.1.0
+Babel==2.12.1
 backcall==0.2.0
 backoff==2.2.1
 beautifulsoup4==4.12.2
 fqdn==1.5.1
 frozenlist==1.3.3
 fsspec==2023.6.0
+google-search-results==2.4.2
 gradio==3.34.0
 gradio_client==0.2.6
 greenlet==2.0.2
 Jinja2==3.1.2
 jmespath==1.0.1
 joblib==1.2.0
+json5==0.9.14
 jsonpointer==2.3
 jsonschema==4.17.3
 jupyter-events==0.6.3
+jupyter-lsp==2.2.0
 jupyter_client==8.2.0
 jupyter_core==5.3.1
 jupyter_server==2.6.0
 jupyter_server_terminals==0.4.4
+jupyterlab==4.0.2
 jupyterlab-pygments==0.2.2
+jupyterlab_server==2.23.0
 kiwisolver==1.4.4
+langchain==0.0.217
+langchainplus-sdk==0.0.17
 linkify-it-py==2.0.2
 lxml==4.9.2
 lz4==4.3.2
 orjson==3.9.1
 overrides==7.3.1
 packaging==23.1
+pandas==2.0.2
 pandocfilters==1.5.0
 parso==0.8.3
+patsy==0.5.3
 pdf2image==1.16.3
 pdfminer.six==20221105
 pickleshare==0.7.5
 python-pptx==0.6.21
 pytube==15.0.0
 pytz==2023.3
+pywin32==306
+pywinpty==2.0.10
 PyYAML==6.0
 pyzmq==25.1.0
 regex==2023.6.3
 sqlitedict==2.1.0
 stack-data==0.6.2
 starlette==0.27.0
+statsmodels==0.14.0
 sympy==1.12
 tabulate==0.9.0
 tenacity==8.2.2
 tiktoken==0.4.0
 tinycss2==1.2.1
 tokenizers==0.13.3
+tomli==2.0.1
 toolz==0.12.0
 tornado==6.3.2
 tqdm==4.65.0
 uc-micro-py==1.0.2
 unstructured==0.7.3
 uri-template==1.2.0
+urllib3==2.0.3
 uvicorn==0.22.0
 virtualenv==20.23.0
 watchfiles==0.19.0