Change Liao
committed on
Commit
·
65056d9
1
Parent(s):
25be1b2
update requirements.txt
Browse files- app.py +142 -99
- cache.sqlite3 +0 -0
- data/audios/tempfile.mp3 +0 -0
- data/videos/tempfile.mp4 +0 -0
- requirements.txt +18 -5
app.py
CHANGED
@@ -15,23 +15,34 @@ from sqlitedict import SqliteDict
|
|
15 |
|
16 |
import gradio as gr
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
from langchain.llms import AzureOpenAI
|
19 |
from langchain.chat_models import AzureChatOpenAI
|
20 |
|
21 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
22 |
-
from langchain.chains import ConversationalRetrievalChain
|
23 |
|
24 |
from langchain.memory import ChatMessageHistory
|
25 |
-
from langchain import
|
|
|
26 |
from langchain.vectorstores import Chroma
|
27 |
|
28 |
from langchain.text_splitter import CharacterTextSplitter
|
29 |
-
from langchain.
|
30 |
-
from langchain.document_loaders import DirectoryLoader
|
31 |
|
|
|
32 |
from langchain.document_loaders import UnstructuredFileLoader
|
33 |
-
|
34 |
-
from langchain.chains.summarize import load_summarize_chain
|
35 |
|
36 |
import clickhouse_connect
|
37 |
from pathlib import Path
|
@@ -41,21 +52,25 @@ from langchain.document_loaders import YoutubeLoader
|
|
41 |
from azure_utils import AzureVoiceData
|
42 |
from polly_utils import PollyVoiceData, NEURAL_ENGINE
|
43 |
from contextlib import closing
|
|
|
|
|
44 |
|
45 |
#os env
|
46 |
os.environ["OPENAI_API_TYPE"] = "azure"
|
47 |
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
|
48 |
os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
|
49 |
os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
|
|
|
|
|
50 |
global_deployment_id = "CivetGPT"
|
51 |
global_model_name = "gpt-35-turbo"
|
52 |
|
53 |
#chroma settings
|
54 |
chroma_api_impl = "HH_Azure_Openai"
|
55 |
-
#root_file_path = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\"
|
56 |
root_file_path = "./data/" #其實是data 存放的位置
|
57 |
hr_source_path = "hr_source"
|
58 |
ks_source_path = "ks_source"
|
|
|
59 |
|
60 |
sqlite_name = "cache.sqlite3"
|
61 |
sqlite_key="stored_files"
|
@@ -63,6 +78,7 @@ persist_db = "persist_db"
|
|
63 |
hr_collection_name = "hr_db"
|
64 |
chroma_db_impl="localdb+langchain"
|
65 |
tmp_collection="tmp_collection"
|
|
|
66 |
|
67 |
#global text setting
|
68 |
inputText = "問題(按q 或Ctrl + c跳出): "
|
@@ -183,13 +199,13 @@ def get_prompt_summary_string():
|
|
183 |
|
184 |
def get_prompt_template_string():
|
185 |
today = datetime.date.today().strftime("%Y年%m月%d日")
|
186 |
-
template_string = f"
|
187 |
-
請根據歷史對話,針對這次的問題,
|
188 |
不論什麼問題, 都以中文回答
|
189 |
|
190 |
歷史對話: {chat_history}
|
191 |
這次的問題: {question}
|
192 |
-
|
193 |
"""
|
194 |
return template_string
|
195 |
|
@@ -238,23 +254,58 @@ def local_vector_search(question_str,chat_history, collection_name = hr_collecti
|
|
238 |
|
239 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, ai_prefix = "AI超級助理")
|
240 |
|
241 |
-
llm =
|
242 |
-
|
243 |
-
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
prompt = PromptTemplate(
|
247 |
template=get_prompt_template_string(),
|
248 |
input_variables=["question","chat_history"]
|
249 |
)
|
250 |
prompt.format(question=question_str,chat_history=chat_history)
|
251 |
-
|
252 |
-
llm=
|
253 |
retriever=vectorstore.as_retriever(),
|
254 |
memory=memory,
|
255 |
condense_question_prompt=prompt,
|
256 |
)
|
257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
return result["answer"]
|
259 |
|
260 |
def make_markdown_table(array):
|
@@ -308,7 +359,6 @@ def num_of_collection(collection_name):
|
|
308 |
number = client.get_collection(collection_name).count()
|
309 |
return f"目前知識卷裡有{number}卷項目"
|
310 |
|
311 |
-
|
312 |
def clear_tmp_collection():
|
313 |
client = get_chroma_client(tmp_collection)
|
314 |
client.delete_collection(name=tmp_collection)
|
@@ -481,7 +531,6 @@ def kh_update_km(files):
|
|
481 |
|
482 |
return num_of_collection(tmp_collection)
|
483 |
|
484 |
-
|
485 |
class Logger:
|
486 |
def __init__(self, filename):
|
487 |
self.terminal = sys.stdout
|
@@ -498,7 +547,6 @@ class Logger:
|
|
498 |
def isatty(self):
|
499 |
return False
|
500 |
|
501 |
-
|
502 |
def read_logs():
|
503 |
sys.stdout.flush()
|
504 |
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
@@ -506,6 +554,33 @@ def read_logs():
|
|
506 |
with open("output.log", "r", encoding='UTF-8') as f:
|
507 |
return ansi_escape.sub('', f.read())
|
508 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
509 |
|
510 |
def lunch_style(demo, logs=gr.Text()):
|
511 |
sys.stdout = Logger("output.log")
|
@@ -530,7 +605,6 @@ def lunch_style(demo, logs=gr.Text()):
|
|
530 |
def gradio_run():
|
531 |
print("User Login")
|
532 |
with gr.Blocks(theme='bethecloud/storj_theme') as demo:
|
533 |
-
|
534 |
with gr.Row():
|
535 |
gr.Markdown("# HH Azure Openai Demo")
|
536 |
#Header section
|
@@ -636,18 +710,8 @@ def gradio_run():
|
|
636 |
tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
|
637 |
htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
|
638 |
audio_html = gr.HTML(htm_audio, visible=False)
|
639 |
-
def respond(message, chat_history):
|
640 |
-
vector_search_message = local_vector_search(message, chat_history)
|
641 |
-
chat_history.append((message, vector_search_message))
|
642 |
-
|
643 |
-
html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
|
644 |
-
res, new_html_video, video_file_path = do_html_video_speak()
|
645 |
-
|
646 |
-
if res.status_code == 200:
|
647 |
-
return '', chat_history, new_html_video, ''
|
648 |
-
else:
|
649 |
-
return '', chat_history, htm_video, html_audio
|
650 |
with gr.Column():
|
|
|
651 |
gr.Markdown("""
|
652 |
### AI 虛擬客服:
|
653 |
* 這是一個實驗性質的AI 客服
|
@@ -655,6 +719,7 @@ def gradio_run():
|
|
655 |
* 想要放誰的頭像都可以, 要放董事長也可以.
|
656 |
* 訂閱制(有效時間 6/13~7/13)
|
657 |
""")
|
|
|
658 |
with gr.Row():
|
659 |
chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
|
660 |
with gr.Row():
|
@@ -665,8 +730,26 @@ def gradio_run():
|
|
665 |
)
|
666 |
with gr.Column(scale=1):
|
667 |
clear = gr.Button("清除")
|
668 |
-
|
669 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
670 |
#3rd youtube
|
671 |
with gr.Tab("Youtube 影片摘要"):
|
672 |
with gr.Row():
|
@@ -678,88 +761,48 @@ def gradio_run():
|
|
678 |
with gr.Column(scale=1):
|
679 |
youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
|
680 |
youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
|
681 |
-
with gr.Tab("
|
682 |
-
from langchain.agents import create_pandas_dataframe_agent
|
683 |
-
import pandas as pd
|
684 |
-
mypath = root_file_path+ks_source_path
|
685 |
-
onlyfiles = os.listdir(mypath)
|
686 |
-
df = pd.concat((pd.read_csv(os.path.join(mypath, filename)) for filename in onlyfiles))
|
687 |
with gr.Row():
|
688 |
gr.Markdown("""
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
invField = gr.Textbox(visible=False)
|
694 |
-
|
|
|
|
|
695 |
with gr.Row():
|
696 |
with gr.Column():
|
697 |
-
davinci="text-davinci-003"
|
698 |
-
llm = AzureOpenAI(
|
699 |
-
deployment_name=davinci,
|
700 |
-
model_name=davinci,
|
701 |
-
max_tokens=2000,
|
702 |
-
temperature=0,
|
703 |
-
)
|
704 |
-
|
705 |
-
agent=create_pandas_dataframe_agent(
|
706 |
-
llm,
|
707 |
-
df,
|
708 |
-
max_iterations=30,
|
709 |
-
return_intermediate_steps=False,
|
710 |
-
verbose=True
|
711 |
-
)
|
712 |
-
|
713 |
-
def tmp_respond(prompt_str,message, chat_history):
|
714 |
-
try:
|
715 |
-
new_str=prompt_str.format(message=message, chat_history=chat_history)
|
716 |
-
answer=agent.run(new_str)
|
717 |
-
chat_history.append((message, answer))
|
718 |
-
except Exception as e:
|
719 |
-
response = str(e)
|
720 |
-
print(f"Got error!{response}")
|
721 |
-
if not response.startswith("Could not parse LLM output: `"):
|
722 |
-
raise e
|
723 |
-
answer = response.removeprefix("Could not parse LLM output: `").removesuffix("`")
|
724 |
-
chat_history.append((message, answer))
|
725 |
-
return '', chat_history
|
726 |
-
|
727 |
tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
|
728 |
with gr.Row():
|
729 |
with gr.Column(scale=5):
|
730 |
-
tmp_msg = gr.Textbox(
|
731 |
-
show_label=False,
|
732 |
-
placeholder="輸入你的問題",
|
733 |
-
)
|
734 |
with gr.Column(scale=1):
|
735 |
-
|
736 |
with gr.Column():
|
737 |
-
prompt_textbox=gr.Textbox("""
|
738 |
-
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
請以中文回答我下面的問題:{message}
|
744 |
-
|
745 |
-
console=gr.Textbox(lines=11, label="Console",max_lines=11)
|
746 |
-
tmp_msg.submit(
|
747 |
tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
|
748 |
with gr.Row():
|
749 |
gr.Examples([
|
750 |
-
'
|
751 |
-
'
|
752 |
-
'
|
753 |
-
'
|
754 |
-
|
755 |
-
|
756 |
demo.queue(concurrency_count=10)
|
757 |
lunch_style(demo,console)
|
758 |
|
759 |
-
def test():
|
760 |
-
mypath = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\ks_source_files"
|
761 |
-
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
|
762 |
-
print(onlyfiles)
|
763 |
gradio_run()
|
764 |
|
765 |
|
|
|
15 |
|
16 |
import gradio as gr
|
17 |
|
18 |
+
from langchain import PromptTemplate
|
19 |
+
from langchain.agents import Tool
|
20 |
+
from langchain.agents import initialize_agent
|
21 |
+
|
22 |
+
from langchain.agents import AgentType
|
23 |
+
|
24 |
+
from langchain.chains import LLMMathChain
|
25 |
+
from langchain import SerpAPIWrapper
|
26 |
+
from langchain.chains import ConversationalRetrievalChain
|
27 |
+
|
28 |
+
from langchain.chains.summarize import load_summarize_chain
|
29 |
+
|
30 |
from langchain.llms import AzureOpenAI
|
31 |
from langchain.chat_models import AzureChatOpenAI
|
32 |
|
33 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
|
|
34 |
|
35 |
from langchain.memory import ChatMessageHistory
|
36 |
+
from langchain.memory import ConversationBufferMemory
|
37 |
+
|
38 |
from langchain.vectorstores import Chroma
|
39 |
|
40 |
from langchain.text_splitter import CharacterTextSplitter
|
41 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
42 |
|
43 |
+
from langchain.document_loaders import DirectoryLoader
|
44 |
from langchain.document_loaders import UnstructuredFileLoader
|
45 |
+
|
|
|
46 |
|
47 |
import clickhouse_connect
|
48 |
from pathlib import Path
|
|
|
52 |
from azure_utils import AzureVoiceData
|
53 |
from polly_utils import PollyVoiceData, NEURAL_ENGINE
|
54 |
from contextlib import closing
|
55 |
+
from langchain.agents import create_pandas_dataframe_agent
|
56 |
+
import pandas as pd
|
57 |
|
58 |
#os env
|
59 |
os.environ["OPENAI_API_TYPE"] = "azure"
|
60 |
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
|
61 |
os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
|
62 |
os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
|
63 |
+
os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
|
64 |
+
|
65 |
global_deployment_id = "CivetGPT"
|
66 |
global_model_name = "gpt-35-turbo"
|
67 |
|
68 |
#chroma settings
|
69 |
chroma_api_impl = "HH_Azure_Openai"
|
|
|
70 |
root_file_path = "./data/" #其實是data 存放的位置
|
71 |
hr_source_path = "hr_source"
|
72 |
ks_source_path = "ks_source"
|
73 |
+
believe_source_path = 'be_source'
|
74 |
|
75 |
sqlite_name = "cache.sqlite3"
|
76 |
sqlite_key="stored_files"
|
|
|
78 |
hr_collection_name = "hr_db"
|
79 |
chroma_db_impl="localdb+langchain"
|
80 |
tmp_collection="tmp_collection"
|
81 |
+
davinci = "text-davinci-003"
|
82 |
|
83 |
#global text setting
|
84 |
inputText = "問題(按q 或Ctrl + c跳出): "
|
|
|
199 |
|
200 |
def get_prompt_template_string():
|
201 |
today = datetime.date.today().strftime("%Y年%m月%d日")
|
202 |
+
template_string = f"我是鴻海(等同Foxconn)的員工, 你是一個鴻海的人資專家. 今天是{today}".format(today=today)+"""
|
203 |
+
請根據歷史對話,針對這次的問題, 形成獨立問題. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊.
|
204 |
不論什麼問題, 都以中文回答
|
205 |
|
206 |
歷史對話: {chat_history}
|
207 |
這次的問題: {question}
|
208 |
+
人資專家:
|
209 |
"""
|
210 |
return template_string
|
211 |
|
|
|
254 |
|
255 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, ai_prefix = "AI超級助理")
|
256 |
|
257 |
+
llm = AzureOpenAI(
|
258 |
+
deployment_name = global_deployment_id,
|
259 |
+
model_name= global_model_name,
|
260 |
+
temperature = 0.0)
|
261 |
+
|
262 |
+
chat_llm = AzureChatOpenAI(
|
263 |
+
deployment_name = global_deployment_id,
|
264 |
+
model_name= global_model_name,
|
265 |
+
temperature = 0.2)
|
266 |
|
267 |
prompt = PromptTemplate(
|
268 |
template=get_prompt_template_string(),
|
269 |
input_variables=["question","chat_history"]
|
270 |
)
|
271 |
prompt.format(question=question_str,chat_history=chat_history)
|
272 |
+
km_chain = ConversationalRetrievalChain.from_llm(
|
273 |
+
llm=chat_llm,
|
274 |
retriever=vectorstore.as_retriever(),
|
275 |
memory=memory,
|
276 |
condense_question_prompt=prompt,
|
277 |
)
|
278 |
+
km_tool = Tool(
|
279 |
+
name='Knowledge Base',
|
280 |
+
func=km_chain.run,
|
281 |
+
description='一個非常有用的工具, 當要查詢任何公司政策以及鴻海相關資料都使用這個工具'
|
282 |
+
)
|
283 |
+
|
284 |
+
math_math = LLMMathChain(llm=llm,verbose=True)
|
285 |
+
math_tool = Tool(
|
286 |
+
name='Calculator',
|
287 |
+
func=math_math.run,
|
288 |
+
description='Useful for when you need to answer questions about math.'
|
289 |
+
)
|
290 |
+
|
291 |
+
search = SerpAPIWrapper()
|
292 |
+
search_tool = Tool(
|
293 |
+
name="Search",
|
294 |
+
func=search.run,
|
295 |
+
description="當你需要回答一般問題時,非常有用; 不可以用來回答任何跟鴻海有關的問題.",
|
296 |
+
)
|
297 |
+
tools=[math_tool,km_tool, search_tool]
|
298 |
+
agent=initialize_agent(
|
299 |
+
agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
|
300 |
+
tools=tools,
|
301 |
+
llm=chat_llm,
|
302 |
+
verbose=True,
|
303 |
+
memory=memory,
|
304 |
+
max_iterations=30,
|
305 |
+
)
|
306 |
+
result=km_chain(question_str)
|
307 |
+
|
308 |
+
#result=agent.run(question_str)
|
309 |
return result["answer"]
|
310 |
|
311 |
def make_markdown_table(array):
|
|
|
359 |
number = client.get_collection(collection_name).count()
|
360 |
return f"目前知識卷裡有{number}卷項目"
|
361 |
|
|
|
362 |
def clear_tmp_collection():
|
363 |
client = get_chroma_client(tmp_collection)
|
364 |
client.delete_collection(name=tmp_collection)
|
|
|
531 |
|
532 |
return num_of_collection(tmp_collection)
|
533 |
|
|
|
534 |
class Logger:
|
535 |
def __init__(self, filename):
|
536 |
self.terminal = sys.stdout
|
|
|
547 |
def isatty(self):
|
548 |
return False
|
549 |
|
|
|
550 |
def read_logs():
|
551 |
sys.stdout.flush()
|
552 |
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
|
|
554 |
with open("output.log", "r", encoding='UTF-8') as f:
|
555 |
return ansi_escape.sub('', f.read())
|
556 |
|
557 |
+
def pandas_analysis(prompt_str, message, chat_history):
|
558 |
+
dir_path = f"{root_file_path}{believe_source_path}/*.csv"
|
559 |
+
res = glob.glob(dir_path)
|
560 |
+
df = pd.concat(map(pd.read_csv, res))
|
561 |
+
local_deploy_id= "text-davinci-003"
|
562 |
+
local_model_name = "text-davinci-003"
|
563 |
+
llm = AzureOpenAI(
|
564 |
+
deployment_name=local_deploy_id,
|
565 |
+
model_name=local_model_name,
|
566 |
+
max_tokens=2000,
|
567 |
+
temperature=0,
|
568 |
+
)
|
569 |
+
|
570 |
+
be_agent = create_pandas_dataframe_agent(
|
571 |
+
llm,
|
572 |
+
df,
|
573 |
+
prefix="Remove any ` from the Action Input",
|
574 |
+
max_iterations=30,
|
575 |
+
return_intermediate_steps=False,
|
576 |
+
max_execution_time=60,
|
577 |
+
handle_parsing_errors="Check your output and make sure it conforms!",
|
578 |
+
verbose=True)
|
579 |
+
new_str = prompt_str.format(message=message, chat_history=chat_history)
|
580 |
+
print(new_str)
|
581 |
+
answer = be_agent.run(new_str)
|
582 |
+
chat_history.append((message, answer))
|
583 |
+
return '', chat_history
|
584 |
|
585 |
def lunch_style(demo, logs=gr.Text()):
|
586 |
sys.stdout = Logger("output.log")
|
|
|
605 |
def gradio_run():
|
606 |
print("User Login")
|
607 |
with gr.Blocks(theme='bethecloud/storj_theme') as demo:
|
|
|
608 |
with gr.Row():
|
609 |
gr.Markdown("# HH Azure Openai Demo")
|
610 |
#Header section
|
|
|
710 |
tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
|
711 |
htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
|
712 |
audio_html = gr.HTML(htm_audio, visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
713 |
with gr.Column():
|
714 |
+
isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
|
715 |
gr.Markdown("""
|
716 |
### AI 虛擬客服:
|
717 |
* 這是一個實驗性質的AI 客服
|
|
|
719 |
* 想要放誰的頭像都可以, 要放董事長也可以.
|
720 |
* 訂閱制(有效時間 6/13~7/13)
|
721 |
""")
|
722 |
+
|
723 |
with gr.Row():
|
724 |
chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
|
725 |
with gr.Row():
|
|
|
730 |
)
|
731 |
with gr.Column(scale=1):
|
732 |
clear = gr.Button("清除")
|
733 |
+
|
734 |
+
def respond(message, chat_history):
|
735 |
+
vector_search_message = local_vector_search(message, chat_history)
|
736 |
+
chat_history.append((message, vector_search_message))
|
737 |
+
print("vector_search:"+vector_search_message)
|
738 |
+
if isAudio.value is False:
|
739 |
+
print("isAudio is False")
|
740 |
+
return '', chat_history, htm_video, ''
|
741 |
+
else:
|
742 |
+
print("isAudio is True")
|
743 |
+
html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
|
744 |
+
res, new_html_video, video_file_path = do_html_video_speak()
|
745 |
+
|
746 |
+
if res.status_code == 200:
|
747 |
+
return '', chat_history, new_html_video, ''
|
748 |
+
else:
|
749 |
+
return '', chat_history, htm_video, html_audio
|
750 |
+
|
751 |
+
msg.submit(respond, [msg, chatbot], [msg, chatbot, video_html, audio_html], queue=True)
|
752 |
+
clear.click(lambda: None, None, chatbot, queue=False)
|
753 |
#3rd youtube
|
754 |
with gr.Tab("Youtube 影片摘要"):
|
755 |
with gr.Row():
|
|
|
761 |
with gr.Column(scale=1):
|
762 |
youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
|
763 |
youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
|
764 |
+
with gr.Tab("統計助手"):
|
|
|
|
|
|
|
|
|
|
|
765 |
with gr.Row():
|
766 |
gr.Markdown("""
|
767 |
+
### 使用方式
|
768 |
+
已經讀取所有提供的csv 資料, 可以詢問資料任何問題(Talk to data)
|
769 |
+
建議先詢問欄位後, 後續再構思其他問題
|
770 |
+
""")
|
771 |
invField = gr.Textbox(visible=False)
|
772 |
+
dir_path = f"{root_file_path}{believe_source_path}/*.csv"
|
773 |
+
res = glob.glob(dir_path)
|
774 |
+
gr.Examples(res, label="資料庫檔案", inputs=invField, examples_per_page=4)
|
775 |
with gr.Row():
|
776 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
777 |
tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
|
778 |
with gr.Row():
|
779 |
with gr.Column(scale=5):
|
780 |
+
tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
|
|
|
|
|
|
|
781 |
with gr.Column(scale=1):
|
782 |
+
tmp_clear = gr.Button("清除對話")
|
783 |
with gr.Column():
|
784 |
+
prompt_textbox = gr.Textbox("""
|
785 |
+
你是一位專業HR專家以及資料科學家,提供給你的資料是現有的人員資料表.
|
786 |
+
有下列定義:
|
787 |
+
|
788 |
+
1.
|
789 |
+
|
790 |
請以中文回答我下面的問題:{message}
|
791 |
+
""", lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
|
792 |
+
console = gr.Textbox(lines=11, label="Console", max_lines=11)
|
793 |
+
tmp_msg.submit(pandas_analysis, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
|
794 |
tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
|
795 |
with gr.Row():
|
796 |
gr.Examples([
|
797 |
+
'你有什麼欄位?',
|
798 |
+
'資料裡有屬於台灣(TW)的員工有多少位?',
|
799 |
+
'全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
|
800 |
+
'如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
|
801 |
+
], label="訊息範例", inputs=tmp_msg)
|
802 |
+
|
803 |
demo.queue(concurrency_count=10)
|
804 |
lunch_style(demo,console)
|
805 |
|
|
|
|
|
|
|
|
|
806 |
gradio_run()
|
807 |
|
808 |
|
cache.sqlite3
CHANGED
Binary files a/cache.sqlite3 and b/cache.sqlite3 differ
|
|
data/audios/tempfile.mp3
CHANGED
Binary files a/data/audios/tempfile.mp3 and b/data/audios/tempfile.mp3 differ
|
|
data/videos/tempfile.mp4
CHANGED
Binary files a/data/videos/tempfile.mp4 and b/data/videos/tempfile.mp4 differ
|
|
requirements.txt
CHANGED
@@ -3,13 +3,16 @@ aiohttp==3.8.4
|
|
3 |
aiosignal==1.3.1
|
4 |
altair==5.0.1
|
5 |
anyio==3.7.0
|
6 |
-
argilla==1.
|
7 |
argon2-cffi==21.3.0
|
8 |
argon2-cffi-bindings==21.2.0
|
9 |
arrow==1.2.3
|
|
|
10 |
asttokens==2.2.1
|
|
|
11 |
async-timeout==4.0.2
|
12 |
attrs==23.1.0
|
|
|
13 |
backcall==0.2.0
|
14 |
backoff==2.2.1
|
15 |
beautifulsoup4==4.12.2
|
@@ -50,6 +53,7 @@ fonttools==4.39.4
|
|
50 |
fqdn==1.5.1
|
51 |
frozenlist==1.3.3
|
52 |
fsspec==2023.6.0
|
|
|
53 |
gradio==3.34.0
|
54 |
gradio_client==0.2.6
|
55 |
greenlet==2.0.2
|
@@ -69,17 +73,21 @@ jedi==0.18.2
|
|
69 |
Jinja2==3.1.2
|
70 |
jmespath==1.0.1
|
71 |
joblib==1.2.0
|
|
|
72 |
jsonpointer==2.3
|
73 |
jsonschema==4.17.3
|
74 |
jupyter-events==0.6.3
|
|
|
75 |
jupyter_client==8.2.0
|
76 |
jupyter_core==5.3.1
|
77 |
jupyter_server==2.6.0
|
78 |
jupyter_server_terminals==0.4.4
|
|
|
79 |
jupyterlab-pygments==0.2.2
|
|
|
80 |
kiwisolver==1.4.4
|
81 |
-
langchain==0.0.
|
82 |
-
langchainplus-sdk==0.0.
|
83 |
linkify-it-py==2.0.2
|
84 |
lxml==4.9.2
|
85 |
lz4==4.3.2
|
@@ -115,9 +123,10 @@ openpyxl==3.1.2
|
|
115 |
orjson==3.9.1
|
116 |
overrides==7.3.1
|
117 |
packaging==23.1
|
118 |
-
pandas==
|
119 |
pandocfilters==1.5.0
|
120 |
parso==0.8.3
|
|
|
121 |
pdf2image==1.16.3
|
122 |
pdfminer.six==20221105
|
123 |
pickleshare==0.7.5
|
@@ -148,6 +157,8 @@ python-multipart==0.0.6
|
|
148 |
python-pptx==0.6.21
|
149 |
pytube==15.0.0
|
150 |
pytz==2023.3
|
|
|
|
|
151 |
PyYAML==6.0
|
152 |
pyzmq==25.1.0
|
153 |
regex==2023.6.3
|
@@ -169,6 +180,7 @@ SQLAlchemy==2.0.16
|
|
169 |
sqlitedict==2.1.0
|
170 |
stack-data==0.6.2
|
171 |
starlette==0.27.0
|
|
|
172 |
sympy==1.12
|
173 |
tabulate==0.9.0
|
174 |
tenacity==8.2.2
|
@@ -177,6 +189,7 @@ threadpoolctl==3.1.0
|
|
177 |
tiktoken==0.4.0
|
178 |
tinycss2==1.2.1
|
179 |
tokenizers==0.13.3
|
|
|
180 |
toolz==0.12.0
|
181 |
tornado==6.3.2
|
182 |
tqdm==4.65.0
|
@@ -188,7 +201,7 @@ tzdata==2023.3
|
|
188 |
uc-micro-py==1.0.2
|
189 |
unstructured==0.7.3
|
190 |
uri-template==1.2.0
|
191 |
-
urllib3
|
192 |
uvicorn==0.22.0
|
193 |
virtualenv==20.23.0
|
194 |
watchfiles==0.19.0
|
|
|
3 |
aiosignal==1.3.1
|
4 |
altair==5.0.1
|
5 |
anyio==3.7.0
|
6 |
+
argilla==1.10.0
|
7 |
argon2-cffi==21.3.0
|
8 |
argon2-cffi-bindings==21.2.0
|
9 |
arrow==1.2.3
|
10 |
+
astor==0.8.1
|
11 |
asttokens==2.2.1
|
12 |
+
async-lru==2.0.2
|
13 |
async-timeout==4.0.2
|
14 |
attrs==23.1.0
|
15 |
+
Babel==2.12.1
|
16 |
backcall==0.2.0
|
17 |
backoff==2.2.1
|
18 |
beautifulsoup4==4.12.2
|
|
|
53 |
fqdn==1.5.1
|
54 |
frozenlist==1.3.3
|
55 |
fsspec==2023.6.0
|
56 |
+
google-search-results==2.4.2
|
57 |
gradio==3.34.0
|
58 |
gradio_client==0.2.6
|
59 |
greenlet==2.0.2
|
|
|
73 |
Jinja2==3.1.2
|
74 |
jmespath==1.0.1
|
75 |
joblib==1.2.0
|
76 |
+
json5==0.9.14
|
77 |
jsonpointer==2.3
|
78 |
jsonschema==4.17.3
|
79 |
jupyter-events==0.6.3
|
80 |
+
jupyter-lsp==2.2.0
|
81 |
jupyter_client==8.2.0
|
82 |
jupyter_core==5.3.1
|
83 |
jupyter_server==2.6.0
|
84 |
jupyter_server_terminals==0.4.4
|
85 |
+
jupyterlab==4.0.2
|
86 |
jupyterlab-pygments==0.2.2
|
87 |
+
jupyterlab_server==2.23.0
|
88 |
kiwisolver==1.4.4
|
89 |
+
langchain==0.0.217
|
90 |
+
langchainplus-sdk==0.0.17
|
91 |
linkify-it-py==2.0.2
|
92 |
lxml==4.9.2
|
93 |
lz4==4.3.2
|
|
|
123 |
orjson==3.9.1
|
124 |
overrides==7.3.1
|
125 |
packaging==23.1
|
126 |
+
pandas==2.0.2
|
127 |
pandocfilters==1.5.0
|
128 |
parso==0.8.3
|
129 |
+
patsy==0.5.3
|
130 |
pdf2image==1.16.3
|
131 |
pdfminer.six==20221105
|
132 |
pickleshare==0.7.5
|
|
|
157 |
python-pptx==0.6.21
|
158 |
pytube==15.0.0
|
159 |
pytz==2023.3
|
160 |
+
pywin32==306
|
161 |
+
pywinpty==2.0.10
|
162 |
PyYAML==6.0
|
163 |
pyzmq==25.1.0
|
164 |
regex==2023.6.3
|
|
|
180 |
sqlitedict==2.1.0
|
181 |
stack-data==0.6.2
|
182 |
starlette==0.27.0
|
183 |
+
statsmodels==0.14.0
|
184 |
sympy==1.12
|
185 |
tabulate==0.9.0
|
186 |
tenacity==8.2.2
|
|
|
189 |
tiktoken==0.4.0
|
190 |
tinycss2==1.2.1
|
191 |
tokenizers==0.13.3
|
192 |
+
tomli==2.0.1
|
193 |
toolz==0.12.0
|
194 |
tornado==6.3.2
|
195 |
tqdm==4.65.0
|
|
|
201 |
uc-micro-py==1.0.2
|
202 |
unstructured==0.7.3
|
203 |
uri-template==1.2.0
|
204 |
+
urllib3==2.0.3
|
205 |
uvicorn==0.22.0
|
206 |
virtualenv==20.23.0
|
207 |
watchfiles==0.19.0
|