Spaces:

HH-AI-Org
/

HH-azure-openai-poc

Paused

Change Liao

Merge remote-tracking branch 'origin/main'

2ea9f6d almost 2 years ago

31.5 kB

	import os
	import datetime
	import glob
	import shutil
	import requests
	import io
	import sys
	import re
	import boto3
	from os import listdir
	from os.path import isfile, join

	import gradio
	from sqlitedict import SqliteDict

	import gradio as gr

	from langchain import PromptTemplate
	from langchain.agents import Tool
	from langchain.agents import load_tools
	from langchain.agents import initialize_agent

	from langchain.agents import AgentType

	from langchain.chains import LLMMathChain
	from langchain import SerpAPIWrapper
	from langchain.chains import ConversationalRetrievalChain

	from langchain.chains.summarize import load_summarize_chain

	from langchain.llms import AzureOpenAI
	from langchain.chat_models import AzureChatOpenAI

	from langchain.embeddings.openai import OpenAIEmbeddings

	from langchain.memory import ChatMessageHistory
	from langchain.memory import ConversationBufferMemory

	from langchain.vectorstores import Chroma

	from langchain.text_splitter import CharacterTextSplitter
	from langchain.text_splitter import RecursiveCharacterTextSplitter

	from langchain.document_loaders import DirectoryLoader
	from langchain.document_loaders import UnstructuredFileLoader


	import clickhouse_connect
	from pathlib import Path

	from langchain.document_loaders import YoutubeLoader

	from azure_utils import AzureVoiceData
	from polly_utils import PollyVoiceData, NEURAL_ENGINE
	from contextlib import closing
	from langchain.agents import create_pandas_dataframe_agent
	import pandas as pd

	# OS environment consumed by the Azure OpenAI / SerpAPI clients.
	os.environ["OPENAI_API_TYPE"] = "azure"
	os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
	os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
	# SECURITY: API keys must not live in source control. OPENAI_API_KEY and
	# SERPAPI_API_KEY are expected to be provided by the process environment
	# (e.g. deployment secrets). Any key that was previously committed here
	# should be treated as leaked and rotated.
	for _secret_name in ("OPENAI_API_KEY", "SERPAPI_API_KEY"):
	    if not os.environ.get(_secret_name):
	        print(
	            f"WARNING: environment variable {_secret_name} is not set; "
	            "calls that need it will fail.",
	            file=sys.stderr,
	        )

	global_deployment_id = "CivetGPT"
	global_model_name = "gpt-35-turbo"

	# Chroma settings
	chroma_api_impl = "HH_Azure_Openai"
	root_file_path = "./data/"  # actually the location where data is stored
	hr_source_path = "hr_source"
	ks_source_path = "ks_source"
	believe_source_path = 'be_source'

	sqlite_name = "cache.sqlite3"
	sqlite_key = "stored_files"
	persist_db = "persist_db"
	hr_collection_name = "hr_db"
	chroma_db_impl = "localdb+langchain"
	tmp_collection = "tmp_collection"
	davinci = "text-davinci-003"

	# Global text settings (console prompt and the fallback refusal message)
	inputText = "問題(按q 或Ctrl + c跳出): "
	refuse_string = "服務被拒. 內容可能涉及敏感字詞,政治,煽動他人或是其他不當言詞, 請改以其他內容嚐試"

	# Talking-head video / voice settings
	LOOPING_TALKING_HEAD = "./data/videos/Masahiro.mp4"
	TALKING_HEAD_WIDTH = "192"
	AZURE_VOICE_DATA = AzureVoiceData()
	POLLY_VOICE_DATA = PollyVoiceData()


	def save_sqlite(key, value):
	    """Prepend ``value`` to the list stored under ``key`` in the SQLite cache.

	    Args:
	        key: cache key to update.
	        value: list of new entries; it is concatenated in front of whatever
	            is already stored under ``key``.

	    Any failure is printed rather than raised, matching the other cache
	    helpers in this file.
	    """
	    try:
	        with SqliteDict(sqlite_name) as mydict:
	            # The very first save for a key has nothing to extend. Without
	            # this fallback the lookup raised KeyError, the broad handler
	            # below swallowed it, and the value was never stored.
	            try:
	                old_value = mydict[key]
	            except KeyError:
	                old_value = []
	            mydict[key] = value + old_value
	            mydict.commit()  # commit() is required to flush the data
	    except Exception as ex:
	        print("Error during storing data (Possibly unsupported):", ex)

	def load_sqlite(key):
	    """Return the value stored under ``key`` in the SQLite cache.

	    When the lookup fails for any reason the error is printed and ``None``
	    is returned.
	    """
	    try:
	        with SqliteDict(sqlite_name) as cache:
	            # Read-only access, so no commit() is needed.
	            return cache[key]
	    except Exception as ex:
	        print("Error during loading data:", ex)
	    return None

	def delete_sql(key):
	    """Reset the cache entry for ``key`` to an empty list.

	    Failures are printed, not raised.
	    """
	    try:
	        with SqliteDict(sqlite_name) as cache:
	            cache[key] = []
	            # Nothing is written to disk until commit() is called.
	            cache.commit()
	    except Exception as ex:
	        print("Error during storing data (Possibly unsupported):", ex)

	def ai_answer(answer):
	    """Print the AI reply to stdout, wrapped in ANSI green colour codes."""
	    green, reset = '\033[32m', '\033[0m'
	    print('AI 回答: ' + green + answer + reset)

	def get_openaiembeddings():
	    """Build the OpenAIEmbeddings client used for every vector-store call.

	    The deployment and model names are fixed; ``chunk_size`` is 1.
	    """
	    embedding_options = {
	        "deployment": "CivetGPT_embedding",
	        "model": "text-embedding-ada-002",
	        # embed_batch_size=1 was tried previously and is left disabled.
	        "chunk_size": 1,
	    }
	    return OpenAIEmbeddings(**embedding_options)

	# NOTE: the bare string below is a disabled legacy helper; it is never
	# executed. The live get_chroma_client is defined further down this file.
	"""
	def get_chroma_client():
	chroma_client = chromadb.Client(Settings(chroma_api_impl=chroma_api_impl,
	chroma_server_host=chroma_db_ip,
	chroma_server_http_port=chroma_db_port
	))
	return chroma_client
	"""

	def multidocs_loader(files_path, file_ext):
	    """Load every ``*.<file_ext>`` file under ``files_path`` and split it.

	    Returns the documents produced by a CharacterTextSplitter with
	    chunk_size=1000 and chunk_overlap=10.
	    """
	    directory_loader = DirectoryLoader(
	        files_path,
	        glob="*." + file_ext,
	        show_progress=True,
	    )
	    raw_documents = directory_loader.load()
	    return CharacterTextSplitter(
	        chunk_size=1000, chunk_overlap=10
	    ).split_documents(raw_documents)

	def unstructure_file_loader(filename_path):
	    """Load a single file with UnstructuredFileLoader and split it.

	    Uses the same splitter settings as multidocs_loader
	    (chunk_size=1000, chunk_overlap=10).
	    """
	    raw_documents = UnstructuredFileLoader(filename_path).load()
	    return CharacterTextSplitter(
	        chunk_size=1000, chunk_overlap=10
	    ).split_documents(raw_documents)

	def add_documents_into_cromadb(db_name, file_path, collection_name):
	    """Embed every file under ``file_path`` into a Chroma collection.

	    ``db_name`` is accepted for interface compatibility but is not used:
	    the store always persists to ``root_file_path + persist_db``.
	    """
	    store = Chroma.from_documents(
	        multidocs_loader(file_path, "*"),
	        get_openaiembeddings(),
	        collection_name=collection_name,
	        persist_directory=root_file_path + persist_db,
	        chroma_db_impl=chroma_db_impl,
	    )
	    store.persist()
	    print('adding documents done!')

	def initial_croma_db(db_name, files_path, file_ext, collection_name):
	    """Create a Chroma collection from the ``*.<file_ext>`` files in a folder.

	    ``db_name`` is accepted for interface compatibility but is not used:
	    the store always persists to ``root_file_path + persist_db``.
	    """
	    store = Chroma.from_documents(
	        multidocs_loader(files_path, file_ext),
	        get_openaiembeddings(),
	        collection_name=collection_name,
	        persist_directory=root_file_path + persist_db,
	        chroma_db_impl=chroma_db_impl,
	    )
	    store.persist()
	    print('vectorstore done!')

	def add_files_to_collection(input_file_path, collection_name):
	    """Index the folder ``root_file_path + input_file_path`` into a collection."""
	    add_documents_into_cromadb(
	        persist_db,
	        root_file_path + input_file_path,
	        collection_name,
	    )

	def get_prompt_summary_string():
	    """Return the Chinese summarisation prompt with a ``{text}`` placeholder."""
	    summary_prompt = """使用中文替下面內容做個精簡摘要:

	{text}

	精簡摘要:"""
	    return summary_prompt


	def get_prompt_template_string():
	    """Return the HR-expert question-condensing prompt, stamped with today's date.

	    The ``{chat_history}`` and ``{question}`` placeholders are left
	    unformatted for the caller's PromptTemplate to fill in.
	    """
	    today = datetime.date.today().strftime("%Y年%m月%d日")
	    # The f-string already interpolates the date, so no .format() is needed.
	    header = f"我是鴻海(等同Foxconn)的員工, 你是一個鴻海的人資專家. 今天是{today}"
	    return header + """
	請根據歷史對話,針對這次的問題, 形成獨立問題. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊.
	不論什麼問題, 都以中文回答

	歷史對話: {chat_history}
	這次的問題: {question}
	人資專家:
	"""

	def get_default_template_prompt():
	    """Return the PromptTemplate that explains ``{concept}`` in Chinese."""
	    return PromptTemplate(
	        input_variables=["concept"],
	        template="你是個知識廣泛的超級助手, 以下所有問題請用中文回答, 並請在500個中文字以內來解釋 {concept} 概念",
	    )

	def fine_tuning_model_chat(my_deployment_id, my_model_name):
	    """Console loop: explain each entered concept with the given deployment.

	    Typing ``q`` ends the loop.
	    """
	    concept_prompt = get_default_template_prompt()
	    llm = AzureOpenAI(model_name=my_model_name, deployment_name=my_deployment_id)
	    while True:
	        question = input(inputText)
	        if question == 'q':
	            return
	        ai_answer(llm(concept_prompt.format(concept=question)))

	def chat_conversation():
	    """Run an interactive console chat against the global Azure deployment.

	    The loop reads a line, sends the whole message history to the chat
	    model and prints the reply. Typing ``q`` ends the loop.
	    """
	    print("resource: " + global_deployment_id + " / " + global_model_name)
	    chat = AzureChatOpenAI(
	        deployment_name=global_deployment_id,
	        model_name=global_model_name,
	    )

	    history = ChatMessageHistory()
	    history.add_ai_message("你是一個超級助理, 以下問題都用中文回答")
	    while True:
	        text = input(inputText)
	        if text == 'q':
	            break
	        history.add_user_message(text)
	        ai_response = chat(history.messages)
	        # Record the reply too; previously only user turns were stored, so
	        # the model never saw its own earlier answers on later turns.
	        history.add_ai_message(ai_response.content)
	        ai_answer(ai_response.content)

	def local_vector_search(question_str,chat_history, collection_name = hr_collection_name):
	"""Answer ``question_str`` from the persisted Chroma collection and return the answer text.

	The answer comes from a ConversationalRetrievalChain over ``collection_name``.
	``chat_history`` is only passed to a ``prompt.format`` call whose result is discarded.
	"""
	embedding = get_openaiembeddings()
	vectorstore = Chroma( embedding_function=embedding,
	collection_name=collection_name,
	persist_directory=root_file_path+persist_db,
	)

	# A fresh memory object is created on every call.
	memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, ai_prefix = "AI超級助理")

	# Completion-style LLM; in this function it is only handed to the math chain below.
	llm = AzureOpenAI(
	deployment_name = global_deployment_id,
	model_name= global_model_name,
	temperature = 0.0)

	# Chat model used by the retrieval chain (and by the agent built below).
	chat_llm = AzureChatOpenAI(
	deployment_name = global_deployment_id,
	model_name= global_model_name,
	temperature = 0.2)

	prompt = PromptTemplate(
	template=get_prompt_template_string(),
	input_variables=["question","chat_history"]
	)
	# NOTE(review): the formatted string is discarded; this call does not modify `prompt`.
	prompt.format(question=question_str,chat_history=chat_history)
	km_chain = ConversationalRetrievalChain.from_llm(
	llm=chat_llm,
	retriever=vectorstore.as_retriever(),
	memory=memory,
	condense_question_prompt=prompt,
	)
	# NOTE(review): the tools and agent below are constructed on every call but never
	# invoked; only km_chain is queried (see the commented-out agent.run further down).
	km_tool = Tool(
	name='Knowledge Base',
	func=km_chain.run,
	description='一個非常有用的工具, 當要查詢任何公司政策以及鴻海相關資料都使用這個工具'
	)

	math_math = LLMMathChain(llm=llm,verbose=True)
	math_tool = Tool(
	name='Calculator',
	func=math_math.run,
	description='Useful for when you need to answer questions about math.'
	)

	search = SerpAPIWrapper()
	search_tool = Tool(
	name="Search",
	func=search.run,
	description="當你需要回答一般問題時,非常有用; 不可以用來回答任何跟鴻海有關的問題.",
	)
	tools=[math_tool,km_tool, search_tool]
	agent=initialize_agent(
	agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
	tools=tools,
	llm=chat_llm,
	verbose=True,
	memory=memory,
	max_iterations=30,
	)
	# Query the retrieval chain directly; the "answer" key of its result is returned.
	result=km_chain(question_str)

	#result=agent.run(question_str)
	return result["answer"]

	def make_markdown_table(array):
	    """Render each entry of ``array`` on its own line (entry, space, newline)."""
	    return "".join(f"{entry} \n" for entry in array)

	def get_hr_files():
	    """Return the cached HR file names as newline-separated text.

	    Returns ``None`` when the cache lookup yields nothing.
	    """
	    stored_files = load_sqlite(sqlite_key)
	    if stored_files is None:
	        return None
	    return make_markdown_table(stored_files)

	def update_hr_km(files):
	    """Copy uploaded files into the HR source folder, index them, refresh the list.

	    Args:
	        files: uploaded file objects; each must expose a ``name`` path.

	    Returns:
	        The refreshed file listing from get_hr_files().
	    """
	    uploaded_paths = [uploaded.name for uploaded in files]
	    target_dir = root_file_path + hr_source_path
	    if not os.path.exists(target_dir):
	        os.makedirs(target_dir)

	    for source_path in uploaded_paths:
	        shutil.copy(source_path, target_dir)
	    # Index once, after all files have been copied.
	    add_files_to_collection(hr_source_path, hr_collection_name)

	    base_names = [Path(source_path).name for source_path in uploaded_paths]
	    save_sqlite(sqlite_key, base_names)
	    return get_hr_files()

	def clear_all_collection(collection_name):
	    """Placeholder that currently does nothing with ``collection_name``."""
	    return None

	def all_files_under_diretory(path):
	    """Delete every regular file directly inside ``path``.

	    Sub-directories are left in place. The glob pattern is built with
	    os.path.join so it works on every platform; the previous
	    ``path + '\\*'`` form only matched on Windows and used an invalid
	    escape sequence.
	    """
	    for entry in glob.glob(os.path.join(path, '*')):
	        # os.remove cannot delete directories, so skip anything that is not a file.
	        if os.path.isfile(entry):
	            os.remove(entry)

	def clear_hr_datas():
	    """Drop the HR collection, delete its source files and reset the cache entry.

	    Returns the (now refreshed) result of get_hr_files().
	    """
	    # Remove the HR collection from the vector store.
	    get_chroma_client(hr_collection_name).delete_collection(name=hr_collection_name)
	    print("Collection removed completely!")

	    # Remove the uploaded files and the cached file list.
	    all_files_under_diretory(root_file_path + hr_source_path)
	    delete_sql(sqlite_key)
	    return get_hr_files()

	def num_of_collection(collection_name):
	    """Return a Chinese status line with the item count of ``collection_name``."""
	    collection = get_chroma_client(collection_name).get_collection(collection_name)
	    item_count = collection.count()
	    return f"目前知識卷裡有{item_count}卷項目"

	def clear_tmp_collection():
	    """Drop the temporary collection and its source files, then report the count."""
	    get_chroma_client(tmp_collection).delete_collection(name=tmp_collection)
	    all_files_under_diretory(root_file_path + ks_source_path)
	    return num_of_collection(tmp_collection)

	def content_summary(split_documents):
	    """Summarise ``split_documents`` with a map-reduce summarize chain.

	    Returns the chain's output dict. If the chain raises, the exception is
	    printed and ``{'output_text': refuse_string}`` is returned instead.
	    """
	    chat_model = AzureChatOpenAI(
	        deployment_name=global_deployment_id,
	        model_name=global_model_name,
	        temperature=0.2,
	    )
	    # The same prompt is used for both the map and the combine step.
	    summary_prompt = PromptTemplate(
	        template=get_prompt_summary_string(),
	        input_variables=["text"],
	    )
	    summarize = load_summarize_chain(
	        llm=chat_model,
	        chain_type="map_reduce",
	        verbose=True,
	        map_prompt=summary_prompt,
	        combine_prompt=summary_prompt,
	    )
	    try:
	        return summarize({"input_documents": split_documents}, return_only_outputs=True)
	    except Exception as e:
	        print(e)
	        return {'output_text': refuse_string}

	def pdf_summary(file_name):
	    """Load ``file_name``, split it recursively and return its summary dict."""
	    print("file_name: " + file_name)
	    loaded = UnstructuredFileLoader(file_name).load()
	    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
	    return content_summary(splitter.split_documents(loaded))

	def youtube_summary(youtube_url):
	    """Load the video at ``youtube_url`` via YoutubeLoader and return the summary text."""
	    video_loader = YoutubeLoader.from_youtube_url(
	        youtube_url,
	        add_video_info=True,
	        language=['en', 'zh-TW'],
	        translation='zh-TW',
	    )
	    loaded = video_loader.load()
	    chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10).split_documents(loaded)
	    return content_summary(chunks)['output_text']
	def summary_large_file(files):
	    """Summarise the first uploaded file and return the summary text.

	    Only ``files[0]`` is processed, even when several files are uploaded.
	    """
	    first_path = [uploaded.name for uploaded in files][0]
	    print(first_path)
	    return pdf_summary(first_path)["output_text"]

	def upload_large_file(files):
	    """Return the first uploaded file's name without directory or extension."""
	    first_path = [uploaded.name for uploaded in files][0]
	    return Path(first_path).stem

	def set_allow_lightweight_delete():
	    """Send the lightweight-delete SET command to ClickHouse at 127.0.0.1:8123.

	    The command result is printed.
	    """
	    clickhouse = clickhouse_connect.get_client(host='127.0.0.1', port=8123)
	    # Alternative statement used while debugging: "show databases;"
	    result = clickhouse.command("SET allow_experimental_lightweight_delete = true;")
	    print(result)
	def get_chroma_client(collection_name):
	    """Return the client object behind a Chroma store for ``collection_name``.

	    The store is opened on the shared persist directory.
	    """
	    store_options = {
	        "embedding_function": get_openaiembeddings(),
	        "collection_name": collection_name,
	        "persist_directory": root_file_path + persist_db,
	    }
	    # NOTE: reaches into the wrapper's private ``_client`` attribute.
	    return Chroma(**store_options)._client

	def create_db():
	    """Build the HR collection from the PDF files in the HR source folder."""
	    initial_croma_db(
	        persist_db,
	        root_file_path + hr_source_path,
	        "pdf",
	        hr_collection_name,
	    )

	def generate_iframe_for_youtube(youtube_link):
	    """Return an ``<iframe>`` snippet that embeds the given YouTube link.

	    ``youtube.com/watch?v=<id>``, ``youtu.be/<id>`` and already-embedded
	    ``youtube.com/embed/<id>`` links are rewritten to the embed URL. Any
	    other text is used as the ``src`` unchanged. The snippet is also
	    printed.

	    Args:
	        youtube_link: text typed by the user into the link textbox.
	    """
	    import html  # local import: only needed here, keeps the file's import block untouched

	    # The alternation must be a bare "|"; the previous "\|" matched a
	    # literal pipe character, so no real URL was ever rewritten.
	    # "embed/" is accepted so an already-embedded URL is not prefixed twice.
	    regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/(?:watch\?v=|embed\/)?(.*)"
	    _url = re.sub(regex, r"https://www.youtube.com/embed/\1", youtube_link)
	    # The link is user input placed inside an HTML attribute: escape it so
	    # quotes or angle brackets cannot break out of the attribute.
	    safe_url = html.escape(_url, quote=True)
	    embed_html = f'<iframe width="650" height="365" src="{safe_url}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
	    print(embed_html)
	    return embed_html

	def create_html_video(file_name, width, temp_file_url):
	    """Return a looping, muted, autoplaying square ``<video>`` tag.

	    ``width`` is used for both width and height; ``file_name`` is accepted
	    but not used.
	    """
	    return (
	        f'<video width={width} height={width} autoplay muted loop>'
	        f'<source src={temp_file_url} type="video/mp4" poster="Masahiro.png">'
	        '</video>'
	    )

	def do_html_audio_speak(words_to_speak):
	    """Synthesize ``words_to_speak`` with Amazon Polly and build an audio tag.

	    The MP3 is written to ``./data/audios/tempfile.mp3``.

	    Returns:
	        ``(html_audio, mp3_path)`` on success, or ``(None, None)`` when the
	        response carries no audio stream or the file cannot be written.
	    """
	    audio_path = "./data/audios/tempfile.mp3"

	    # SECURITY: no access key is embedded here. boto3 resolves credentials
	    # through its default provider chain (environment variables, shared
	    # config, instance role). A key pair that was previously committed in
	    # this function should be treated as leaked and rotated.
	    polly_client = boto3.Session(region_name='us-west-2').client('polly')

	    language_code = "cmn-CN"
	    engine = NEURAL_ENGINE
	    voice_id = "Zhiyu"

	    print("voice_id: " + voice_id + "\nlanguage_code=" + language_code)
	    response = polly_client.synthesize_speech(
	        Text=words_to_speak,
	        OutputFormat='mp3',
	        VoiceId=voice_id,
	        LanguageCode=language_code,
	        Engine=engine,
	    )

	    if "AudioStream" not in response:
	        # The response didn't contain audio data, exit gracefully
	        print("Could not stream audio")
	        return None, None

	    html_audio = '<pre>no audio</pre>'
	    # closing() guarantees the Polly stream is released even if writing fails.
	    with closing(response["AudioStream"]) as stream:
	        try:
	            with open(audio_path, 'wb') as f:
	                f.write(stream.read())
	            temp_aud_file = gr.File(audio_path)
	            temp_aud_file_url = "/file=" + temp_aud_file.value['name']
	            html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
	        except IOError as error:
	            # Could not write to file, exit gracefully
	            print(error)
	            return None, None

	    return html_audio, audio_path

	def do_html_video_speak():
	    """Request a lip-synced video for the last synthesized audio file.

	    Posts ``./data/audios/tempfile.mp3`` to the exh.ai lipsync endpoint and,
	    on HTTP 200, stores the returned bytes at ``./data/videos/tempfile.mp4``.

	    Returns:
	        ``(response, html_video, mp4_path)``. ``html_video`` stays
	        ``'<pre>no video</pre>'`` when no video was produced.
	    """
	    audio_path = "./data/audios/tempfile.mp3"
	    video_path = "./data/videos/tempfile.mp4"

	    # SECURITY: the bearer token is read from the EXH_API_KEY environment
	    # variable instead of being embedded in source. A token that was
	    # previously committed here should be treated as leaked and rotated.
	    key = os.environ.get("EXH_API_KEY", "")
	    url = "https://api.exh.ai/animations/v2/generate_lipsync_from_audio"
	    payload = {
	        "animation_pipeline": "high_quality",
	        "idle_url": "https://ugc-idle.s3-us-west-2.amazonaws.com/5fd9ba1b1607b39a4d559300c1e35bee.mp4"
	    }
	    headers = {
	        "accept": "application/json",
	        "authorization": f"Bearer {key}"
	    }

	    # Open the audio inside a context manager so the handle is always closed.
	    with open(audio_path, "rb") as audio_file:
	        files = {"audio_file": (audio_path, audio_file, "audio/mpeg")}
	        res = requests.post(url, data=payload, files=files, headers=headers)

	    print("res.status_code: ", res.status_code)

	    html_video = '<pre>no video</pre>'
	    # Only a successful, non-empty response is a video; an error body must
	    # not be written to disk as an .mp4.
	    if res.status_code == 200 and res.content:
	        print("len(res.content)): ", len(res.content))

	        with open(video_path, 'wb') as f:
	            f.write(res.content)
	        temp_file = gr.File(video_path)
	        temp_file_url = "/file=" + temp_file.value['name']
	        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
	    else:
	        print('video url unknown')
	    return res, html_video, video_path

	def kh_update_km(files):
	    """Copy uploaded files into the temporary source folder and index them.

	    Args:
	        files: uploaded file objects; each must expose a ``name`` path.

	    Returns:
	        The status line from num_of_collection for the temporary collection.
	    """
	    uploaded_paths = [uploaded.name for uploaded in files]
	    target_dir = root_file_path + ks_source_path

	    if not os.path.exists(target_dir):
	        os.makedirs(target_dir)

	    for source_path in uploaded_paths:
	        shutil.copy(source_path, target_dir)
	    # Index once, after all files have been copied.
	    add_files_to_collection(ks_source_path, tmp_collection)

	    return num_of_collection(tmp_collection)

	class Logger:
	    """File-like object that duplicates writes to stdout and to a log file."""

	    def __init__(self, filename):
	        # Keep whatever sys.stdout is at construction time as the terminal sink.
	        self.terminal = sys.stdout
	        # The log file is truncated on open and written as UTF-8.
	        self.log = open(filename, "w", encoding='UTF-8')

	    def write(self, message):
	        """Write ``message`` to the terminal first, then to the log file."""
	        for sink in (self.terminal, self.log):
	            sink.write(message)

	    def flush(self):
	        """Flush the terminal first, then the log file."""
	        for sink in (self.terminal, self.log):
	            sink.flush()

	    def isatty(self):
	        """Always report that this stream is not a TTY."""
	        return False

	def read_logs():
	    """Return the contents of ``output.log`` with ANSI escape sequences removed.

	    stdout is flushed first so pending output is flushed before the file
	    is read.
	    """
	    sys.stdout.flush()
	    # Two-character escapes, or CSI sequences: ESC [ params* intermediates* final.
	    # The previous pattern had an escaped "\|" (a literal pipe) and lacked
	    # the "*" quantifiers, so colour codes such as ESC[32m were not removed.
	    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

	    with open("output.log", "r", encoding='UTF-8') as f:
	        return ansi_escape.sub('', f.read())

	def pandas_analysis(prompt_str, message, chat_history):
	    """Answer ``message`` with a pandas dataframe agent over the CSV files.

	    All ``*.csv`` files in the ``believe_source_path`` folder are
	    concatenated into one dataframe.

	    Args:
	        prompt_str: template with ``{message}`` / ``{chat_history}`` placeholders.
	        message: the user's question.
	        chat_history: list of ``(message, answer)`` tuples; mutated in place.

	    Returns:
	        ``('', chat_history)`` with the new exchange appended.
	    """
	    csv_paths = glob.glob(f"{root_file_path}{believe_source_path}/*.csv")
	    df = pd.concat([pd.read_csv(csv_path) for csv_path in csv_paths])

	    llm = AzureOpenAI(
	        deployment_name="text-davinci-003",
	        model_name="text-davinci-003",
	        max_tokens=2000,
	        temperature=0,
	    )
	    be_agent = create_pandas_dataframe_agent(
	        llm,
	        df,
	        prefix="Remove any ` from the Action Input",
	        max_iterations=30,
	        return_intermediate_steps=False,
	        max_execution_time=60,
	        handle_parsing_errors="Check your output and make sure it conforms!",
	        verbose=True,
	    )

	    new_str = prompt_str.format(message=message, chat_history=chat_history)
	    print(new_str)
	    chat_history.append((message, be_agent.run(new_str)))
	    return '', chat_history

	def lunch_style(demo, logs=gr.Text()):
	    """Redirect stdout into ``output.log`` and launch the Gradio app.

	    Command-line forms:
	        (no arguments)        launch with Gradio defaults, no auth
	        server                launch on the built-in address with auth
	        <any> <ip> <port>     launch on the given address with auth

	    Args:
	        demo: the gr.Blocks app to launch.
	        logs: component that is refreshed every second with the log text.
	    """
	    sys.stdout = Logger("output.log")
	    demo.load(read_logs, None, logs, every=1)

	    allowed_paths = [root_file_path, root_file_path + hr_source_path]
	    args = sys.argv[1:]

	    if not args:
	        print("running server as default value")
	        demo.launch(allowed_paths=allowed_paths)
	        return

	    if len(args) == 1 and args[0] == "server":
	        local_ip = "10.40.23.232"
	        local_port = 7788
	    elif len(args) == 3:
	        local_ip = args[1]
	        try:
	            # argv values are strings; the port was previously passed to
	            # launch() unconverted.
	            local_port = int(args[2])
	        except ValueError:
	            print(f"invalid port: {args[2]}")
	            return
	    else:
	        print("syntax: python <your_app>.py [server {ip_address, port}] ")
	        return

	    # NOTE(security): the login can be overridden through GRADIO_AUTH_USER /
	    # GRADIO_AUTH_PASSWORD; the in-source defaults are kept only so existing
	    # deployments keep working and should be replaced.
	    auth = (
	        os.environ.get("GRADIO_AUTH_USER", "Foxconn"),
	        os.environ.get("GRADIO_AUTH_PASSWORD", "Foxconn123!"),
	    )
	    print(f"running server on http://{local_ip}:{local_port}")
	    demo.launch(
	        allowed_paths=allowed_paths,
	        auth=auth,
	        server_name=local_ip,
	        server_port=local_port,
	    )

	def gradio_run():
	"""Build the Gradio Blocks UI (four tabs) and launch it via lunch_style."""
	print("User Login")
	with gr.Blocks(theme='bethecloud/storj_theme') as demo:
	with gr.Row():
	gr.Markdown("# HH Azure Openai Demo")
	#Header section
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown("""
	### 這是一個基於各場景製造的Azure Openai Demo, 目前預計會包含場景有:

	- 超長文本的摘要 ☑
	- HR 智能客服小幫手 ☑
	- 上傳過去歷史資料, 預測未來發展
	- 上傳初步構想後, AI生成方案
	- 網路上搜尋各式資料(包含google, wikipedia, youtube) 等, 綜合分析給結論

	### 基礎的技術架構:
	* 給予資料, 持續累加
	* 存入vector(向量化) database, 依不同的collection 存放
	* 問題以相似度(Similarity search), 結果再丟給gpt 做綜合回應

	### 已知bug:
	* N/A

	如有任何Bug 歡迎隨時回饋
	""")
	with gr.Column(scale=1):
	gr.Image(type="pil", value=root_file_path+"vector.png", label="技術概念圖")
	gr.Markdown("""
	> 中央資訊 Change Liao(廖晨志)
	> teams/email: change.cc.liao@foxconn.com
	> 分機: 5010108
	""")
	with gr.Row():
	gr.Markdown("""
	------
	## Playground
	請切換下方Tab 鍵試驗各項功能

	""")
	#First PoC Section: long-document summary tab
	with gr.Tab("文本摘要"):
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown(f"""
	## 第一項實驗: 超長文本摘要
	請上傳任何文檔(.pdf, .doc, .csv, text 格式),上傳完成後稍等一會, AI 會在右側TextField 提供文本摘要

	* 使用方式:
	* 請在右邊按下 `請上傳超長文本(可接受text, pdf, doc, csv 格式)` 上傳你的文本
	* AI 會開始解析內容, 檔案愈大解析愈久
	* 上傳完後可以按同個按鍵, 再次上傳
	* 後續會支援video 以及 audio格式

	""")

	with gr.Column(scale=1):
	gr.Markdown("1.")
	file_name_field = gr.Textbox(max_lines=1, label="上傳檔案",placeholder="目前沒有上傳檔案")
	upload_button = gr.UploadButton("請上傳超長文本(可接受text, pdf, doc, csv 格式)",
	file_types=["text", ".pdf", ".doc", ".csv"], file_count="multiple")
	gr.Markdown("2.")
	summary_text = gr.Textbox()
	summary_text.label = "AI 摘要:"
	# NOTE(review): this assigns False to the attribute named `change` on the Textbox;
	# presumably that shadows the component's change-event method — confirm intent.
	summary_text.change = False
	summary_text.lines = 12
	# Show the uploaded file name first, then run the summary on the same upload.
	upload_button.upload(upload_large_file, upload_button, file_name_field).then(summary_large_file,upload_button,summary_text)
	#2nd Hr Section: HR knowledge-base assistant tab
	with gr.Tab("HR 客服助手"):
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown(
	"""
	## 第二項實驗: HR 資料庫智能客服助手 AI 試驗
	"""
	)
	gr.Markdown("""
	### 使用方法
	* 測試人員可在下方加入任何HR 相關資料, 亦可全部刪除後上傳.
	* 系統會將資料向量化後,納入右方人資客服機器人資料庫
	* 測試人員可在右方與客服機器人對話

	(溫馨提醒: 儘可能所有檔案全部清掉, 再一次上傳所有想納入的檔案;且次數不要太多,以節省經費)
	""")
	# get_hr_files is passed as the Textbox's first argument to supply the stored file list.
	file_list=gr.Textbox(get_hr_files, label="已存在知識庫的檔案(text,pdf,doc,csv)", placeholder="沒有任何檔案存在", max_lines=16, lines=16)
	with gr.Row():
	with gr.Column(scale=1):
	upload_button = gr.UploadButton("上傳HR知識庫檔案",
	file_types=["text", ".pdf", ".doc", ".csv"], file_count="multiple")
	upload_button.upload(update_hr_km, inputs=upload_button, outputs=file_list)
	with gr.Column(scale=1):
	cleanDataBtn = gr.Button(value="刪除所有知識以及檔案")
	cleanDataBtn.click(clear_hr_datas,outputs=file_list)

	with gr.Column(scale=1):
	with gr.Row():
	with gr.Column():
	# Hidden gr.File components are created only to derive a "/file=" URL for the media files.
	tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
	tmp_file_url = "/file=" + tmp_file.value['name']
	htm_video = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH, tmp_file_url)
	video_html = gr.HTML(htm_video)

	# my_aud_file = gr.File(label="Audio file", type="file", visible=True)
	tmp_aud_file = gr.File("./data/audios/tempfile.mp3", visible=False)
	tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
	htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
	audio_html = gr.HTML(htm_audio, visible=False)
	with gr.Column():
	isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
	gr.Markdown("""
	### AI 虛擬客服:
	* 這是一個實驗性質的AI 客服
	* 講話超過15秒就不會產生,正在要求放寬限制
	* 想要放誰的頭像都可以, 要放董事長也可以.
	* 訂閱制(有效時間 6/13~7/13)
	""")

	with gr.Row():
	chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
	with gr.Row():
	with gr.Column(scale=5):
	msg = gr.Textbox(
	show_label=False,
	placeholder="輸入你的問題",
	)
	with gr.Column(scale=1):
	clear = gr.Button("清除")

	# Chat handler: returns (cleared textbox, updated history, video HTML, audio HTML).
	def respond(message, chat_history):
	vector_search_message = local_vector_search(message, chat_history)
	chat_history.append((message, vector_search_message))
	print("vector_search:"+vector_search_message)
	# NOTE(review): this reads the `.value` attribute of the Checkbox object; isAudio is
	# not listed in the inputs of msg.submit below — confirm the user's current tick is honoured.
	if isAudio.value is False:
	print("isAudio is False")
	return '', chat_history, htm_video, ''
	else:
	print("isAudio is True")
	# The video step reads the mp3 file that the audio step writes.
	html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
	res, new_html_video, video_file_path = do_html_video_speak()

	if res.status_code == 200:
	return '', chat_history, new_html_video, ''
	else:
	return '', chat_history, htm_video, html_audio

	msg.submit(respond, [msg, chatbot], [msg, chatbot, video_html, audio_html], queue=True)
	clear.click(lambda: None, None, chatbot, queue=False)
	#3rd youtube: video summary tab
	with gr.Tab("Youtube 影片摘要"):
	with gr.Row():
	with gr.Column(scale=1):
	youtube_gr = gr.HTML(generate_iframe_for_youtube("https://www.youtube.com/embed/"))
	youtube_link=gr.Textbox(interactive=True, label="在此貼上Youtube link:", placeholder="e.g. https://www.youtube.com/watch?v=xxxxxxxxx")
	# Regenerate the embedded player whenever the link text changes.
	youtube_link.change(generate_iframe_for_youtube,youtube_link,youtube_gr)
	youtube_analysis_btn=gr.Button("送出解析")
	with gr.Column(scale=1):
	youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
	youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
	# 4th tab: talk-to-data over the CSV files
	with gr.Tab("統計助手"):
	with gr.Row():
	gr.Markdown("""
	### 使用方式
	已經讀取所有提供的csv 資料, 可以詢問資料任何問題(Talk to data)
	建議先詢問欄位後, 後續再構思其他問題
	""")

	# Invisible textbox that serves as the input target of the file-list Examples below.
	invField = gr.Textbox(visible=False)
	dir_path = f"{root_file_path}{believe_source_path}/*.csv"
	res = glob.glob(dir_path)
	gr.Examples(res, label="資料庫檔案", inputs=invField, examples_per_page=4)
	with gr.Row():
	with gr.Column():

	tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
	with gr.Row():
	with gr.Column(scale=5):
	tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
	with gr.Column(scale=1):
	tmp_clear = gr.Button("清除對話")
	with gr.Column():
	prompt_textbox = gr.Textbox("""
	你是一位專業HR專家以及資料科學家,提供給你的資料是現有的人員資料表.
	有下列定義:

	1.

	請以中文回答我下面的問題:{message}
	""", lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
	console = gr.Textbox(lines=11, label="Console", max_lines=11)
	tmp_msg.submit(pandas_analysis, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
	tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
	with gr.Row():
	gr.Examples([
	'你有什麼欄位?',
	'資料裡有屬於台灣(TW)的員工有多少位?',
	'全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
	'如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
	], label="訊息範例", inputs=tmp_msg)

	# Enable request queuing, then launch; the console Textbox is passed as the log target.
	demo.queue(concurrency_count=10)
	lunch_style(demo,console)

	# Entry point: build the UI and start the server.
	gradio_run()