|
import os |
|
import datetime |
|
import glob |
|
import shutil |
|
import requests |
|
import io |
|
import sys |
|
import re |
|
import boto3 |
|
from os import listdir |
|
from os.path import isfile, join |
|
|
|
import gradio |
|
from sqlitedict import SqliteDict |
|
|
|
import gradio as gr |
|
|
|
from langchain import PromptTemplate |
|
from langchain.agents import Tool |
|
from langchain.agents import load_tools |
|
from langchain.agents import initialize_agent |
|
|
|
from langchain.agents import AgentType |
|
|
|
from langchain.chains import LLMMathChain |
|
from langchain import SerpAPIWrapper |
|
from langchain.chains import ConversationalRetrievalChain |
|
|
|
from langchain.chains.summarize import load_summarize_chain |
|
|
|
from langchain.llms import AzureOpenAI |
|
from langchain.chat_models import AzureChatOpenAI |
|
|
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
|
|
from langchain.memory import ChatMessageHistory |
|
from langchain.memory import ConversationBufferMemory |
|
|
|
from langchain.vectorstores import Chroma |
|
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
|
from langchain.document_loaders import DirectoryLoader |
|
from langchain.document_loaders import UnstructuredFileLoader |
|
|
|
|
|
import clickhouse_connect |
|
from pathlib import Path |
|
|
|
from langchain.document_loaders import YoutubeLoader |
|
|
|
from azure_utils import AzureVoiceData |
|
from polly_utils import PollyVoiceData, NEURAL_ENGINE |
|
from contextlib import closing |
|
from langchain.agents import create_pandas_dataframe_agent |
|
import pandas as pd |
|
|
|
|
|
# Azure OpenAI connection settings, published through the process environment
# before any client in this file is constructed.  Type, version and endpoint
# stay pinned because every LLM wrapper used below is an Azure* class.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"

# SECURITY: the two keys below are committed in source and must be treated as
# leaked -- rotate them and provide the replacements via the environment.
# setdefault() lets an externally supplied value win while keeping the
# previous behaviour when nothing is configured.
os.environ.setdefault("OPENAI_API_KEY", "0e3e5b666818488fa1b5cb4e4238ffa7")
os.environ.setdefault(
    "SERPAPI_API_KEY",
    "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7",
)
|
|
|
# --- Azure OpenAI deployment / model names ---------------------------------
global_deployment_id = "CivetGPT"
global_model_name = "gpt-35-turbo"
davinci = "text-davinci-003"

# --- On-disk layout (all paths are relative to root_file_path) --------------
root_file_path = "./data/"
hr_source_path = "hr_source"
ks_source_path = "ks_source"
believe_source_path = "be_source"
persist_db = "persist_db"  # Chroma persist directory under root_file_path

# --- Chroma settings and collection names -----------------------------------
chroma_api_impl = "HH_Azure_Openai"
chroma_db_impl = "localdb+langchain"
hr_collection_name = "hr_db"
tmp_collection = "tmp_collection"

# --- SqliteDict cache that remembers which files were uploaded --------------
sqlite_name = "cache.sqlite3"
sqlite_key = "stored_files"

# --- User-facing strings ----------------------------------------------------
inputText = "問題(按q 或Ctrl + c跳出): "
refuse_string = "服務被拒. 內容可能涉及敏感字詞,政治,煽動他人或是其他不當言詞, 請改以其他內容嚐試"

# --- Talking-head video shown in the HR tab ---------------------------------
LOOPING_TALKING_HEAD = "./data/videos/Masahiro.mp4"
TALKING_HEAD_WIDTH = "192"
|
# Voice catalogues built once at import time (Azure first, then Polly).
AZURE_VOICE_DATA, POLLY_VOICE_DATA = AzureVoiceData(), PollyVoiceData()
|
|
|
|
|
def save_sqlite(key, value):
    """Prepend ``value`` to the list stored under ``key`` in the SQLite cache.

    Args:
        key: Cache key inside the ``sqlite_name`` database.
        value: List of items to put in front of the already stored list.

    Errors are reported on stdout and swallowed (best-effort cache).
    """
    try:
        with SqliteDict(sqlite_name) as mydict:
            # The key does not exist before the very first save; the previous
            # ``mydict[key]`` lookup raised KeyError there and the data was
            # silently dropped.  Fall back to an empty list instead.
            old_value = mydict.get(key) or []
            mydict[key] = value + old_value
            mydict.commit()
    except Exception as ex:
        print("Error during storing data (Possibly unsupported):", ex)
|
|
|
def load_sqlite(key):
    """Return the value cached under ``key``.

    Any failure (including a missing key) is printed and results in ``None``.
    """
    try:
        with SqliteDict(sqlite_name) as cache:
            return cache[key]
    except Exception as err:
        print("Error during loading data:", err)
    return None
|
|
|
def delete_sql(key):
    """Reset the cache entry ``key`` to an empty list.

    Failures are printed and swallowed.
    """
    try:
        with SqliteDict(sqlite_name) as cache:
            cache[key] = []
            cache.commit()
    except Exception as err:
        print("Error during storing data (Possibly unsupported):", err)
|
|
|
def ai_answer(answer):
    """Print ``answer`` to stdout in green, prefixed with the AI label."""
    green, reset = '\033[32m', '\033[0m'
    print("".join(('AI 回答: ', green, answer, reset)))
|
|
|
def get_openaiembeddings():
    """Build the ``OpenAIEmbeddings`` client used for every vector store here."""
    embedding_options = {
        "deployment": "CivetGPT_embedding",
        "model": "text-embedding-ada-002",
        "chunk_size": 1,
    }
    return OpenAIEmbeddings(**embedding_options)
|
|
|
""" |
|
def get_chroma_client(): |
|
chroma_client = chromadb.Client(Settings(chroma_api_impl=chroma_api_impl, |
|
chroma_server_host=chroma_db_ip, |
|
chroma_server_http_port=chroma_db_port |
|
)) |
|
return chroma_client |
|
""" |
|
|
|
def multidocs_loader(files_path, file_ext):
    """Load every ``*.<file_ext>`` file in ``files_path`` and split it into chunks.

    Returns the documents produced by a ``CharacterTextSplitter`` configured
    with 1000-character chunks and an overlap of 10.
    """
    directory_loader = DirectoryLoader(
        files_path,
        glob=f"*.{file_ext}",
        show_progress=True,
    )
    loaded = directory_loader.load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    return splitter.split_documents(loaded)
|
|
|
def unstructure_file_loader(filename_path):
    """Load a single file with ``UnstructuredFileLoader`` and split it into chunks.

    Uses the same splitter settings as ``multidocs_loader`` (1000 / 10).
    """
    loaded = UnstructuredFileLoader(filename_path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    return splitter.split_documents(loaded)
|
|
|
def add_documents_into_cromadb(db_name, file_path, collection_name):
    """Embed every file found in ``file_path`` into ``collection_name`` and persist.

    ``db_name`` is accepted for interface compatibility only; the persist
    directory is always ``root_file_path + persist_db``.
    """
    store = Chroma.from_documents(
        multidocs_loader(file_path, "*"),
        get_openaiembeddings(),
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
        chroma_db_impl=chroma_db_impl,
    )
    store.persist()
    print('adding documents done!')
|
|
|
def initial_croma_db(db_name, files_path, file_ext, collection_name):
    """Build ``collection_name`` from the ``*.<file_ext>`` files in ``files_path``.

    ``db_name`` is accepted for interface compatibility only; the persist
    directory is always ``root_file_path + persist_db``.
    """
    store = Chroma.from_documents(
        multidocs_loader(files_path, file_ext),
        get_openaiembeddings(),
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
        chroma_db_impl=chroma_db_impl,
    )
    store.persist()
    print('vectorstore done!')
|
|
|
def add_files_to_collection(input_file_path, collection_name):
    """Index the directory ``root_file_path + input_file_path`` into ``collection_name``."""
    add_documents_into_cromadb(
        persist_db,
        root_file_path + input_file_path,
        collection_name,
    )
|
|
|
def get_prompt_summary_string():
    """Return the summarisation prompt template; ``{text}`` is the only placeholder."""
    prompt_lines = (
        "使用中文替下面內容做個精簡摘要:",
        "",
        "{text}",
        "",
        "精簡摘要:",
    )
    return "\n".join(prompt_lines)
|
|
|
|
|
def get_prompt_template_string():
    """Return the HR-expert prompt template stamped with today's date.

    The result still contains the ``{chat_history}`` and ``{question}``
    placeholders for a later ``PromptTemplate``.
    """
    today = datetime.date.today().strftime("%Y年%m月%d日")
    template_lines = [
        f"我是鴻海(等同Foxconn)的員工, 你是一個鴻海的人資專家. 今天是{today}",
        "請根據歷史對話,針對這次的問題, 形成獨立問題. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊.",
        "不論什麼問題, 都以中文回答",
        "",
        "歷史對話: {chat_history}",
        "這次的問題: {question}",
        "人資專家:",
        "",
    ]
    return "\n".join(template_lines)
|
|
|
def get_default_template_prompt():
    """Return the single-variable (``concept``) explanation prompt."""
    return PromptTemplate(
        input_variables=["concept"],
        template="你是個知識廣泛的超級助手, 以下所有問題請用中文回答, 並請在500個中文字以內來解釋 {concept} 概念",
    )
|
|
|
def fine_tuning_model_chat(my_deployment_id, my_model_name):
    """Console loop: explain each typed concept with the given Azure deployment.

    Typing ``q`` ends the loop.
    """
    concept_prompt = get_default_template_prompt()
    llm = AzureOpenAI(model_name=my_model_name, deployment_name=my_deployment_id)
    # iter() with a sentinel keeps prompting until the user enters 'q'.
    for text in iter(lambda: input(inputText), 'q'):
        ai_answer(llm(concept_prompt.format(concept=text)))
|
|
|
def chat_conversation():
    """Console chat loop against the global Azure chat deployment.

    The whole ``ChatMessageHistory`` is sent on every turn.  Typing ``q``
    ends the loop.
    """
    print("resource: " + global_deployment_id + " / " + global_model_name)
    chat = AzureChatOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
    )

    history = ChatMessageHistory()
    # Standing instruction, stored as an AI turn as in the original code.
    history.add_ai_message("你是一個超級助理, 以下問題都用中文回答")
    while True:
        text = input(inputText)
        if text == 'q':
            break
        history.add_user_message(text)
        ai_response = chat(history.messages)
        # Record the reply as well; previously only user turns were kept, so
        # the model never saw its own earlier answers.
        history.add_ai_message(ai_response.content)
        ai_answer(ai_response.content)
|
|
|
def local_vector_search(question_str, chat_history, collection_name=hr_collection_name):
    """Answer ``question_str`` from the persisted Chroma collection.

    Args:
        question_str: The user's question.
        chat_history: Conversation so far as supplied by the caller.
            NOTE(review): it is currently not fed into the chain -- a fresh
            ``ConversationBufferMemory`` is created on every call, so each
            question is answered without prior context.  Confirm whether the
            memory should be seeded from this argument.
        collection_name: Chroma collection to search (defaults to the HR one).

    Returns:
        The ``"answer"`` field of the ``ConversationalRetrievalChain`` result.
    """
    vectorstore = Chroma(
        embedding_function=get_openaiembeddings(),
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
    )

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        ai_prefix="AI超級助理",
    )

    chat_llm = AzureChatOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
        temperature=0.2,
    )

    condense_prompt = PromptTemplate(
        template=get_prompt_template_string(),
        input_variables=["question", "chat_history"],
    )

    # The calculator / search tools, the completion LLM and the agent that
    # used to be constructed here were never invoked (only the retrieval
    # chain is called), so they are no longer built on every request.
    km_chain = ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
        condense_question_prompt=condense_prompt,
    )

    result = km_chain(question_str)
    return result["answer"]
|
|
|
def make_markdown_table(array):
    """Render each entry of ``array`` on its own line (entry, space, newline)."""
    return "".join(f"{entry} \n" for entry in array)
|
|
|
def get_hr_files():
    """Return the cached HR file names, one per line, or ``None`` when nothing is cached."""
    stored_files = load_sqlite(sqlite_key)
    if stored_files is None:
        return None
    return make_markdown_table(stored_files)
|
|
|
def update_hr_km(files):
    """Copy uploaded files into the HR source folder, index them and record their names.

    Args:
        files: Uploaded file objects; only their ``.name`` attribute is read.

    Returns:
        The refreshed file listing from ``get_hr_files``.
    """
    uploaded_paths = [uploaded.name for uploaded in files]
    target_dir = root_file_path + hr_source_path

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    for source_path in uploaded_paths:
        shutil.copy(source_path, target_dir)

    # Indexes the whole source directory, not only the files just copied.
    add_files_to_collection(hr_source_path, hr_collection_name)

    base_names = [Path(source_path).name for source_path in uploaded_paths]
    save_sqlite(sqlite_key, base_names)
    return get_hr_files()
|
|
|
def clear_all_collection(collection_name):
    """Placeholder: not implemented, does nothing and returns ``None``."""
    return None
|
|
|
def all_files_under_diretory(path):
    """Delete every regular file directly inside ``path``.

    Sub-directories are left in place, and a missing ``path`` is a no-op.

    The previous pattern ``path + '\\*'`` used a literal backslash, which is
    only a path separator on Windows; on POSIX it matched nothing, so no file
    was ever removed.  ``os.path.join`` builds the pattern portably.
    """
    for entry in glob.glob(os.path.join(path, '*')):
        # os.remove() raises on directories; only unlink plain files.
        if os.path.isfile(entry):
            os.remove(entry)
|
|
|
def clear_hr_datas():
    """Drop the HR collection, delete its source files and reset the cached file list.

    Returns:
        The refreshed file listing from ``get_hr_files``.
    """
    chroma_client = get_chroma_client(hr_collection_name)
    chroma_client.delete_collection(name=hr_collection_name)
    print("Collection removed completely!")

    hr_source_dir = root_file_path + hr_source_path
    all_files_under_diretory(hr_source_dir)
    delete_sql(sqlite_key)
    return get_hr_files()
|
|
|
def num_of_collection(collection_name):
    """Return a user-facing sentence with the item count of ``collection_name``."""
    collection = get_chroma_client(collection_name).get_collection(collection_name)
    number = collection.count()
    return f"目前知識卷裡有{number}卷項目"
|
|
|
def clear_tmp_collection():
    """Drop the temporary collection, delete its source files and report the new count."""
    chroma_client = get_chroma_client(tmp_collection)
    chroma_client.delete_collection(name=tmp_collection)

    ks_source_dir = root_file_path + ks_source_path
    all_files_under_diretory(ks_source_dir)
    return num_of_collection(tmp_collection)
|
|
|
def content_summary(split_documents):
    """Summarise ``split_documents`` with a map-reduce chain.

    The same prompt is used for the map and the combine step.

    Returns:
        The chain output dict; when the chain raises, the error is printed
        and ``{'output_text': refuse_string}`` is returned instead.
    """
    chat_llm = AzureChatOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
        temperature=0.2,
    )
    summary_prompt = PromptTemplate(
        template=get_prompt_summary_string(),
        input_variables=["text"],
    )
    summarize_chain = load_summarize_chain(
        llm=chat_llm,
        chain_type="map_reduce",
        verbose=True,
        map_prompt=summary_prompt,
        combine_prompt=summary_prompt,
    )
    try:
        return summarize_chain({"input_documents": split_documents}, return_only_outputs=True)
    except Exception as err:
        print(err)
        return {'output_text': refuse_string}
|
|
|
def pdf_summary(file_name):
    """Load ``file_name``, split it recursively (1000 / 20) and summarise it.

    Returns the dict produced by ``content_summary``.
    """
    print("file_name: "+file_name)
    loaded = UnstructuredFileLoader(file_name).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    return content_summary(splitter.split_documents(loaded))
|
|
|
def youtube_summary(youtube_url):
    """Summarise the transcript of ``youtube_url`` and return the summary text."""
    transcript_loader = YoutubeLoader.from_youtube_url(
        youtube_url,
        add_video_info=True,
        language=['en', 'zh-TW'],
        translation='zh-TW',
    )
    transcript = transcript_loader.load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    summary = content_summary(splitter.split_documents(transcript))
    return summary['output_text']
|
def summary_large_file(files):
    """Summarise the first uploaded file and return the summary text.

    Only ``files[0]`` is processed even when several files were uploaded.
    """
    uploaded_paths = [uploaded.name for uploaded in files]
    first_path = uploaded_paths[0]
    print(first_path)
    return pdf_summary(first_path)["output_text"]
|
|
|
def upload_large_file(files):
    """Return the stem (file name without its last suffix) of the first uploaded file."""
    uploaded_paths = [uploaded.name for uploaded in files]
    first_path = Path(uploaded_paths[0])
    return first_path.stem
|
|
|
def set_allow_lightweight_delete():
    """Send the lightweight-delete ``SET`` command to the local ClickHouse and print the reply."""
    clickhouse = clickhouse_connect.get_client(host='127.0.0.1', port=8123)
    reply = clickhouse.command("SET allow_experimental_lightweight_delete = true;")
    print(reply)
|
def get_chroma_client(collection_name):
    """Open ``collection_name`` in the persist directory and return the underlying client.

    Relies on the private ``_client`` attribute of langchain's ``Chroma`` wrapper.
    """
    store = Chroma(
        embedding_function=get_openaiembeddings(),
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
    )
    return store._client
|
|
|
def create_db():
    """Build the HR collection from the PDF files in the HR source folder."""
    initial_croma_db(
        persist_db,
        root_file_path + hr_source_path,
        "pdf",
        hr_collection_name,
    )
|
|
|
def generate_iframe_for_youtube(youtube_link):
    """Return an ``<iframe>`` snippet embedding the video behind ``youtube_link``.

    ``youtube.com/watch?v=<id>`` and ``youtu.be/<id>`` links (with or without
    scheme / ``www.``) are rewritten to ``https://www.youtube.com/embed/<id>``.
    Text that does not match is used as the ``src`` unchanged.

    Args:
        youtube_link: Text typed by the user.

    Returns:
        The HTML string; it is also printed to stdout.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from html import escape

    # ``https?`` (was ``https``): an http:// link used to keep its scheme in
    # front of the substituted URL, yielding "http://https://...".
    regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/(?:watch\?v=)?(.+)"
    _url = re.sub(regex, r"https://www.youtube.com/embed/\1", youtube_link)
    # The value comes straight from a textbox: escape it so quotes or angle
    # brackets cannot break out of the src attribute.
    safe_url = escape(_url, quote=True)
    embed_html = f'<iframe width="650" height="365" src="{safe_url}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
    print(embed_html)
    return embed_html
|
|
|
def create_html_video(file_name, width, temp_file_url):
    """Return a square, muted, looping, autoplaying ``<video>`` tag for ``temp_file_url``.

    ``file_name`` is accepted but not used.
    """
    opening_tag = f'<video width={width} height={width} autoplay muted loop>'
    source_tag = f'<source src={temp_file_url} type="video/mp4" poster="Masahiro.png">'
    return opening_tag + source_tag + '</video>'
|
|
|
def do_html_audio_speak(words_to_speak):
    """Synthesize ``words_to_speak`` with Amazon Polly and save it as an mp3.

    Args:
        words_to_speak: Text to be spoken (voice ``Zhiyu``, ``cmn-CN``).

    Returns:
        ``(html_audio, mp3_path)`` on success, ``(None, None)`` when Polly
        returned no audio stream or the mp3 could not be written.
    """
    audio_path = "./data/audios/tempfile.mp3"

    # SECURITY: the fallback credentials below are committed in source and
    # must be treated as leaked -- rotate them.  When the standard AWS
    # variables are present in the environment they take precedence.
    access_key = os.environ.get("AWS_ACCESS_KEY_ID")
    secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
    if access_key and secret_key:
        session_token = os.environ.get("AWS_SESSION_TOKEN")
    else:
        access_key = "AKIAV7Q7AAGW54RBR6FZ"
        secret_key = "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"
        session_token = None

    polly_client = boto3.Session(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        aws_session_token=session_token,
        region_name='us-west-2'
    ).client('polly')

    language_code = "cmn-CN"
    engine = NEURAL_ENGINE
    voice_id = "Zhiyu"

    print("voice_id: "+voice_id+"\nlanguage_code="+language_code)
    response = polly_client.synthesize_speech(
        Text=words_to_speak,
        OutputFormat='mp3',
        VoiceId=voice_id,
        LanguageCode=language_code,
        Engine=engine
    )

    if "AudioStream" not in response:
        print("Could not stream audio")
        return None, None

    # closing() guarantees the stream is released even if the write fails.
    with closing(response["AudioStream"]) as stream:
        try:
            with open(audio_path, 'wb') as f:
                f.write(stream.read())
            temp_aud_file = gr.File(audio_path)
            temp_aud_file_url = "/file=" + temp_aud_file.value['name']
            html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
        except IOError as error:
            print(error)
            return None, None

    return html_audio, audio_path
|
|
|
def do_html_video_speak():
    """Send the last synthesized mp3 to the exh.ai lipsync API and save the video.

    Returns:
        ``(response, html_video, video_path)``.  ``html_video`` is a
        ``<video>`` tag when the request succeeded, otherwise the
        ``<pre>no video</pre>`` placeholder; callers are expected to check
        ``response.status_code``.
    """
    audio_path = "./data/audios/tempfile.mp3"
    video_path = "./data/videos/tempfile.mp4"

    # SECURITY: this bearer token is committed in source and must be treated
    # as leaked -- rotate it and load the replacement from configuration.
    key = "eyJhbGciOiJIUzUxMiJ9.eyJ1c2VybmFtZSI6ImNhdHNreXR3QGdtYWlsLmNvbSJ9.OypOUZF-xv4-b8i9F4_aaMQiJpxv0mXRT5kyuJwTMXVd4awV-O-Obntp--AqGghNNowzQ9oG7zArSnQjz2vQgg"
    url = "https://api.exh.ai/animations/v2/generate_lipsync_from_audio"
    payload = {
        "animation_pipeline": "high_quality",
        "idle_url": "https://ugc-idle.s3-us-west-2.amazonaws.com/5fd9ba1b1607b39a4d559300c1e35bee.mp4"
    }
    headers = {
        "accept": "application/json",
        "authorization": f"Bearer {key}"
    }

    # The upload handle used to be opened inline and never closed.
    with open(audio_path, "rb") as audio_file:
        files = {"audio_file": (audio_path, audio_file, "audio/mpeg")}
        res = requests.post(url, data=payload, files=files, headers=headers)

    print("res.status_code: ", res.status_code)

    html_video = '<pre>no video</pre>'
    # ``res.content`` is always bytes, so the old isinstance() check also
    # wrote error bodies to the mp4.  Only persist a successful response.
    if res.status_code == 200 and res.content:
        print("len(res.content)): ", len(res.content))
        with open(video_path, 'wb') as f:
            f.write(res.content)
        temp_file = gr.File(video_path)
        temp_file_url = "/file=" + temp_file.value['name']
        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
    else:
        print('video url unknown')
    return res, html_video, video_path
|
|
|
def kh_update_km(files):
    """Copy uploaded files into the knowledge source folder and index them.

    Args:
        files: Uploaded file objects; only their ``.name`` attribute is read.

    Returns:
        The item-count sentence from ``num_of_collection`` for the temporary collection.
    """
    uploaded_paths = [uploaded.name for uploaded in files]
    target_dir = root_file_path + ks_source_path

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    for source_path in uploaded_paths:
        shutil.copy(source_path, target_dir)

    add_files_to_collection(ks_source_path, tmp_collection)
    return num_of_collection(tmp_collection)
|
|
|
class Logger:
    """File-like object that duplicates everything written to it.

    Output goes both to the ``sys.stdout`` that was active at construction
    time and to a log file opened in write mode (UTF-8).
    """

    def __init__(self, filename):
        self.terminal = sys.stdout
        self.log = open(filename, "w", encoding='UTF-8')

    def write(self, message):
        """Write ``message`` to the terminal first, then to the log file."""
        for sink in (self.terminal, self.log):
            sink.write(message)

    def flush(self):
        """Flush the terminal first, then the log file."""
        for sink in (self.terminal, self.log):
            sink.flush()

    def isatty(self):
        """Always report a non-interactive stream."""
        return False
|
|
|
def read_logs():
    """Return the contents of ``output.log`` with ANSI escape sequences removed.

    ``sys.stdout`` is flushed first so pending output reaches the file.
    """
    sys.stdout.flush()
    with open("output.log", "r", encoding='UTF-8') as log_file:
        raw_text = log_file.read()
    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', raw_text)
|
|
|
def pandas_analysis(prompt_str, message, chat_history):
    """Answer ``message`` about the CSV data with a pandas dataframe agent.

    Args:
        prompt_str: Template formatted with ``message`` and ``chat_history``.
        message: The user's question.
        chat_history: List of ``(question, answer)`` pairs; the new pair is
            appended in place.

    Returns:
        ``('', chat_history)`` -- an empty string for the input box and the
        updated history.

    Note: ``pd.concat`` raises ``ValueError`` when no CSV file matches.
    """
    csv_paths = glob.glob(f"{root_file_path}{believe_source_path}/*.csv")
    frame = pd.concat([pd.read_csv(csv_path) for csv_path in csv_paths])

    davinci_name = "text-davinci-003"
    completion_llm = AzureOpenAI(
        deployment_name=davinci_name,
        model_name=davinci_name,
        max_tokens=2000,
        temperature=0,
    )

    be_agent = create_pandas_dataframe_agent(
        completion_llm,
        frame,
        prefix="Remove any ` from the Action Input",
        max_iterations=30,
        return_intermediate_steps=False,
        max_execution_time=60,
        handle_parsing_errors="Check your output and make sure it conforms!",
        verbose=True,
    )

    rendered_prompt = prompt_str.format(message=message, chat_history=chat_history)
    print(rendered_prompt)
    chat_history.append((message, be_agent.run(rendered_prompt)))
    return '', chat_history
|
|
|
def lunch_style(demo, logs=None):
    """Redirect stdout to ``output.log``, stream it into ``logs`` and launch ``demo``.

    Command line forms (``sys.argv``):
        * no argument           -- launch with gradio defaults, no auth
        * ``server``            -- launch on the built-in address and port
        * ``server <ip> <port>``-- launch on the given address and port

    Args:
        demo: The ``gr.Blocks`` app to launch.
        logs: Component that receives the log text every second.  A new
            ``gr.Text`` is created per call when omitted (the old default was
            a single component instantiated at function-definition time).
    """
    if logs is None:
        logs = gr.Text()

    sys.stdout = Logger("output.log")
    demo.load(read_logs, None, logs, every=1)

    allowed_paths = [root_file_path, root_file_path + hr_source_path]
    # SECURITY: login credentials are hard-coded in source -- move them to
    # configuration and change them.
    auth = ("Foxconn", "Foxconn123!")
    usage = "syntax: python <your_app>.py [server {ip_address, port}] "

    if len(sys.argv) == 1:
        print("running server as default value")
        demo.launch(allowed_paths=allowed_paths)
        return

    if len(sys.argv) == 2 and sys.argv[1] == "server":
        local_ip = "10.40.23.232"
        local_port = 7788
    elif len(sys.argv) == 4:
        local_ip = sys.argv[2]
        # argv values are strings; gradio needs an integer port.
        try:
            local_port = int(sys.argv[3])
        except ValueError:
            print(usage)
            return
    else:
        print(usage)
        return

    print(f"running server on http://{local_ip}:{local_port}")
    demo.launch(allowed_paths=allowed_paths, auth=auth, server_name=local_ip, server_port=local_port)
|
|
|
def gradio_run():
    """Build the gradio UI (four tabs) and hand it to ``lunch_style``.

    Tabs: long-document summary, HR assistant chat (optional talking head),
    YouTube summary, and a "talk to data" CSV assistant.

    NOTE(review): the source of this function had lost its indentation; the
    nesting of rows/columns below is reconstructed from the statement order
    and should be checked against the intended layout.
    """
    print("User Login")
    with gr.Blocks(theme='bethecloud/storj_theme') as demo:
        with gr.Row():
            gr.Markdown("# HH Azure Openai Demo")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("""
                ### 這是一個基於各場景製造的Azure Openai Demo, 目前預計會包含場景有:

                - 超長文本的摘要 ☑
                - HR 智能客服小幫手 ☑
                - 上傳過去歷史資料, 預測未來發展
                - 上傳初步構想後, AI生成方案
                - 網路上搜尋各式資料(包含google, wikipedia, youtube) 等, 綜合分析給結論

                ### 基礎的技術架構:
                * 給予資料, 持續累加
                * 存入vector(向量化) database, 依不同的collection 存放
                * 問題以相似度(Similarity search), 結果再丟給gpt 做綜合回應

                ### 已知bug:
                * N/A

                如有任何Bug 歡迎隨時回饋
                """)
            with gr.Column(scale=1):
                gr.Image(type="pil", value=root_file_path+"vector.png", label="技術概念圖")
                gr.Markdown("""
                > 中央資訊 Change Liao(廖晨志)
                > teams/email: change.cc.liao@foxconn.com
                > 分機: 5010108
                """)
        with gr.Row():
            gr.Markdown("""
            ------
            ## Playground
            請切換下方Tab 鍵試驗各項功能

            """)

        # ---- Tab 1: long-document summary ---------------------------------
        with gr.Tab("文本摘要"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("""
                    ## 第一項實驗: 超長文本摘要
                    請上傳任何文檔(.pdf, .doc, .csv, text 格式),上傳完成後稍等一會, AI 會在右側TextField 提供文本摘要

                    * 使用方式:
                    * 請在右邊按下 `請上傳超長文本(可接受text, pdf, doc, csv 格式)` 上傳你的文本
                    * AI 會開始解析內容, 檔案愈大解析愈久
                    * 上傳完後可以按同個按鍵, 再次上傳
                    * 後續會支援video 以及 audio格式

                    """)

                with gr.Column(scale=1):
                    gr.Markdown("1.")
                    file_name_field = gr.Textbox(max_lines=1, label="上傳檔案", placeholder="目前沒有上傳檔案")
                    upload_button = gr.UploadButton("請上傳超長文本(可接受text, pdf, doc, csv 格式)",
                                                    file_types=["text", ".pdf", ".doc", ".csv"], file_count="multiple")
                    gr.Markdown("2.")
                    # Label and height are passed to the constructor; the old
                    # code set them as attributes afterwards and also assigned
                    # ``summary_text.change = False``, which overwrote the
                    # component's ``change`` event method.
                    summary_text = gr.Textbox(label="AI 摘要:", lines=12)
                    upload_button.upload(upload_large_file, upload_button, file_name_field).then(summary_large_file, upload_button, summary_text)

        # ---- Tab 2: HR assistant ------------------------------------------
        with gr.Tab("HR 客服助手"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(
                        """
                        ## 第二項實驗: HR 資料庫智能客服助手 AI 試驗
                        """
                    )
                    gr.Markdown("""
                    ### 使用方法
                    * 測試人員可在下方加入任何HR 相關資料, 亦可全部刪除後上傳.
                    * 系統會將資料向量化後,納入右方人資客服機器人資料庫
                    * 測試人員可在右方與客服機器人對話

                    (溫馨提醒: 儘可能所有檔案全部清掉, 再一次上傳所有想納入的檔案;且次數不要太多,以節省經費)
                    """)
                    file_list = gr.Textbox(get_hr_files, label="已存在知識庫的檔案(text,pdf,doc,csv)", placeholder="沒有任何檔案存在", max_lines=16, lines=16)
                    with gr.Row():
                        with gr.Column(scale=1):
                            upload_button = gr.UploadButton("上傳HR知識庫檔案",
                                                            file_types=["text", ".pdf", ".doc", ".csv"], file_count="multiple")
                            upload_button.upload(update_hr_km, inputs=upload_button, outputs=file_list)
                        with gr.Column(scale=1):
                            cleanDataBtn = gr.Button(value="刪除所有知識以及檔案")
                            cleanDataBtn.click(clear_hr_datas, outputs=file_list)

                with gr.Column(scale=1):
                    with gr.Row():
                        with gr.Column():
                            tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
                            tmp_file_url = "/file=" + tmp_file.value['name']
                            htm_video = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH, tmp_file_url)
                            video_html = gr.HTML(htm_video)

                            tmp_aud_file = gr.File("./data/audios/tempfile.mp3", visible=False)
                            tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
                            htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                            audio_html = gr.HTML(htm_audio, visible=False)
                        with gr.Column():
                            isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
                            gr.Markdown("""
                            ### AI 虛擬客服:
                            * 這是一個實驗性質的AI 客服
                            * 講話超過15秒就不會產生,正在要求放寬限制
                            * 想要放誰的頭像都可以, 要放董事長也可以.
                            * 訂閱制(有效時間 6/13~7/13)
                            """)

                    with gr.Row():
                        chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
                    with gr.Row():
                        with gr.Column(scale=5):
                            msg = gr.Textbox(
                                show_label=False,
                                placeholder="輸入你的問題",
                            )
                        with gr.Column(scale=1):
                            clear = gr.Button("清除")

                    def respond(message, chat_history, audio_enabled):
                        """Answer ``message``; optionally add speech and a lip-synced video.

                        ``audio_enabled`` is the live checkbox state passed in
                        as an event input.  The old code read ``isAudio.value``,
                        which is the value the component was constructed with,
                        so the audio branch could never run.
                        """
                        vector_search_message = local_vector_search(message, chat_history)
                        chat_history.append((message, vector_search_message))
                        print("vector_search:"+vector_search_message)
                        if not audio_enabled:
                            print("isAudio is False")
                            return '', chat_history, htm_video, ''

                        print("isAudio is True")
                        html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
                        if html_audio is None:
                            # No fresh mp3 was produced; do not send a stale
                            # one to the video service.
                            return '', chat_history, htm_video, ''

                        res, new_html_video, video_file_path = do_html_video_speak()
                        if res.status_code == 200:
                            return '', chat_history, new_html_video, ''
                        return '', chat_history, htm_video, html_audio

                    msg.submit(respond, [msg, chatbot, isAudio], [msg, chatbot, video_html, audio_html], queue=True)
                    clear.click(lambda: None, None, chatbot, queue=False)

        # ---- Tab 3: YouTube summary ---------------------------------------
        with gr.Tab("Youtube 影片摘要"):
            with gr.Row():
                with gr.Column(scale=1):
                    youtube_gr = gr.HTML(generate_iframe_for_youtube("https://www.youtube.com/embed/"))
                    youtube_link = gr.Textbox(interactive=True, label="在此貼上Youtube link:", placeholder="e.g. https://www.youtube.com/watch?v=xxxxxxxxx")
                    youtube_link.change(generate_iframe_for_youtube, youtube_link, youtube_gr)
                    youtube_analysis_btn = gr.Button("送出解析")
                with gr.Column(scale=1):
                    youtube_summary_textbox = gr.Textbox(interactive=False, label="AI 解析", lines=20)
                    youtube_analysis_btn.click(youtube_summary, youtube_link, youtube_summary_textbox)

        # ---- Tab 4: talk to CSV data --------------------------------------
        with gr.Tab("統計助手"):
            with gr.Row():
                gr.Markdown("""
                ### 使用方式
                已經讀取所有提供的csv 資料, 可以詢問資料任何問題(Talk to data)
                建議先詢問欄位後, 後續再構思其他問題
                """)

            invField = gr.Textbox(visible=False)
            dir_path = f"{root_file_path}{believe_source_path}/*.csv"
            res = glob.glob(dir_path)
            gr.Examples(res, label="資料庫檔案", inputs=invField, examples_per_page=4)
            with gr.Row():
                with gr.Column():
                    tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
                    with gr.Row():
                        with gr.Column(scale=5):
                            tmp_msg = gr.Textbox(show_label=False, placeholder="輸入你的問題",)
                        with gr.Column(scale=1):
                            tmp_clear = gr.Button("清除對話")
                with gr.Column():
                    # Built with join() so the runtime text carries no
                    # code-indentation whitespace.
                    default_prompt = "\n".join([
                        "",
                        "你是一位專業HR專家以及資料科學家,提供給你的資料是現有的人員資料表.",
                        "有下列定義:",
                        "",
                        "1.",
                        "",
                        "請以中文回答我下面的問題:{message}",
                        "",
                    ])
                    prompt_textbox = gr.Textbox(default_prompt, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數", interactive=True, max_lines=10)
                    console = gr.Textbox(lines=11, label="Console", max_lines=11)
            tmp_msg.submit(pandas_analysis, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot], queue=True)
            tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
            with gr.Row():
                gr.Examples([
                    '你有什麼欄位?',
                    '資料裡有屬於台灣(TW)的員工有多少位?',
                    '全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
                    '如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
                ], label="訊息範例", inputs=tmp_msg)

        # Still inside the Blocks context: lunch_style() registers a load
        # event on ``demo`` before launching.
        demo.queue(concurrency_count=10)
        lunch_style(demo, console)
|
|
|
# Launch the app only when executed as a script, so importing this module
# (tests, tooling) does not start a web server as a side effect.
if __name__ == "__main__":
    gradio_run()
|
|
|
|
|
|