"""Gradio demo for Azure OpenAI: HR knowledge-base chat, document/YouTube
summaries, a pandas statistics agent and a legal-document summariser."""

# --- standard library ---
import datetime
import glob
import io
import os
import re
import shutil
import sys
from contextlib import closing
from os import listdir
from os.path import isfile, join
from pathlib import Path

# --- third party ---
import boto3
import clickhouse_connect
import gradio
import gradio as gr
import pandas as pd
import requests
from langchain import LLMChain, PromptTemplate
from langchain import SerpAPIWrapper
from langchain.agents import AgentType
from langchain.agents import Tool
from langchain.agents import initialize_agent
from langchain.agents import load_tools
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import LLMMathChain, StuffDocumentsChain
from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import DirectoryLoader, UnstructuredAPIFileLoader
from langchain.document_loaders import UnstructuredFileLoader
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import AzureOpenAI
from langchain.memory import ChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_experimental.agents import create_pandas_dataframe_agent
from sqlitedict import SqliteDict

# --- project local ---
from azure_utils import AzureVoiceData
from polly_utils import PollyVoiceData, NEURAL_ENGINE

# os env: Azure OpenAI endpoint configuration consumed by langchain/openai.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "1106-Preview"
os.environ["OPENAI_API_BASE"] = "https://chairman-azureopenai-australiaeast.openai.azure.com/"

# NOTE(review): the two literals below are live-looking credentials committed
# to source control. They are kept only as fallbacks so that existing
# deployments keep working; rotate them and provide the real values through
# the environment (AZURE_OPENAI_API_KEY / SERPAPI_API_KEY) instead.
# A dedicated AZURE_OPENAI_API_KEY variable is read (rather than honouring a
# pre-existing OPENAI_API_KEY) so that a developer's openai.com key is never
# sent to the Azure endpoint by accident.
os.environ["OPENAI_API_KEY"] = os.environ.get(
    "AZURE_OPENAI_API_KEY", "ac523b401b664f9e800d43933f0c5df5"
)
os.environ.setdefault(
    "SERPAPI_API_KEY",
    "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7",
)

# Azure deployment / model used by every chat and completion call below.
global_deployment_id = "gpt-4-1106-preview"
global_model_name = "gpt-4"

# chroma settings
# NOTE(review): this section was reviewed from a whitespace-collapsed copy of
# the file. Multi-line string literals (prompts, Markdown) are reproduced on a
# single line exactly as they appeared; if the file under version control has
# line breaks or HTML markup inside them, keep that version of the literals.
chroma_api_impl = "HH_Azure_Openai"
root_file_path = "./data/"  # this is where the data files are actually stored
hr_source_path = "hr_source"
ks_source_path = "ks_source"
believe_source_path = 'be_source'
sqlite_name = "cache.sqlite3"
sqlite_key = "stored_files"
persist_db = "persist_db"
hr_collection_name = "hr_db"
chroma_db_impl = "localdb+langchain"
tmp_collection = "tmp_collection"

# global text setting
inputText = "問題(按q 或Ctrl + c跳出): "
refuse_string = "服務被拒. 內容可能涉及敏感字詞,政治,煽動他人或是其他不當言詞, 請改以其他內容嚐試"

# video
LOOPING_TALKING_HEAD = "./data/videos/Masahiro.mp4"
TALKING_HEAD_WIDTH = "192"
AZURE_VOICE_DATA = AzureVoiceData()
POLLY_VOICE_DATA = PollyVoiceData()

# Summary prompt override; empty means "use the built-in summary prompt".
prompt_string = ""

# Strips ANSI escape sequences from captured console output (see read_logs).
_ANSI_ESCAPE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')


def save_sqlite(key, value):
    """Prepend ``value`` to whatever is already stored under ``key``.

    Errors are reported on stdout and swallowed (best effort), as before.
    """
    try:
        with SqliteDict(sqlite_name) as mydict:
            # The key does not exist before the first save; indexing it
            # directly raised KeyError and the first value was never stored.
            old_value = mydict.get(key)
            mydict[key] = value if old_value is None else value + old_value
            mydict.commit()  # commit() is required to actually flush the data
    except Exception as ex:
        print("Error during storing data (Possibly unsupported):", ex)


def load_sqlite(key):
    """Return the value stored under ``key``, or ``None`` if loading fails."""
    try:
        with SqliteDict(sqlite_name) as mydict:
            # No commit needed: this only reads.
            return mydict[key]
    except Exception as ex:
        print("Error during loading data:", ex)
        return None


def delete_sql(key):
    """Reset the value stored under ``key`` to an empty list."""
    try:
        with SqliteDict(sqlite_name) as mydict:
            mydict[key] = []
            mydict.commit()  # commit() is required to actually flush the data
    except Exception as ex:
        print("Error during storing data (Possibly unsupported):", ex)


def ai_answer(answer):
    """Print an AI answer to the console in green."""
    print('AI 回答: \033[32m' + answer + '\033[0m')


def get_openaiembeddings():
    """Build the Azure OpenAI embeddings client used by every vector store."""
    # NOTE(review): hard-coded API key committed to source; rotate it and load
    # it from configuration instead.
    return OpenAIEmbeddings(
        deployment="text-embedding-ada-002",
        model="text-embedding-ada-002",
        openai_api_base="https://hh-azure-openai-poc.openai.azure.com/",
        openai_api_type="azure",
        openai_api_key="b3cfb72345be4001a470e827a694d083",
        chunk_size=1,
    )


def multidocs_loader(files_path, file_ext):
    """Load every ``*.<file_ext>`` file under ``files_path`` and split it into chunks."""
    full_files_pattern = "*." + file_ext
    loader = DirectoryLoader(files_path, glob=full_files_pattern, show_progress=True)
    data = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    return text_splitter.split_documents(data)


def unstructure_file_loader(filename_path):
    """Load a single file with UnstructuredFileLoader and split it into chunks."""
    loader = UnstructuredFileLoader(filename_path)
    data = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    return text_splitter.split_documents(data)


def add_documents_into_cromadb(db_name, file_path, collection_name):
    """Embed every file under ``file_path`` into ``collection_name`` and persist it.

    ``db_name`` is accepted for interface compatibility; the persist directory
    is always ``root_file_path + persist_db``.
    """
    documents = multidocs_loader(file_path, "*")
    embeddings = get_openaiembeddings()
    chroma_db = Chroma.from_documents(
        documents,
        embeddings,
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
        # chroma_db_impl=chroma_db_impl
    )
    chroma_db.persist()
    print('adding documents done!')


def initial_croma_db(db_name, files_path, file_ext, collection_name):
    """Create ``collection_name`` from the ``file_ext`` files under ``files_path``.

    ``db_name`` is accepted for interface compatibility; the persist directory
    is always ``root_file_path + persist_db``.
    """
    documents = multidocs_loader(files_path, file_ext)
    embeddings = get_openaiembeddings()
    chroma_db = Chroma.from_documents(
        documents,
        embeddings,
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
        chroma_db_impl=chroma_db_impl,
    )
    chroma_db.persist()
    print('vectorstore done!')


def add_files_to_collection(input_file_path, collection_name):
    """Embed the files in ``root_file_path + input_file_path`` into a collection."""
    file_path = root_file_path + input_file_path
    add_documents_into_cromadb(persist_db, file_path, collection_name)


def get_prompt_summary_string():
    """Return the summary prompt: the user override if set, else the default."""
    _local_prompt_string = """使用中文替下面內容做個精簡摘要: {text} 精簡摘要:"""
    if prompt_string == "":
        return _local_prompt_string
    print("prompt_string: " + prompt_string)
    return prompt_string


# Condense-question prompt for the HR assistant; must keep {chat_history} and {question}.
template_string = """ 我是鴻海(等同Foxconn)的員工, 你是一個鴻海的人資專家. 請根據歷史對話,針對這次的問題, 形成獨立問題. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊. 不論什麼問題, 都以中文回答 歷史對話: {chat_history} 這次的問題: {question} 人資專家: """

# Default prompt for summarising confidentiality agreements; must keep {text}.
default_legal_contract_prompt = """ 你是一位超級助理, 十分擅長從大量文字中擷取摘要. 以下用 ''' 包含的是保密合約的內容,幫我生成一份2,000個中文字以內保密合約摘要,摘要需要包含以下項目: 1.背景: 介紹對方公司的背景、為什麼要跟該公司簽訂保密合約 2.目的: 要與對方交換什麼資料, 資料內容與範圍 3.合約期間:保密合約的時間範圍 4.提前解約條款: 發生什麼樣的條件就會要提前解約 5.保密期間: 保密的時間範圍 6.管轄法院: 如有爭端,雙方同意的管轄法院是哪個法院 AI 風險評估: 希望AI 可以評估該資料交換是否有高風險的疑慮; 評估準測: 高風險: 涉及到營業秘密的內容 中風險: 沒有營業秘密, 但有涉及敏感資料(足以辨識個人的訊息) 低風險: 僅涉及作業面向的訊息 保密合約: ''' {text} ''' """

# Default prompt for summarising quotations; must keep {text}.
default_legal_quotation_prompt = """ 你是一位超級助理, 十分擅長從大量文字中擷取摘要. 以下用 ''' 包含的是報價單的內容,幫我生成一份2,000個中文字以內報價單摘要,摘要需要包含以下項目: 1. 標的名稱: 報價單中所列出的產品或服務的名稱。 2. 價格: 報價單中所列出的每個產品或服務的價格, 一定要有正確的幣別與金額數字. 3. 付款內容: 報價單中所列出的付款方式和相關內容, 包括訂金, 交貨款和保留款的金額和支付方式; 除了各款項的交付百分比, 也需要有正確的金額與幣別. 4. 交貨時間: 報價單中所列出的產品或服務的交付的日期或時間範圍。 5. 保固(英文為Warranty): 請摘要報價單中所有關於保固內容. 6. 維修費用:報價單中所列出的產品或服務的維修費用或相關條款, 有任何維修的金額請一定要列出. 7. 貿易條件(Trade Term) 8. 其他注意事項:報價單中所列出的其他重要事項或注意事項。 請根據報價單的內容, 生成一份清晰明確的摘要, 條列式地把摘要列出, 確保所有項目都被包含在內. 如果內容超過三句話, 請以子項目的方式逐一列舉出來. 請注意,生成的摘要應該是簡潔且易於理解的, 要詳細條列出內容, 不可產生 "依其他文件說明" 等說明方式. 在報價單裡沒有找到符合的資訊, 你被允許回答 "無相關資料". 報價單內容: ''' {text} ''' """


def get_prompt_template_string():
    """Return (and log) the current HR condense-question prompt."""
    print("template:" + template_string)
    return template_string


def get_default_template_prompt():
    """Return the single-variable ``{concept}`` explanation prompt."""
    template = "你是個知識廣泛的超級助手, 以下所有問題請用中文回答, 並請在500個中文字以內來解釋 {concept} 概念"
    return PromptTemplate(input_variables=["concept"], template=template)


def fine_tuning_model_chat(my_deployment_id, my_model_name):
    """Console loop: explain each entered concept until the user types ``q``."""
    _prompt = get_default_template_prompt()
    llm = AzureOpenAI(model_name=my_model_name, deployment_name=my_deployment_id)
    while True:
        text = input(inputText)
        if text == 'q':
            break
        response = llm(_prompt.format(concept=text))
        ai_answer(response)


def chat_conversation():
    """Console chat loop against the global deployment until the user types ``q``."""
    print("resource: " + global_deployment_id + " / " + global_model_name)
    chat = AzureChatOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
    )
    history = ChatMessageHistory()
    history.add_ai_message("你是一個超級助理, 以下問題都用中文回答")
    while True:
        text = input(inputText)
        if text == 'q':
            break
        history.add_user_message(text)
        ai_response = chat(history.messages)
        ai_answer(ai_response.content)


def local_vector_search(question_str, chat_history, collection_name=hr_collection_name):
    """Answer ``question_str`` from the persisted Chroma collection.

    Returns the ``"answer"`` entry of the ConversationalRetrievalChain result.
    """
    embedding = get_openaiembeddings()
    vectorstore = Chroma(
        embedding_function=embedding,
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
    )
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True, ai_prefix="AI超級助理"
    )
    llm = AzureOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
        temperature=0.0,
    )
    chat_llm = AzureChatOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
        temperature=0.0,
    )
    prompt = PromptTemplate(
        template=get_prompt_template_string(),
        input_variables=["question", "chat_history"],
    )
    # Result is discarded; the call only fails early if the (user-editable)
    # template references variables other than question/chat_history.
    prompt.format(question=question_str, chat_history=chat_history)

    km_chain = ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
        condense_question_prompt=prompt,
    )
    km_tool = Tool(
        name='Knowledge Base',
        func=km_chain.run,
        description='一個非常有用的工具, 當要查詢任何公司政策以及鴻海相關資料都使用這個工具',
    )
    math_math = LLMMathChain(llm=llm, verbose=True)
    math_tool = Tool(
        name='Calculator',
        func=math_math.run,
        description='Useful for when you need to answer questions about math.',
    )
    tools = [math_tool, km_tool]
    # The agent is built but not used for the answer: the retrieval chain is
    # queried directly below (agent.run was the disabled alternative).
    agent = initialize_agent(
        agent=AgentType.OPENAI_FUNCTIONS,
        tools=tools,
        llm=chat_llm,
        verbose=True,
        memory=memory,
        max_iterations=30,
    )
    result = km_chain(question_str)
    # result = agent.run(question_str)
    print(result)
    return result["answer"]


def make_markdown_table(array):
    """Render each entry on its own line (entry, space, newline)."""
    return "".join(f"{entry} \n" for entry in array)


def get_hr_files():
    """Return the stored HR file names as text, or ``None`` when nothing is stored."""
    files = load_sqlite(sqlite_key)
    if files is None:
        return None
    return make_markdown_table(files)


def _copy_uploads(files, dest_dir):
    """Copy uploaded temp files into ``dest_dir`` (created if missing); return their paths."""
    file_paths = [file.name for file in files]
    os.makedirs(dest_dir, exist_ok=True)
    for file_path in file_paths:
        shutil.copy(file_path, dest_dir)
    return file_paths


def update_hr_km(files):
    """Store uploaded HR files, index them, record their names; return the file list text."""
    file_paths = _copy_uploads(files, root_file_path + hr_source_path)
    add_files_to_collection(hr_source_path, hr_collection_name)
    save_sqlite(sqlite_key, [Path(file_path).name for file_path in file_paths])
    return get_hr_files()


def clear_all_collection(collection_name):
    """Placeholder; intentionally does nothing."""
    pass


def all_files_under_diretory(path):
    """Delete every regular file directly under ``path``."""
    # os.path.join keeps the pattern portable; the previous '\*' suffix only
    # matched on Windows. Sub-directories are skipped (os.remove cannot delete them).
    for entry in glob.glob(os.path.join(path, '*')):
        if os.path.isfile(entry):
            os.remove(entry)


def clear_hr_datas():
    """Drop the HR collection, its source files and the stored file list."""
    # remove hr collection
    client = get_chroma_client(hr_collection_name)
    client.delete_collection(name=hr_collection_name)
    print("Collection removed completely!")
    # remove files
    all_files_under_diretory(root_file_path + hr_source_path)
    delete_sql(sqlite_key)
    return get_hr_files()


def num_of_collection(collection_name):
    """Return a message with the number of items in ``collection_name``."""
    client = get_chroma_client(collection_name)
    number = client.get_collection(collection_name).count()
    return f"目前知識卷裡有{number}卷項目"


def clear_tmp_collection():
    """Drop the temporary collection and its source files; return the new count message."""
    client = get_chroma_client(tmp_collection)
    client.delete_collection(name=tmp_collection)
    all_files_under_diretory(root_file_path + ks_source_path)
    return num_of_collection(tmp_collection)


def content_summary(split_documents):
    """Summarise the documents with a single "stuff" prompt.

    Returns the chain output; callers read its ``"output_text"`` entry.
    """
    llm = AzureChatOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
        temperature=0.0,
    )
    _local_prompt = PromptTemplate.from_template(get_prompt_summary_string())
    llm_chain = LLMChain(llm=llm, prompt=_local_prompt)
    # The prompt's {text} variable receives all documents concatenated.
    stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")
    return stuff_chain(split_documents)


def pdf_summary(file_name):
    """Load ``file_name``, split it and return ``content_summary`` of the chunks."""
    print("file_name: " + file_name)
    loader = UnstructuredFileLoader(file_name)
    document = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    split_documents = text_splitter.split_documents(document)
    return content_summary(split_documents)


def youtube_summary(youtube_url):
    """Return a summary text of the transcript of ``youtube_url``."""
    loader = YoutubeLoader.from_youtube_url(
        youtube_url,
        add_video_info=True,
        language=['en', 'zh-TW'],
        translation='zh-TW',
    )
    document = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    split_documents = text_splitter.split_documents(document)
    result = content_summary(split_documents)
    return result['output_text']


def summary_large_file(files):
    """Summarise the first uploaded file and return the summary text."""
    file_paths = [file.name for file in files]
    print(file_paths[0])
    result = pdf_summary(file_paths[0])
    return result["output_text"]


def upload_large_file(files):
    """Return the base name (without extension) of the first uploaded file."""
    file_paths = [file.name for file in files]
    return Path(file_paths[0]).stem


def set_allow_lightweight_delete():
    """Enable experimental lightweight deletes on the local ClickHouse server."""
    client = clickhouse_connect.get_client(host='127.0.0.1', port=8123)
    command = "SET allow_experimental_lightweight_delete = true;"
    # command = "show databases;"
    res = client.command(command)
    print(res)


def get_chroma_client(collection_name):
    """Return the low-level Chroma client behind the persisted vector store."""
    vectorstore = Chroma(
        embedding_function=get_openaiembeddings(),
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
    )
    # Uses a private attribute of the langchain wrapper to reach the raw client.
    return vectorstore._client


def create_db():
    """Build the HR collection from the PDF files in the HR source folder."""
    files_path = root_file_path + hr_source_path
    file_ext = "pdf"
    initial_croma_db(persist_db, files_path, file_ext, hr_collection_name)


def generate_iframe_for_youtube(youtube_link):
    """Rewrite a YouTube link to its embed URL and return the embed HTML."""
    regex = r"(?:https:\/\/)?(?:www\.)?(?:youtube\.com|youtu\.be)\/(?:watch\?v=)?(.+)"
    _url = re.sub(regex, r"https://www.youtube.com/embed/\1", youtube_link)
    # NOTE(review): the literal is empty in the reviewed copy, so _url is
    # unused; presumably an <iframe> snippet using _url belongs here - confirm.
    embed_html = f''
    print(embed_html)
    return embed_html


def create_html_video(file_name, width, temp_file_url):
    """Return the HTML snippet for a video element."""
    # NOTE(review): the literal is empty in the reviewed copy; presumably the
    # <video> markup was lost - confirm against version control.
    html_video = f''
    return html_video


def do_html_audio_speak(words_to_speak):
    """Synthesise ``words_to_speak`` with Amazon Polly into an mp3 file.

    Returns ``(html_audio, mp3_path)``, or ``(None, None)`` when no audio was
    returned or the file could not be written.
    """
    # NOTE(review): hard-coded AWS credentials committed to source; rotate
    # them and rely on the standard AWS credential chain instead.
    polly_client = boto3.Session(
        aws_access_key_id="AKIAV7Q7AAGW54RBR6FZ",
        aws_secret_access_key="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",
        region_name='us-west-2',
    ).client('polly')

    language_code = "cmn-CN"
    engine = NEURAL_ENGINE
    voice_id = "Zhiyu"
    print("voice_id: " + voice_id + "\nlanguage_code=" + language_code)

    response = polly_client.synthesize_speech(
        Text=words_to_speak,
        OutputFormat='mp3',
        VoiceId=voice_id,
        LanguageCode=language_code,
        Engine=engine,
    )

    # NOTE(review): the fallback literal spans lines in the reviewed copy;
    # presumably it was wrapped in markup that has been lost - confirm.
    html_audio = '\nno audio\n'

    if "AudioStream" not in response:
        # The response didn't contain audio data, exit gracefully
        print("Could not stream audio")
        return None, None

    # Save the audio stream returned by Amazon Polly to a temp file
    with closing(response["AudioStream"]) as stream:
        try:
            with open('./data/audios/tempfile.mp3', 'wb') as f:
                f.write(stream.read())
            temp_aud_file = gr.File("./data/audios/tempfile.mp3")
            temp_aud_file_url = "/file=" + temp_aud_file.value['name']
            # NOTE(review): empty in the reviewed copy; presumably an <audio>
            # snippet using temp_aud_file_url belongs here - confirm.
            html_audio = f''
        except IOError as error:
            # Could not write to file, exit gracefully
            print(error)
            return None, None

    return html_audio, "./data/audios/tempfile.mp3"


def do_html_video_speak():
    """Send the synthesised mp3 to the lip-sync API and save the returned video.

    Returns ``(response, html_video, mp4_path)``.
    """
    # NOTE(review): hard-coded bearer token committed to source; rotate it and
    # load it from configuration instead.
    key = "eyJhbGciOiJIUzUxMiJ9.eyJ1c2VybmFtZSI6ImNhdHNreXR3QGdtYWlsLmNvbSJ9.OypOUZF-xv4-b8i9F4_aaMQiJpxv0mXRT5kyuJwTMXVd4awV-O-Obntp--AqGghNNowzQ9oG7zArSnQjz2vQgg"
    url = "https://api.exh.ai/animations/v2/generate_lipsync_from_audio"
    payload = {
        "animation_pipeline": "high_quality",
        "idle_url": "https://ugc-idle.s3-us-west-2.amazonaws.com/5fd9ba1b1607b39a4d559300c1e35bee.mp4",
    }
    headers = {
        "accept": "application/json",
        "authorization": f"Bearer {key}",
    }

    # Context manager guarantees the audio file handle is closed after upload.
    with open("./data/audios/tempfile.mp3", "rb") as audio_file:
        files = {"audio_file": ("./data/audios/tempfile.mp3", audio_file, "audio/mpeg")}
        res = requests.post(url, data=payload, files=files, headers=headers)
    print("res.status_code: ", res.status_code)

    # NOTE(review): the fallback literal spans lines in the reviewed copy;
    # presumably it was wrapped in markup that has been lost - confirm.
    html_video = '\nno video\n'
    if isinstance(res.content, bytes):
        response_stream = io.BytesIO(res.content)
        print("len(res.content)): ", len(res.content))
        with open('./data/videos/tempfile.mp4', 'wb') as f:
            f.write(response_stream.read())
        temp_file = gr.File("./data/videos/tempfile.mp4")
        temp_file_url = "/file=" + temp_file.value['name']
        # NOTE(review): empty in the reviewed copy; presumably a <video>
        # snippet using temp_file_url belongs here - confirm.
        html_video = f''
    else:
        print('video url unknown')
    return res, html_video, "./data/videos/tempfile.mp4"


def kh_update_km(files):
    """Store uploaded files in the temporary knowledge folder and index them."""
    _copy_uploads(files, root_file_path + ks_source_path)
    add_files_to_collection(ks_source_path, tmp_collection)
    return num_of_collection(tmp_collection)


class Logger:
    """stdout replacement that mirrors every write to the terminal and a log file."""

    def __init__(self, filename):
        self.terminal = sys.stdout
        self.log = open(filename, "w", encoding='UTF-8')

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        self.terminal.flush()
        self.log.flush()

    def isatty(self):
        return False


def read_logs():
    """Return the content of ``output.log`` with ANSI escape sequences removed."""
    sys.stdout.flush()
    with open("output.log", "r", encoding='UTF-8') as f:
        return _ANSI_ESCAPE.sub('', f.read())


def lunch_style(demo, logs=None):
    """Redirect stdout to ``output.log`` and launch ``demo``.

    Command line: no arguments launches with defaults; ``server`` launches on
    the built-in address; ``<any> <ip> <port>`` launches on the given address.

    ``logs`` is the component refreshed every second with the log content.
    When omitted, no log polling is registered (the former default created a
    detached ``gr.Text()`` once, at function definition time).
    """
    sys.stdout = Logger("output.log")
    if logs is not None:
        demo.load(read_logs, None, logs, every=1)

    allowed_paths = [root_file_path, root_file_path + hr_source_path]
    # NOTE(review): hard-coded login committed to source; move to configuration.
    auth = ("Foxconn", "Foxconn123!")

    argc = len(sys.argv)
    if argc == 1:
        print("running server as default value")
        demo.launch(allowed_paths=allowed_paths)
    elif argc == 2 and sys.argv[1] == "server":
        local_ip = "10.40.23.232"
        local_port = 7788
        print(f"running server on http://{local_ip}:{local_port}")
        demo.launch(allowed_paths=allowed_paths, auth=auth,
                    server_name=local_ip, server_port=local_port)
    elif argc == 4:
        local_ip = sys.argv[2]
        # argv entries are strings; the port is passed on as an integer.
        local_port = int(sys.argv[3])
        print(f"running server on http://{local_ip}:{local_port}")
        demo.launch(allowed_paths=allowed_paths, auth=auth,
                    server_name=local_ip, server_port=local_port)
    else:
        print("syntax: pythong .py [server {ip_address, port}] ")


def gradio_run():
    """Build the Gradio UI (one tab per experiment) and launch it."""
    # NOTE(review): the Row/Column nesting below was reconstructed from a copy
    # without indentation - compare the layout against the running app.
    print("User Login")
    with gr.Blocks(theme='bethecloud/storj_theme') as demo:
        with gr.Row():
            gr.Markdown("# HH Azure Openai Demo")

        # Header section
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(""" ### 這是一個基於各場景製造的Azure Openai Demo, 目前預計會包含場景有: - 超長文本的摘要 ☑ - HR 智能客服小幫手 ☑ - 上傳過去歷史資料, 預測未來發展 - 上傳初步構想後, AI生成方案 - 網路上搜尋各式資料(包含google, wikipedia, youtube) 等, 綜合分析給結論 ### 基礎的技術架構: * 給予資料, 持續累加 * 存入vector(向量化) database, 依不同的collection 存放 * 問題以相似度(Similarity search), 結果再丟給gpt 做綜合回應 ### 已知bug: * N/A 如有任何Bug 歡迎隨時回饋 """)
            with gr.Column(scale=1):
                gr.Image(type="pil", value=root_file_path + "vector.png", label="技術概念圖")
                gr.Markdown(""" > 中央資訊 Change Liao(廖晨志) > teams/email: change.cc.liao@foxconn.com > 分機: 5010108 """)
        with gr.Row():
            gr.Markdown(""" ------ ## Playground 請切換下方Tab 鍵試驗各項功能 """)

        # First PoC Section
        with gr.Tab("HR 客服助手"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(
                        """ ## 第一項實驗: HR 資料庫智能客服助手 AI 試驗 """
                    )
                    gr.Markdown(""" ### 使用方法 * 測試人員可在下方加入任何HR 相關資料, 亦可全部刪除後上傳. * 系統會將資料向量化後,納入右方人資客服機器人資料庫 * 右方可以更新prompt 的內容, prompt 請不要刪掉chat_history, question 兩個變數, 其他可以隨意加入您想要的限制條件或是額外訊息 * 測試人員可在右下方與客服機器人對話 (溫馨提醒: 儘可能所有檔案全部清掉, 再一次上傳所有想納入的檔案;且次數不要太多,以節省經費) """)
                    file_list = gr.Textbox(get_hr_files, label="已存在知識庫的檔案(text,pdf,doc,csv)", placeholder="沒有任何檔案存在", max_lines=16, lines=16)
                    with gr.Row():
                        with gr.Column(scale=1):
                            upload_button = gr.UploadButton("上傳HR知識庫檔案", file_types=["text", ".pdf", ".doc", ".csv"], file_count="multiple")
                            upload_button.upload(update_hr_km, inputs=upload_button, outputs=file_list)
                        with gr.Column(scale=1):
                            cleanDataBtn = gr.Button(value="刪除所有知識以及檔案")
                            cleanDataBtn.click(clear_hr_datas, outputs=file_list)
                with gr.Column(scale=2):
                    with gr.Row():
                        prompt_textbox = gr.Textbox(template_string, lines=8, max_lines=8, label="Prompt")
                    with gr.Row():
                        def change_hr_prompt(inputString):
                            # Without ``global`` the assignment only created a
                            # local and the edited prompt was never applied.
                            global template_string
                            template_string = inputString
                            return template_string

                        update_btn = gr.Button("更新Prompt")
                        update_btn.click(change_hr_prompt, prompt_textbox, prompt_textbox)
                    with gr.Row():
                        chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
                    with gr.Row():
                        with gr.Column(scale=5):
                            msg = gr.Textbox(
                                show_label=False,
                                placeholder="輸入你的問題",
                            )
                        with gr.Column(scale=1):
                            clear = gr.Button("清除")

                    def respond(message, chat_history):
                        vector_search_message = local_vector_search(message, chat_history)
                        chat_history.append((message, vector_search_message))
                        return '', chat_history

                    msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=True)
                    clear.click(lambda: None, None, chatbot, queue=False)

        # 2nd Hr Section
        with gr.Tab("文本摘要"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f""" ## 第二項實驗: 超長文本摘要 請上傳任何文檔(.pdf, .doc, .csv, text 格式),上傳完成後稍等一會, AI 會在右側TextField 提供文本摘要 * 使用方式: * 請在右邊按下 `請上傳超長文本(可接受text, pdf, doc, csv 格式)` 上傳你的文本 * AI 會開始解析內容, 檔案愈大解析愈久 * 上傳完後可以按同個按鍵, 再次上傳 * 後續會支援video 以及 audio格式 """)
                with gr.Column(scale=1):
                    gr.Markdown("1.")
                    file_name_field = gr.Textbox(max_lines=1, label="上傳檔案", placeholder="目前沒有上傳檔案")
                    upload_button = gr.UploadButton("請上傳超長文本(可接受text, pdf, doc, csv 格式)", file_types=["text", ".pdf", ".doc", ".csv"], file_count="multiple")
                    gr.Markdown("2.")
                    summary_text = gr.Textbox()
                    summary_text.label = "AI 摘要:"
                    summary_text.change = False
                    summary_text.lines = 12
                    upload_button.upload(upload_large_file, upload_button, file_name_field).then(summary_large_file, upload_button, summary_text)

        # 3rd youtube
        with gr.Tab("Youtube 影片摘要"):
            with gr.Row():
                with gr.Column(scale=1):
                    youtube_gr = gr.HTML(generate_iframe_for_youtube("https://www.youtube.com/embed/"))
                    youtube_link = gr.Textbox(interactive=True, label="在此貼上Youtube link:", placeholder="e.g. https://www.youtube.com/watch?v=xxxxxxxxx")
                    youtube_link.change(generate_iframe_for_youtube, youtube_link, youtube_gr)
                    youtube_analysis_btn = gr.Button("送出解析")
                with gr.Column(scale=1):
                    youtube_summary_textbox = gr.Textbox(interactive=False, label="AI 解析", lines=20)
                youtube_analysis_btn.click(youtube_summary, youtube_link, youtube_summary_textbox)

        # 4th: "相信" user statistics assistant
        with gr.Tab("相信人員統計助手"):
            mypath = root_file_path + believe_source_path
            onlyfiles = os.listdir(mypath)
            df = pd.concat((pd.read_csv(os.path.join(mypath, filename), encoding="ISO-8859-1") for filename in onlyfiles))
            with gr.Row():
                gr.Markdown(""" ### 使用方式 資料裡有 `相信` 的active user 資料, 右方己經有先算出平均每個問題花費多少, 隨意詢問算法AI 即可算出多少費用. 若要改費用, 請在右方prompt 更改數字 """)
                invField = gr.Textbox(visible=False)
                gr.Examples(onlyfiles, label="資料庫檔案", inputs=invField, examples_per_page=4)
            with gr.Row():
                with gr.Column():
                    llm = AzureChatOpenAI(
                        deployment_name=global_deployment_id,
                        model_name=global_model_name,
                        max_tokens=2000,
                        temperature=0,
                    )
                    be_agent = create_pandas_dataframe_agent(
                        llm,
                        df,
                        max_iterations=30,
                        return_intermediate_steps=False,
                        max_execution_time=60,
                        handle_parsing_errors="Check your output and make sure it conforms!",
                        verbose=True,
                    )

                    def tmp_respond(prompt_str, message, chat_history):
                        new_str = prompt_str.format(message=message, chat_history=chat_history)
                        answer = be_agent.run(new_str)
                        chat_history.append((message, answer))
                        return '', chat_history

                    tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
                    with gr.Row():
                        with gr.Column(scale=5):
                            tmp_msg = gr.Textbox(show_label=False, placeholder="輸入你的問題",)
                        with gr.Column(scale=1):
                            tmp_clear = gr.Button("清除對話")
                with gr.Column():
                    prompt_textbox = gr.Textbox(""" 你是一位專業資料科學家,提供給你的是研究列表. 有下列定義: 1.Title是研究報告的標題 請以中文回答我下面的問題:{message} """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數", interactive=True, max_lines=10)
                    console = gr.Textbox(lines=11, label="Console", max_lines=11)
                tmp_msg.submit(tmp_respond, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot], queue=True)
                tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
            with gr.Row():
                gr.Examples([
                    '你有什麼欄位?',
                    '資料裡有屬於台灣(TW)的員工有多少位?',
                    '全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
                    '如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
                ], label="訊息範例", inputs=tmp_msg)

        # 5th: legal summary assistant
        with gr.Tab("法務AI幫手"):
            legal_path = "./data/"
            quotation_file = "legal_quotation_prompt.txt"
            contract_file = "legal_contract_prompt.txt"

            def _prompt_file_for(typeString):
                # "保密合約" selects the contract prompt; anything else the quotation prompt.
                if typeString == "保密合約":
                    return legal_path + contract_file
                return legal_path + quotation_file

            def load_prompt_from_file(typeString):
                with open(_prompt_file_for(typeString), 'r', encoding="utf-8") as f:
                    return f.read()

            def save_func(typeString, prompt_string):
                with open(_prompt_file_for(typeString), "w", encoding="utf-8") as f:
                    f.write(prompt_string)

            def restore_func(typeString):
                if typeString == "保密合約":
                    content_string = default_legal_contract_prompt
                else:
                    content_string = default_legal_quotation_prompt
                save_func(typeString, content_string)
                return content_string

            def change_summary_prompt(inputString):
                # Publishes the edited prompt for get_prompt_summary_string().
                global prompt_string
                prompt_string = inputString
                return inputString

            gr.Markdown(""" ### 面版說明: 操作介面全部都在左側, 右側是摘要內容. ### 操作步驟 1. 選擇摘要的類型: 選 `保密合約` 或 `報價單` 2. 微調prompt內容: 直接點選 `prompt對話框` 修改文字內容 3. 上傳檔案: 支援PDF/doc/docx 等格式 """)
            gr.Markdown(""" --- """)
            with gr.Row():
                with gr.Column(scale=2):
                    contract_type = gr.Radio(choices=["報價單", "保密合約"], label="1. 請選擇摘要類型", info="選擇不一樣的摘要類型,會改變下方的prompt 內容", type="value", value="報價單", interactive=True)
                    gr.Markdown(""" --- """)
                    _firstString = load_prompt_from_file("報價單")
                    prompt_textbox = gr.Textbox(_firstString, lines=20, max_lines=20, label="2. Prompt", interactive=True)
                    prompt_textbox.change(change_summary_prompt, inputs=prompt_textbox)
                    with gr.Row():
                        with gr.Column():
                            saveBtn = gr.Button("保存現有Prompt")
                        with gr.Column():
                            restoreBtn = gr.Button("回覆預設Prompt")
                    gr.Markdown(""" --- """)
                    file_name_field = gr.Textbox(max_lines=1, label="3. 上傳檔案", placeholder="目前沒有上傳檔案")
                    # event
                    saveBtn.click(save_func, inputs=[contract_type, prompt_textbox],)
                    restoreBtn.click(restore_func, inputs=contract_type, outputs=prompt_textbox)
                    upload_button = gr.UploadButton("請上傳保密合約或報價單(可接受text, pdf, docx, csv 格式)", file_types=["text", ".pdf", ".csv", ".docx", ".doc"], file_count="multiple")
                    contract_type.change(fn=load_prompt_from_file, inputs=contract_type, outputs=prompt_textbox)
                with gr.Column(scale=3):
                    summary_text = gr.Textbox()
                    summary_text.label = "AI 摘要:"
                    summary_text.change = False
                    summary_text.lines = 38
                    summary_text.max_lines = 38
            (
                upload_button.upload(upload_large_file, upload_button, file_name_field)
                .then(change_summary_prompt, inputs=prompt_textbox)
                .then(summary_large_file, upload_button, summary_text)
            )

        demo.queue(concurrency_count=10)
        # Kept inside the Blocks context: lunch_style registers a load event
        # on ``demo`` before launching.
        lunch_style(demo, console)


def test():
    """Developer helper: print the files found in a local source folder."""
    mypath = "C:\\Users\\catsk\\SourceCode\\azure_openai_poc\\data\\ks_source_files"
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    print(onlyfiles)


gradio_run()