Spaces:

yentinglin
/

Taiwan-LLaMa2

Running

App Files Files Community

Taiwan-LLaMa2 / app.py

yentinglin

Update app.py

5f7b1ee verified 6 months ago

raw

history blame

8.41 kB

	import os
	from urllib.parse import quote_plus

	import gradio as gr
	import openai
	from pymongo import MongoClient
	from transformers import AutoTokenizer

	# MongoDB connection settings are read from the environment.
	DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
	USER = os.getenv("MONGO_USER")
	PASSWORD = os.getenv("MONGO_PASSWORD")

	# The user name and password must be percent-escaped before they are placed
	# in a MongoDB URI; otherwise characters such as "@", ":" or "/" corrupt it.
	# str() keeps the previous rendering ("None") when a variable is unset, so
	# the URI is unchanged for every value that needs no escaping.
	# NOTE(review): the cluster host name is derived from DB_NAME -- confirm the
	# cluster really shares the database's name.
	uri = (
	    f"mongodb+srv://{quote_plus(str(USER))}:{quote_plus(str(PASSWORD))}"
	    f"@{DB_NAME}.kvwjiok.mongodb.net/?retryWrites=true&w=majority"
	)
	mongo_client = MongoClient(uri)
	db = mongo_client[DB_NAME]
	# Collection used to store logged conversations.
	conversations_collection = db['conversations']

	# Markdown/HTML introduction for the demo page; it is passed to gr.Markdown
	# as the first component of the UI.
	DESCRIPTION = """
	# Language Models for Taiwanese Culture

	<p align="center">
	✍️ <a href="https://twllm.com/" target="_blank">Online Demo</a>
	•
	✍️ <a href="https://arena.twllm.com/" target="_blank">TW Chatbot Arena</a>
	•
	🤗 <a href="https://huggingface.co/yentinglin" target="_blank">HF Repo</a> • 🐦 <a href="https://twitter.com/yentinglin56" target="_blank">Twitter</a> • 📃 <a href="https://arxiv.org/pdf/2311.17487" target="_blank">[Paper]</a>
	• 👨️ <a href="https://github.com/MiuLab/Taiwan-LLaMa/tree/main" target="_blank">Github Repo</a>
	<br/><br/>
	<img src="https://www.csie.ntu.edu.tw/~miulab/taiwan-llama/logo-v2.png" width="100"> <br/>
	</p>

	# 🌟 Checkout New [Taiwan-LLM UI](http://www.twllm.com) 🌟


	Taiwan-LLaMa is a fine-tuned model specifically designed for traditional mandarin applications. It is built upon the LLaMa 2 architecture and includes a pretraining phase with over 5 billion tokens and fine-tuning with over 490k multi-turn conversational data in Traditional Mandarin.

	## Key Features

	1. Traditional Mandarin Support: The model is fine-tuned to understand and generate text in Traditional Mandarin, making it suitable for Taiwanese culture and related applications.

	2. Instruction-Tuned: Further fine-tuned on conversational data to offer context-aware and instruction-following responses.

	3. Performance on Vicuna Benchmark: Taiwan-LLaMa's relative performance on Vicuna Benchmark is measured against models like GPT-4 and ChatGPT. It's particularly optimized for Taiwanese culture.

	4. Flexible Customization: Advanced options for controlling the model's behavior like system prompt, temperature, top-p, and top-k are available in the demo.

	## Model Versions

	Different versions of Taiwan-LLaMa are available:

	- Taiwan-LLM v3.0 (This demo)
	- Taiwan-LLM v2.0
	- Taiwan-LLM v1.0

	The models can be accessed from the provided links in the Hugging Face repository.

	Try out the demo to interact with Taiwan-LLaMa and experience its capabilities in handling Traditional Mandarin!
	"""

	# Markdown with licence terms and acknowledgements; it is passed to
	# gr.Markdown after the chat controls have been created.
	LICENSE = """
	## Licenses

	- Code is licensed under Apache 2.0 License.
	- Models are licensed under the LLAMA Community License.
	- By using this model, you agree to the terms and conditions specified in the license.
	- By using this demo, you agree to share your input utterances with us to improve the model.

	## Acknowledgements

	Taiwan-LLaMa project acknowledges the efforts of the [Meta LLaMa team](https://github.com/facebookresearch/llama) and [Vicuna team](https://github.com/lm-sys/FastChat) in democratizing large language models.
	"""

	# Initial value of the "System prompt" box in the advanced options.
	DEFAULT_SYSTEM_PROMPT = "你是人工智慧助理，以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。"

	# Upper bound and initial value of the "Max new tokens" slider.
	MAX_MAX_NEW_TOKENS = 4096
	DEFAULT_MAX_NEW_TOKENS = 1536

	# Token budget left for the prompt once the largest possible completion and
	# a 10-token margin are subtracted from 8192.
	# NOTE(review): 8192 is presumably the model's context window -- confirm.
	# Nothing in this file reads max_prompt_length.
	max_prompt_length = 8192 - (MAX_MAX_NEW_TOKENS + 10)

	# Base URL of the OpenAI-compatible inference server; the openai client is
	# pointed at it instead of its default API host.
	endpoint_url = os.getenv("ENDPOINT_URL", "http://127.0.0.1:8080")
	openai.api_base = endpoint_url

	# Model identifier sent with every completion request; the matching
	# tokenizer is loaded once at start-up.
	model_name = "yentinglin/Llama-3-Taiwan-70B-Instruct"
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Page layout: introduction, chat window, input row, action row and the
	# collapsible generation settings.
	with gr.Blocks() as demo:
	    gr.Markdown(DESCRIPTION)

	    chatbot = gr.Chatbot()

	    # Message box and submit button share one row.
	    with gr.Row():
	        msg = gr.Textbox(
	            placeholder='Type a message...',
	            show_label=False,
	            container=False,
	            scale=10,
	        )
	        submit_button = gr.Button(
	            'Submit',
	            variant='primary',
	            scale=1,
	            min_width=0,
	        )

	    # Conversation-level actions.
	    with gr.Row():
	        retry_button = gr.Button('🔄 Retry', variant='secondary')
	        undo_button = gr.Button('↩️ Undo', variant='secondary')
	        clear = gr.Button('🗑️ Clear', variant='secondary')

	    # Holds the message removed by Retry/Undo between chained callbacks.
	    saved_input = gr.State()

	    # Sampling controls, collapsed by default.
	    with gr.Accordion(label='Advanced options', open=False):
	        system_prompt = gr.Textbox(
	            label='System prompt',
	            value=DEFAULT_SYSTEM_PROMPT,
	            lines=6,
	        )
	        max_new_tokens = gr.Slider(
	            label='Max new tokens',
	            value=DEFAULT_MAX_NEW_TOKENS,
	            minimum=1,
	            maximum=MAX_MAX_NEW_TOKENS,
	            step=1,
	        )
	        temperature = gr.Slider(
	            label='Temperature',
	            value=0.3,
	            minimum=0.1,
	            maximum=1.0,
	            step=0.1,
	        )
	        top_p = gr.Slider(
	            label='Top-p (nucleus sampling)',
	            value=0.95,
	            minimum=0.05,
	            maximum=1.0,
	            step=0.05,
	        )

	def user(user_message, history):
	    """Queue the submitted message as a new, not yet answered chat turn.

	    Args:
	        user_message: Text taken from the message box.
	        history: Existing chat turns as ``[user, assistant]`` pairs. ``None``
	            is treated as an empty conversation.

	    Returns:
	        A ``("", new_history)`` tuple. The empty string is written back to
	        the message box, and ``new_history`` is a new list ending with
	        ``[user_message, None]`` (``None`` marks the reply as pending).
	        ``history`` itself is not modified.
	    """
	    turns = [] if history is None else history
	    return "", turns + [[user_message, None]]


	def bot(history, max_new_tokens, temperature, top_p, system_prompt):
	    """Stream the assistant's reply for the newest turn in ``history``.

	    Args:
	        history: Chat turns as mutable ``[user, assistant]`` pairs; the last
	            pair is the turn to answer. Its assistant slot is overwritten.
	        max_new_tokens: Forwarded to the completion request as ``max_tokens``.
	        temperature: Sampling temperature forwarded to the request.
	        top_p: Nucleus-sampling value forwarded to the request.
	        system_prompt: Content of the leading ``system`` message.

	    Yields:
	        ``history`` after each received content delta has been appended to
	        the last turn's assistant slot.

	    After the stream ends, the conversation and its sampling settings are
	    inserted into ``conversations_collection``.
	    """
	    # The last turn is the one being answered. Whatever sits in its assistant
	    # slot (``None`` after a submit, ``''`` after a retry) is a placeholder
	    # and must not be sent to the model as if it were a real reply.
	    pending_index = len(history) - 1
	    messages = [{"role": "system", "content": system_prompt}]
	    for index, (user_text, assistant_text) in enumerate(history):
	        if user_text is not None:
	            messages.append({"role": "user", "content": user_text})
	        if assistant_text is not None and index != pending_index:
	            messages.append({"role": "assistant", "content": assistant_text})

	    history[-1][1] = ""
	    response = openai.ChatCompletion.create(
	        model=model_name,
	        messages=messages,
	        max_tokens=max_new_tokens,
	        temperature=temperature,
	        top_p=top_p,
	        n=1,
	        stream=True,
	        # Stop marker: Llama 3's end-of-turn token. It is written without
	        # backslashes, which would otherwise become part of the stop string.
	        stop=["<|eot_id|>"],
	    )

	    for chunk in response:
	        # Skip chunks that carry no choices or no text (e.g. role-only deltas).
	        choices = chunk.get('choices')
	        if not choices:
	            continue
	        content = choices[0]['delta'].get('content')
	        if content is None:
	            continue
	        history[-1][1] += content
	        yield history

	    conversation_document = {
	        "model_name": model_name,
	        "history": history,
	        "system_prompt": system_prompt,
	        "max_new_tokens": max_new_tokens,
	        "temperature": temperature,
	        "top_p": top_p,
	    }
	    conversations_collection.insert_one(conversation_document)

	# Pressing Enter in the message box and clicking "Submit" run the same
	# two-step chain: `user` appends the message (unqueued), then `bot` streams
	# the reply into the chat window.
	for register_trigger in (msg.submit, submit_button.click):
	    register_trigger(
	        user,
	        [msg, chatbot],
	        [msg, chatbot],
	        queue=False,
	    ).then(
	        fn=bot,
	        inputs=[chatbot, max_new_tokens, temperature, top_p, system_prompt],
	        outputs=chatbot,
	    )


	def delete_prev_fn(
	    history: list[tuple[str, str]],
	) -> tuple[list[tuple[str, str]], str]:
	    """Remove the newest chat turn and hand back its user message.

	    Args:
	        history: Chat turns as ``(user, assistant)`` pairs; modified in
	            place. ``None`` is treated as an empty conversation.

	    Returns:
	        ``(history, message)`` where ``message`` is the user text of the
	        removed turn, or ``''`` when there was no turn to remove or the
	        removed turn had no user text.
	    """
	    if history is None:
	        # list.pop on None would raise AttributeError, which the IndexError
	        # handler below does not cover.
	        history = []
	    try:
	        message, _ = history.pop()
	    except IndexError:
	        message = ''
	    return history, message or ''


	def display_input(
	    message: str,
	    history: list[tuple[str, str]],
	) -> list[tuple[str, str]]:
	    """Append ``message`` to the chat as a turn with an empty reply.

	    Args:
	        message: User text to show again.
	        history: Chat turns; modified in place. ``None`` is treated as an
	            empty conversation.

	    Returns:
	        ``history`` with ``(message, '')`` appended.
	    """
	    if history is None:
	        history = []
	    history.append((message, ''))
	    return history

	# Keyword arguments shared by the bookkeeping steps below: they are not
	# exposed as API endpoints and bypass the queue.
	unqueued = {"api_name": False, "queue": False}

	# Retry: drop the newest turn, show its message again, then regenerate.
	retry_button.click(
	    fn=delete_prev_fn,
	    inputs=chatbot,
	    outputs=[chatbot, saved_input],
	    **unqueued,
	).then(
	    fn=display_input,
	    inputs=[saved_input, chatbot],
	    outputs=chatbot,
	    **unqueued,
	).then(
	    fn=bot,
	    inputs=[chatbot, max_new_tokens, temperature, top_p, system_prompt],
	    outputs=chatbot,
	)

	# Undo: drop the newest turn and copy its message back into the text box.
	undo_button.click(
	    fn=delete_prev_fn,
	    inputs=chatbot,
	    outputs=[chatbot, saved_input],
	    **unqueued,
	).then(
	    fn=lambda saved: saved,
	    inputs=[saved_input],
	    outputs=msg,
	    **unqueued,
	)

	# Clear: reset the chat window by writing None to it.
	clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

	# Licence and acknowledgement text.
	gr.Markdown(LICENSE)

	# Enable the request queue (max_size=128), then start the app.
	demo.queue(max_size=128).launch(max_threads=10)