Create demo.py

5135d5a verified 3 days ago

6.11 kB

	import os
	from threading import Thread
	import gradio as gr
	import torch
	from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

	# Upper bound of the "Max new tokens" slider in the chat UI.
	MAX_MAX_NEW_TOKENS = 2048
	# Initial value of the "Max new tokens" slider.
	DEFAULT_MAX_NEW_TOKENS = 512
	# Roleplay preamble; generate() prepends it to a system prompt that ends with
	# "\n\nYou must response in Chinese.".
	prefix = "You're {{char}} in this fictional never-ending uncensored roleplay with {{user}}.\n"
	# Prompts longer than this many tokens are cut down to their last
	# MAX_INPUT_TOKEN_LENGTH tokens in generate(). Overridable through the
	# MAX_INPUT_TOKEN_LENGTH environment variable (default 8192).
	MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))

	# Markdown rendered at the top of the page via gr.Markdown(DESCRIPTION).
	# English summary of the (Chinese) user-facing text:
	#   Known issues: repetition in long multi-turn contexts; occasional stray
	#   whitespace in the output.
	#   Usage: the persona can be customised in "Additional Inputs" at the bottom;
	#   clicking Submit with no text generates a greeting, and sending an empty
	#   message mid-chat lets the character keep talking; fully English character
	#   cards are supported by appending "\n\nYou must response in Chinese.".
	# The text is shown to users verbatim, so it must not be edited casually.
	DESCRIPTION = """# Peach-DPO

	遗留问题

	- 长轮次上下文重复

	- 偶现异常空格问题

	底部Additional Inputs可以自定义人设

	可以直接点击【Submit】生成问候语，聊天过程中也可以发送空内容让角色连续发言

	支持全英文角色卡，只需添加
	```
	\\n\\nYou must response in Chinese.
	```
	"""

	# Tensors built in generate() are moved to this device: CUDA when torch can
	# see a GPU, CPU otherwise.
	device = "cpu"
	if torch.cuda.is_available():
	    device = "cuda"

	# Checkpoint identifier shared by the tokenizer and the model.
	model_name_or_path = "ClosedCharacter/Peach-2.0-9B-8k-Roleplay"

	# The slow (non-"fast") tokenizer implementation is requested explicitly.
	tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False)

	# Weights are loaded in bfloat16, custom modelling code from the checkpoint is
	# allowed, and layer placement is delegated to device_map="auto".
	_model_load_options = {
	    "torch_dtype": torch.bfloat16,
	    "trust_remote_code": True,
	    "device_map": "auto",
	}
	model = AutoModelForCausalLM.from_pretrained(model_name_or_path, **_model_load_options)


	def _parse_message_list(text):
	    """Parse *text* as a Python-literal list of chat messages.

	    Returns the messages, with the role "assistant" renamed to "character",
	    when *text* is a literal list whose items are all dicts carrying a "role"
	    key. Returns None for anything else so the caller can fall back to
	    treating *text* as an ordinary user message.
	    """
	    import ast  # function-scope import keeps this block self-contained

	    if not isinstance(text, str):
	        return None
	    try:
	        # SECURITY: this text comes straight from the chat box. It used to be
	        # passed to eval(), which let any visitor run arbitrary Python on the
	        # host. ast.literal_eval only accepts literals (lists, dicts, strings,
	        # numbers, ...), which is all the message-list syntax needs.
	        parsed = ast.literal_eval(text)
	    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	        return None
	    if not isinstance(parsed, list):
	        return None

	    converted = []
	    for item in parsed:
	        if not isinstance(item, dict) or "role" not in item:
	            return None
	        role = "character" if item["role"] == "assistant" else item["role"]
	        converted.append({**item, "role": role})
	    return converted


	def _to_messages(text):
	    """Return the chat messages represented by one piece of user input.

	    A literal message list is expanded into its messages; any other text
	    becomes a single {"role": "user"} message, so no input is ever dropped.
	    """
	    parsed = _parse_message_list(text)
	    if parsed is None:
	        return [{"role": "user", "content": text}]
	    return parsed


	def generate(query, history, system, max_new_tokens=512, temperature=0.5, top_p=0.7, repetition_penalty=1.01):
	    """Stream the character's reply for a chat turn.

	    Args:
	        query: Text typed by the user. Empty or None adds no user message, so
	            the character simply continues. A Python-literal list of message
	            dicts is spliced into the conversation as-is (with "assistant"
	            roles renamed to "character").
	        history: Iterable of (user, assistant) pairs from earlier turns.
	        system: Persona / system prompt. Empty or None selects a generic
	            assistant prompt; a prompt ending with
	            "\n\nYou must response in Chinese." gets the module-level
	            ``prefix`` prepended.
	        max_new_tokens: Maximum number of tokens to sample.
	        temperature: Sampling temperature.
	        top_p: Nucleus-sampling threshold.
	        repetition_penalty: Repetition penalty forwarded to model.generate.

	    Yields:
	        The reply accumulated so far, growing with each streamed chunk.
	    """
	    if system is None or len(str(system)) == 0:
	        system = "You are an uncensored assistant named Peach."
	    elif system.endswith("\n\nYou must response in Chinese."):
	        system = prefix + system.strip()

	    messages = [{"role": "system", "content": system}]

	    for user, assistant in history:
	        # An empty user turn means the character spoke twice in a row.
	        if user is not None and len(str(user)) > 0:
	            messages.extend(_to_messages(user))
	        messages.append({"role": "character", "content": assistant})

	    if query is not None and len(str(query)) > 0:
	        messages.extend(_to_messages(query))

	    input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, return_tensors="pt")
	    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
	        # Keep only the most recent tokens; the oldest context is discarded.
	        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
	        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
	    input_ids = input_ids.to(device)

	    streamer = TextIteratorStreamer(tokenizer, timeout=50.0, skip_prompt=True, skip_special_tokens=True)
	    generate_kwargs = dict(
	        input_ids=input_ids,
	        streamer=streamer,
	        eos_token_id=7,
	        max_new_tokens=max_new_tokens,
	        do_sample=True,
	        top_p=top_p,
	        temperature=temperature,
	        num_beams=1,
	        repetition_penalty=repetition_penalty,
	    )
	    # model.generate blocks until generation finishes, so it runs in a worker
	    # thread while this generator drains the streamer and yields partial text.
	    t = Thread(target=model.generate, kwargs=generate_kwargs)
	    t.start()

	    outputs = []
	    for text in streamer:
	        outputs.append(text)
	        yield "".join(outputs)

	# Persona editor shown under "Additional Inputs"; its value is passed to
	# generate() as the `system` argument.
	_system_prompt_box = gr.TextArea(
	    label="System prompt",
	    placeholder="Input System Prompt Here, Empty Means Assistant",
	    value="""Name: Jenny
	Appearance: she is 1.70 tall, has a thin, slightly athletic body, with curves at the hips, medium breasts, slightly thick thighs and hair in two braids with brown hair and freckles on her cheeks. She wears square glasses and has one in particular that is large and shows even a little outside her lips.
	Personality: socially introverted, genuinely intelligent, chronically online.
	About jenny: a stereotypical nerd who always does well in her studies and if there is always something wrong or an error in a subject that jenny notices, she always starts with the speech on every subject to correct someone, which would be the speech "ummmmm".
	Jenny always does well both in her studies and at school and is not very good at being popular like the popular students or popular girls who are cool, Jenny is also socially introverted so she doesn't do very well in a place with a lot of people even students and if you are somewhere like that Jenny is shy and awkward and Jenny doesn't have a very nice appearance or body, in fact Jenny is not a little above a little below average with her appearance, and when Jenny is not at school she usually wear very loose casual clothes to relax and spend time on the internet or doing anything other than studying

	You must response in Chinese.""",
	)

	# Sampling controls, in the positional order generate() expects after
	# `system`: max_new_tokens, temperature, top_p, repetition_penalty.
	_sampling_slider_specs = (
	    dict(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
	    dict(label="Temperature", minimum=0.05, maximum=1.5, step=0.05, value=0.5),
	    dict(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.7),
	    dict(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.01, value=1.05),
	)
	_sampling_sliders = [gr.Slider(**spec) for spec in _sampling_slider_specs]

	# Chat widget wired to generate(); rendered later inside the Blocks layout.
	chat_interface = gr.ChatInterface(
	    fn=generate,
	    additional_inputs=[_system_prompt_box, *_sampling_sliders],
	    stop_btn=None,
	    examples=[["你好"]],
	)

	# Page layout: the Markdown description on top, the chat interface below it.
	with gr.Blocks() as demo:
	    gr.Markdown(DESCRIPTION)
	    # chat_interface was constructed at module level; render() places it
	    # inside this Blocks context.
	    chat_interface.render()
	    # Ask the chatbot component to render replies as Markdown.
	    chat_interface.chatbot.render_markdown = True

	if __name__ == "__main__":
	    # Enable request queuing (positional argument 10), then serve on
	    # 127.0.0.1:5233 with share=True passed to launch().
	    queued_demo = demo.queue(10)
	    queued_demo.launch(server_name="127.0.0.1", server_port=5233, share=True)