Spaces:

RootYuan
/

LLM-Lab

Runtime error

App Files Files Community

LLM-Lab / app.py

RootYuan

Update app.py

9b21da5 almost 2 years ago

raw

history blame

6.86 kB

	import torch
	import gradio as gr
	from uuid import uuid4
	from threading import Thread
	from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

	from theme_dropdown import create_theme_dropdown


	# Hugging Face Hub id of the causal LM loaded further down in this file.
	model_name = "RootYuan/RootYuan-RedLing-7B-v0.1"
	# Upper bound on generated tokens per reply; bot() forwards it to model.generate.
	max_new_tokens = 2048
	# Run on the GPU when torch reports one, otherwise on the CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'


	# Initial value of the "System Message" textbox. bot() prepends the textbox
	# value verbatim to the prompt, so the newlines that open and close this
	# literal are part of what the model sees.
	DEFAULT_SYSTEM_MESSAGE = """
	A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
	"""

	# Placeholder token for image input; VISION_TOKENS is 32 of them wrapped in newlines.
	VISION_TOKEN = '<img>'
	VISION_TOKENS = '\n' + VISION_TOKEN * 32 + '\n'
	# Passed as `eos_token` when formatting completed turns (see convert_history_to_text).
	EOT_TOKEN = "<EOT>"

	# One rendered turn. "<EOT>" after the user text is hard-coded here rather than
	# taken from EOT_TOKEN.
	# NOTE(review): there is no `{media}` field, so the `media=` keyword that
	# convert_history_to_text passes is silently ignored by str.format and
	# VISION_TOKENS never reaches the prompt -- confirm whether that is intended.
	PROMPT_TEMPLATE = "USER:{user}<EOT>ASSISTANT:{assistant}{eos_token}"

	# Load the tokenizer and the model at import time. The weights are requested
	# in bfloat16 and then moved to `device`.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)

	# create_theme_dropdown comes from the local theme_dropdown module (not visible
	# here). Its first result is rendered as a component in the UI and its second
	# is used as the `_js` callback of that component's change event.
	dropdown, js = create_theme_dropdown()

	def get_uuid():
	    """Return a freshly generated random UUID (version 4) as a string."""
	    fresh_id = uuid4()
	    return str(fresh_id)


	def add_text(message, history):
	    """Queue a new user turn and clear the input box.

	    Returns a pair: the empty string (the new textbox value) and a new
	    history list equal to ``history`` followed by ``[message, ""]``. The
	    empty second slot is later filled with the generated reply.
	    """
	    pending_turn = [message, ""]
	    # Build a new list instead of appending so the caller's list is untouched.
	    extended_history = history + [pending_turn]
	    cleared_box = ""
	    return cleared_box, extended_history


	def add_media(media, history):
	    """Append an uploaded image to the chat history as a media turn.

	    Args:
	        media: Uploaded file object; only its ``name`` attribute (the file
	            path) is read.
	        history: Chat history as a list of ``[user, assistant]`` pairs.

	    Returns:
	        A new history list with ``[(path, "image"), ""]`` appended when the
	        file extension is jpg, jpeg or png (compared case-insensitively).
	        For any other file the history that was passed in is returned
	        unchanged.
	    """
	    media_name = media.name
	    # Lower-case the extension so "PHOTO.JPG" is accepted like "photo.jpg".
	    media_format = media_name.rsplit(".", 1)[-1].lower()
	    if media_format not in ("jpg", "jpeg", "png"):
	        # The upload button also lets video/audio files through. There is no
	        # media type for them here, so leave the conversation as it is
	        # instead of failing on an unbound `media_type`.
	        return history
	    return history + [[(media_name, "image"), ""]]


	def convert_history_to_text(history):
	    """Render the chat history as one prompt string.

	    Every text turn is formatted with ``PROMPT_TEMPLATE``. Completed turns
	    (all but the last) are closed with ``EOT_TOKEN``; the last turn is left
	    open (empty ``eos_token``) so the model continues the assistant reply.
	    Turns whose user slot is a tuple are media uploads: they produce no text
	    of their own and instead mark the next rendered turn to receive
	    ``VISION_TOKENS`` through the ``media`` format argument.

	    Args:
	        history: List of ``[user, assistant]`` pairs; ``user`` is either a
	            string or a tuple for an uploaded file.

	    Returns:
	        The concatenated prompt text, or ``""`` for an empty history.

	    Note:
	        ``media`` only has an effect if ``PROMPT_TEMPLATE`` contains a
	        ``{media}`` field; ``str.format`` ignores unused keyword arguments.
	    """
	    if not history:
	        # Nothing to render; avoids an IndexError on history[-1].
	        return ""

	    def _render(turn, with_image, eos_token):
	        # Single place where a turn is formatted, shared by past and final turns.
	        return PROMPT_TEMPLATE.format(
	            media=VISION_TOKENS if with_image else '',
	            user=turn[0],
	            assistant=turn[1],
	            eos_token=eos_token,
	        )

	    pieces = []
	    image_pending = False
	    for turn in history[:-1]:
	        if isinstance(turn[0], tuple):
	            # Media upload: remember it for the next rendered turn.
	            image_pending = True
	            continue
	        pieces.append(_render(turn, image_pending, EOT_TOKEN))
	        image_pending = False

	    # The final turn also consumes a pending image, so "upload, then ask"
	    # attaches the image to the question that follows it.
	    pieces.append(_render(history[-1], image_pending, ''))
	    return "".join(pieces)


	def bot(history, temperature, top_k, sys_msg):
	    """Stream the assistant reply for the last turn of ``history``.

	    The prompt is ``sys_msg`` followed by the rendered history. Generation
	    runs in a background thread and the decoded text is read from a
	    ``TextIteratorStreamer``; after every chunk the reply slot of the last
	    turn is updated and the whole history is yielded.

	    Args:
	        history: List of ``[user, assistant]`` pairs; ``history[-1][1]`` is
	            overwritten in place with the text generated so far.
	        temperature: Sampling temperature from the UI slider. A value of 0
	            selects greedy decoding.
	        top_k: Top-k shortlist size from the UI slider; 0 disables the filter.
	        sys_msg: System message placed in front of the conversation.

	    Yields:
	        ``history`` after each streamed chunk.
	    """
	    print(f"history: {history}")

	    # Construct the input message string for the model by concatenating the current system message and conversation history
	    messages = sys_msg + convert_history_to_text(history)
	    input_ids = tokenizer(messages, return_tensors="pt").input_ids.to(device)
	    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
	    generation_kwargs = dict(
	        input_ids=input_ids,
	        max_new_tokens=max_new_tokens,
	        streamer=streamer,
	    )
	    if temperature and temperature > 0:
	        # temperature/top_k are only honoured when sampling is switched on;
	        # without do_sample=True generate() decodes greedily and ignores them.
	        generation_kwargs.update(
	            do_sample=True,
	            temperature=float(temperature),
	            top_k=int(top_k),  # the slider may deliver a float
	        )
	    else:
	        # A temperature of 0 is not valid for sampling, so decode greedily.
	        generation_kwargs.update(do_sample=False)

	    # generate() blocks until it is done, so it runs in a worker thread while
	    # this generator consumes the streamer.
	    thread = Thread(target=model.generate, kwargs=generation_kwargs)
	    thread.start()

	    # Initialize an empty string to store the generated text
	    generated_text = ""
	    for new_text in streamer:
	        generated_text += new_text
	        history[-1][1] = generated_text
	        yield history

	    # The streamer is exhausted, so generation has finished; reap the worker.
	    thread.join()


	# Page layout and event wiring for the chat demo; everything below belongs to
	# the `demo` Blocks context.
	with gr.Blocks(theme='sudeepshouche/minimalist') as demo:
	# Header row: title on the left, theme controls on the right.
	with gr.Row().style(equal_height=True):
	with gr.Column(scale=12):
	gr.Markdown(
	"""
	# Visual Assistant Lab
	"""
	)
	with gr.Column(scale=2):
	with gr.Box():
	dropdown.render()
	toggle_dark = gr.Button(value="Toggle Dark").style(full_width=True)
	# Both handlers do their work in the browser via `_js`; the Python side is
	# None / a no-op lambda.
	dropdown.change(None, dropdown, None, _js=js)
	toggle_dark.click(lambda: None, None, None, _js="() => {document.body.classList.toggle('dark')}")
	# conversation_id = gr.State(get_uuid)
	# Collapsible editor for the system message that bot() prepends to the prompt.
	with gr.Row():
	with gr.Accordion("System Message", open=False):
	sys_msg = gr.Textbox(
	value=DEFAULT_SYSTEM_MESSAGE,
	label="System Message",
	info="Instruct the AI Assistant to set its beaviour",
	show_label=False,
	)
	with gr.Row():
	chatbot = gr.Chatbot(label="Assistant").style(height=500)
	# Generation settings passed to bot() as `temperature` and `top_k`.
	with gr.Row():
	with gr.Accordion("Advanced Settings:", open=False):
	with gr.Row().style(equal_height=True):
	with gr.Column():
	temperature = gr.Slider(
	label="Temperature",
	value=0.1,
	minimum=0.0,
	maximum=1.0,
	step=0.1,
	interactive=True,
	info="Higher values produce more diverse outputs",
	)
	with gr.Column():
	top_k = gr.Slider(
	label="Top-k",
	value=0,
	minimum=0.0,
	maximum=200,
	step=1,
	interactive=True,
	info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.",
	)
	# Message input row.
	with gr.Row().style(equal_height=True):
	with gr.Column(scale=12):
	msg = gr.Textbox(
	label="Chat Message Box",
	placeholder="Hi! Type here, Press [Enter] to send...",
	show_label=False,
	).style(container=False)
	with gr.Column(scale=2):
	send = gr.Button("Send")
	# NOTE(review): the upload button accepts image, video and audio files, but
	# add_media only recognises the jpg/jpeg/png extensions.
	with gr.Row().style(equal_height=True):
	media = gr.UploadButton("Upload files", file_types=["image", "video", "audio"])
	stop = gr.Button("Stop")
	clear = gr.Button("Clear")

	# Pressing Enter: add_text (unqueued) clears the box and appends the user
	# turn, then bot streams the reply into the chatbot through the queue.
	send_event = msg.submit(
	fn=add_text,
	inputs=[msg, chatbot],
	outputs=[msg, chatbot],
	queue=False,
	).then(
	fn=bot,
	inputs=[chatbot, temperature, top_k, sys_msg],
	outputs=chatbot,
	queue=True,
	)

	# Uploading a file only appends a media turn; it does not start generation.
	media.upload(
	fn=add_media,
	inputs=[media, chatbot],
	outputs=[chatbot],
	)

	# The Send button runs the same two-step chain as pressing Enter.
	send_click_event = send.click(
	fn=add_text,
	inputs=[msg, chatbot],
	outputs=[msg, chatbot],
	queue=False,
	).then(
	fn=bot,
	inputs=[chatbot, temperature, top_k, sys_msg],
	outputs=chatbot,
	queue=True,
	)

	# Stop has no handler of its own; it only cancels the two generation chains.
	stop.click(
	fn=None,
	inputs=None,
	outputs=None,
	cancels=[send_event, send_click_event],
	queue=False,
	)
	# Clear writes None into the chatbot component.
	clear.click(lambda: None, None, chatbot, queue=False)


	if __name__ == "__main__":
	    # When run as a script, enable the request queue (max_size=128,
	    # concurrency_count=2) and start the app.
	    demo.queue(max_size=128, concurrency_count=2).launch()