Spaces:

likewendy
/

phi-4

Running

App Files Files Community

phi-4 / app.py

likewendy

code

90aa4a9 10 days ago

raw

history blame contribute delete

3.13 kB

	import spaces
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
	import gradio as gr
	import os
	from threading import Thread

	# Startup housekeeping: delete everything under /data-nvme/zerogpu-offload/
	# (presumably stale ZeroGPU offload files from a previous run -- confirm).
	os.system("rm -rf /data-nvme/zerogpu-offload/*")

	# PyTorch CUDA caching-allocator option; set before any model is placed on the GPU.
	os.environ.update({'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True'})

	# Hub repository that both the weights and the tokenizer are loaded from.
	_MODEL_REPO = "NyxKrage/Microsoft_Phi-4"

	# Loading options for the causal LM: place it on CUDA, let the checkpoint
	# decide the dtype, and allow custom modelling code shipped with the repo.
	_MODEL_LOAD_OPTIONS = {
	    "device_map": "cuda",
	    "torch_dtype": "auto",
	    "trust_remote_code": True,
	}

	model = AutoModelForCausalLM.from_pretrained(_MODEL_REPO, **_MODEL_LOAD_OPTIONS)
	tokenizer = AutoTokenizer.from_pretrained(_MODEL_REPO)

	# Module-level streamer that decodes generated tokens with the tokenizer above.
	streamer = TextIteratorStreamer(tokenizer)

	@spaces.GPU
	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	    seed,
	):
	    """Stream the model's reply to ``message`` as a growing string.

	    Args:
	        message: The new user message.
	        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
	            empty entries in a pair are skipped.
	        system_message: Text for the leading system message.
	        max_tokens: Maximum number of new tokens to generate.
	        temperature: Sampling temperature.
	        top_p: Nucleus-sampling probability mass.
	        seed: Seed for the torch random generator, applied before sampling.

	    Yields:
	        The reply accumulated so far, once per decoded text chunk.

	    Raises:
	        Exception: Whatever ``model.generate`` raised in the worker thread,
	            re-raised here once the stream has been drained.
	    """
	    messages = [{"role": "system", "content": system_message}]
	    for turn in history:
	        if turn[0]:
	            messages.append({"role": "user", "content": turn[0]})
	        if turn[1]:
	            messages.append({"role": "assistant", "content": turn[1]})
	    messages.append({"role": "user", "content": message})

	    # add_generation_prompt appends the assistant-turn header so the model
	    # answers instead of continuing the user turn; return_dict also yields
	    # the attention mask that generate() expects alongside input_ids.
	    inputs = tokenizer.apply_chat_template(
	        messages,
	        add_generation_prompt=True,
	        return_dict=True,
	        return_tensors="pt",
	    ).to("cuda")

	    # Slider values may arrive as floats; the seed must be an integer.
	    torch.random.manual_seed(int(seed))

	    # A fresh streamer per call keeps requests from sharing one token queue.
	    # skip_prompt stops the input conversation being echoed into the reply
	    # (generate() has no return_full_text option; that is a pipeline argument).
	    reply_streamer = TextIteratorStreamer(
	        tokenizer,
	        skip_prompt=True,
	        skip_special_tokens=True,
	    )

	    generation_kwargs = dict(
	        **inputs,
	        max_new_tokens=int(max_tokens),
	        temperature=temperature,
	        top_p=top_p,
	        do_sample=True,
	        streamer=reply_streamer,
	    )

	    failures = []

	    def _generate():
	        # Runs in the worker thread. If generation fails, close the stream so
	        # the consumer loop below terminates instead of blocking forever.
	        try:
	            model.generate(**generation_kwargs)
	        except Exception as exc:  # re-raised in the caller after the loop
	            failures.append(exc)
	            reply_streamer.end()

	    # Generate in a separate thread so text can be consumed as it is produced.
	    worker = Thread(target=_generate)
	    worker.start()

	    response = ""
	    for new_text in reply_streamer:
	        response += new_text
	        yield response

	    worker.join()
	    if failures:
	        raise failures[0]

	# Build the Gradio UI and bind it to `demo`: a login button with a Markdown
	# notice, the generation controls, and the chat interface.
	# NOTE(review): the nesting of the `with` blocks below was lost in this copy
	# of the file; restore the indentation before running.
	with gr.Blocks() as demo:
	with gr.Row():
	gr.LoginButton(min_width=100)
	gr.Markdown("""
	This is the space I built.
	As of 2025/1/7, this is the first phi-4 space.
	If this helps you, and if you have enough money, can you give me 1$? I am facing a financial crisis.
	If you do this, I will pass on the kindness.
	This is my bank card number:5592921230414708
	Thank you!!
	""")

	with gr.Row():
	with gr.Column():
	# Generation controls; they are handed to respond() via `additional_inputs`
	# below, in the order system_message, max_tokens, temperature, top_p, seed.
	system_message = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
	max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
	temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
	top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
	seed = gr.Slider(minimum=0, maximum=20091114, value=42, step=1, label="Seed")

	with gr.Column():
	# Chat widget that calls respond() with the message, the history, and the
	# current values of the controls listed in `additional_inputs`.
	gr.ChatInterface(
	respond,
	additional_inputs=[system_message, max_tokens, temperature, top_p, seed],
	)


	# Launch the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()