|
import gradio as gr |
|
from unsloth import FastLanguageModel |
|
from transformers import TextStreamer |
|
import torch |
|
|
|
|
|
def load_model(model_name, max_seq_length, dtype, load_in_4bit, token=None):
    """Fetch a pretrained checkpoint through Unsloth and prep it for generation.

    Args:
        model_name: Hub id or local path of the checkpoint to load.
        max_seq_length: Maximum context window to configure.
        dtype: Torch dtype to load with (None lets Unsloth choose).
        load_in_4bit: Whether to load 4-bit quantized weights.
        token: Optional Hugging Face access token for gated repos.

    Returns:
        Tuple of (model, tokenizer), with Unsloth's inference mode already
        enabled on the model.
    """
    load_kwargs = dict(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
        token=token,
    )
    model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)
    # Switch the model into Unsloth's fast-inference path before returning it.
    FastLanguageModel.for_inference(model)
    return model, tokenizer
|
|
|
|
|
# Hugging Face model id for the Phi-3 mini (4k context) checkpoint via Unsloth.
model_name = "unsloth/Phi-3-mini-4k-instruct"

# Optional HF access token; None is fine for public checkpoints like this one.
token = None


# Load once at script start-up. dtype=None lets Unsloth auto-select;
# load_in_4bit=True uses quantized weights to reduce memory use.
model, tokenizer = load_model(model_name, max_seq_length=2048, dtype=None, load_in_4bit=True, token=token)
|
|
|
def generate_response(instruction, input_text, max_new_tokens):
    """Generate a completion for an Alpaca-style prompt and return it as text.

    Args:
        instruction: Task description placed in the "### Instruction:" slot.
        input_text: Supporting context placed in the "### Input:" slot.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The full decoded sequence (prompt echo plus generated continuation)
        with special tokens removed.
    """
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

    # Leave the response slot empty so the model fills it in.
    prompt = alpaca_prompt.format(instruction, input_text, "")

    # BUG FIX: inputs were previously forced onto "cpu", which causes a device
    # mismatch when the 4-bit model's weights live on GPU. Using model.device
    # keeps the inputs co-located with the weights on any hardware.
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    # Stream tokens to stdout as they are produced, then decode the final ids
    # for the Gradio textbox.
    text_streamer = TextStreamer(tokenizer)
    output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=max_new_tokens)

    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response
|
|
|
|
|
# Build the Gradio front-end: two text inputs plus a token-budget slider,
# wired to generate_response, with the answer shown in a textbox.
instruction_box = gr.Textbox(lines=2, label="Instruction", placeholder="Continue the Fibonacci sequence.")
context_box = gr.Textbox(lines=2, label="Input", placeholder="1, 1, 2, 3, 5, 8")
token_slider = gr.Slider(1, 2048, value=128, step=1, label="Max New Tokens")
response_box = gr.Textbox(label="Response", lines=10)

iface = gr.Interface(
    fn=generate_response,
    inputs=[instruction_box, context_box, token_slider],
    outputs=response_box,
    title="Language Model Chat UI",
)

# Start the local web server for the UI.
iface.launch()
|
|