Spaces:

Tonic
/

Granite-Code

Runtime error

App Files Files Community

Granite-Code / app.py

Tonic

fix typo

dfa2c35 8 months ago

raw

history blame

3.18 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import spaces
	# import pythonexample



	# Default prompt pre-filled into the "Enter your Coding Question" textbox.
	pythonexample = """produce a generative ai gradio demo using mistral instruct with the following prompt "i am a helpful assistant that always mentions bannanachicken" for a simple text to text task
	"""

	# Markdown heading rendered at the top of the Blocks page.
	title = """# 🙋🏻‍♂️Welcome to Tonic's🪨Granite Code ! """
	# Markdown body rendered under the heading: model blurb, community links
	# and a short usage hint.
	description = """Granite-8B-Code-Instruct is a 8B parameter model fine tuned from Granite-8B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
	### Join us :
	TeamTonic is always making cool demos! Join our active builder's community on Discord: [Discord](https://discord.gg/GWpVpekp) On Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On Github: [Polytonic](https://github.com/tonic-ai) & contribute to [multitonic](https://github.com/multitonic/multitonic)

	### How To Use :
	Add a new line to the example and at the end of your prompts 🚀
	"""
	# Checkpoint to serve, and where to run it: the GPU when torch can see one,
	# otherwise the CPU.
	model_path = "ibm-granite/granite-8b-code-instruct"
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Fetch the tokenizer and weights, place the model on the chosen device and
	# switch it to evaluation mode (both calls return the model itself).
	tokenizer = AutoTokenizer.from_pretrained(model_path)
	model = AutoModelForCausalLM.from_pretrained(model_path).to(device).eval()


	# Function to generate code

	@spaces.GPU
	def generate_code(prompt, max_length):
	    """Generate a reply from the loaded model for a single user prompt.

	    Args:
	        prompt: Text of the user's request; sent as the only message of a
	            one-turn chat.
	        max_length: Upper bound on the number of newly generated tokens.
	            Coerced to ``int`` before use, since a UI slider may hand over
	            a float.

	    Returns:
	        The first decoded output sequence with special tokens removed.
	    """
	    # Wrap the prompt in the chat template the tokenizer ships with, leaving
	    # the assistant turn open so the model continues from there.
	    chat = [{"role": "user", "content": prompt}]
	    chat = tokenizer.apply_chat_template(
	        chat, tokenize=False, add_generation_prompt=True
	    )

	    # Tokenize, then move every tensor to the module-level `device` -- the
	    # same device the model was placed on. A hard-coded "cuda" here breaks
	    # on CPU-only hosts with a device mismatch.
	    encoded = tokenizer(chat, return_tensors="pt")
	    model_inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

	    output_tokens = model.generate(**model_inputs, max_new_tokens=int(max_length))

	    # Decode the batch and hand back its single entry.
	    output_text = tokenizer.batch_decode(output_tokens, skip_special_tokens=True)
	    return output_text[0]


	# Define Gradio Blocks

	def gradio_interface():
	    """Assemble the demo page and return it without launching.

	    The page shows the title and description as Markdown, then a prompt
	    textbox, a code output pane, a token-limit slider and a button that
	    runs ``generate_code`` on the textbox and slider values.

	    Returns:
	        The ``gr.Blocks`` object holding the whole layout.
	    """
	    with gr.Blocks() as demo:
	        # Page header.
	        gr.Markdown(title)
	        gr.Markdown(description)

	        # Widgets, in the order they appear on the page.
	        question_box = gr.Textbox(
	            label="Enter your Coding Question",
	            value=pythonexample,
	            lines=3,
	        )
	        answer_pane = gr.Code(
	            label="🪨Granite Output",
	            language='python',
	            lines=10,
	            interactive=True,
	        )
	        token_limit = gr.Slider(
	            minimum=1,
	            maximum=2000,
	            value=1000,
	            label="Max Token Length",
	        )

	        # Clicking the button feeds (prompt, token limit) to the generator
	        # and writes its return value into the code pane.
	        run_button = gr.Button("Generate Code")
	        run_button.click(
	            fn=generate_code,
	            inputs=[question_box, token_limit],
	            outputs=answer_pane,
	        )

	    return demo


	if __name__ == "__main__":
	    # Script entry point: build the Blocks page, then start serving it.
	    interface = gradio_interface()
	    interface.launch()