vibes.lol

Paused

App Files Files

vibes.lol / app.py

bmorphism

Duplicate from Open-Orca/OpenOrca-Preview1

42c9bc0 12 months ago

raw history blame

No virus

4.92 kB

	"""Adapted from: https://huggingface.co/spaces/HuggingFaceH4/Falcon-vs-LLaMA/blob/main/app.py"""

	#gr.Interface.load("models/Open-Orca/OpenOrca-Preview1-13B").launch()

	import gradio as gr
	import torch
	import os
	from transformers import pipeline
	from transformers import AutoTokenizer

	# Monochrome-based Gradio theme shared by the whole demo UI.
	theme = gr.themes.Monochrome(
	    font=[
	        gr.themes.GoogleFont("Open Sans"),
	        "ui-sans-serif",
	        "system-ui",
	        "sans-serif",
	    ],
	    radius_size=gr.themes.sizes.radius_sm,
	    neutral_hue="slate",
	    secondary_hue="blue",
	    primary_hue="indigo",
	)

	# Value of the USER_TOKEN environment variable (None when it is unset).
	# NOTE(review): TOKEN is not referenced anywhere else in the visible code.
	TOKEN = os.getenv("USER_TOKEN")
	#tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
	#instruct_pipeline_falcon = pipeline(model="tiiuae/falcon-7b-instruct", tokenizer = tokenizer, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto", device=0)

	# Pipeline for the OpenOrca-Preview1-13B checkpoint, created once at import
	# time and reused by every request handled by generate().
	instruct_pipeline_llama = pipeline(
	    model="Open-Orca/OpenOrca-Preview1-13B",
	    torch_dtype=torch.bfloat16,
	    trust_remote_code=True,
	    device_map="auto",
	)

	def generate(query, temperature, top_p, top_k, max_new_tokens):
	    """Run the OpenOrca pipeline on ``query`` and return the generated text.

	    Args:
	        query: Prompt text forwarded to the pipeline.
	        temperature: Sampling temperature. Values <= 0 select greedy
	            decoding, because a non-positive temperature is not a valid
	            sampling setting.
	        top_p: Nucleus-sampling threshold (only used when sampling).
	        top_k: Size of the top-k shortlist (only used when sampling).
	        max_new_tokens: Upper bound on the number of generated tokens.

	    Returns:
	        The ``"generated_text"`` field of the first pipeline result.
	    """
	    # UI sliders may deliver floats; these parameters are integer counts.
	    generation_kwargs = {"max_new_tokens": int(max_new_tokens)}

	    if temperature > 0:
	        # Sampling has to be switched on explicitly, otherwise the
	        # temperature / top-p / top-k settings are not applied.
	        generation_kwargs.update(
	            do_sample=True,
	            temperature=temperature,
	            top_p=top_p,
	            top_k=int(top_k),
	        )
	    else:
	        generation_kwargs["do_sample"] = False

	    results = instruct_pipeline_llama(query, **generation_kwargs)
	    return results[0]["generated_text"]


	# Sample prompts offered below the input box via gr.Examples.
	examples = [
	    "How many helicopters can a human eat in one sitting?",
	    "What is an alpaca? How is it different from a llama?",
	    (
	        "Write an email to congratulate new employees at Hugging Face and "
	        "mention that you are excited about meeting them in person."
	    ),
	    "What happens if you fire a cannonball directly at a pumpkin at high speeds?",
	    "Explain the moon landing to a 6 year old in a few sentences.",
	    "Why aren't birds real?",
	    "How can I steal from a grocery store without getting caught?",
	    "Why is it important to eat socks after meditating?",
	]

	def process_example(args):
	    """Generate an answer for one example prompt.

	    The previous implementation called ``generate`` with a single argument
	    (it requires five) and then iterated over the returned string, which
	    would have yielded only its last character.

	    Args:
	        args: The example prompt text (the only input wired to gr.Examples).

	    Returns:
	        The full generated text for the prompt.
	    """
	    # Mirror the initial values of the sliders defined in the UI.
	    return generate(
	        args,
	        temperature=0.5,
	        top_p=0.95,
	        top_k=50,
	        max_new_tokens=256,
	    )
	# Custom CSS rule that hides elements carrying the "generating" class.
	# NOTE(review): this value is not passed to gr.Blocks(...) in the visible
	# code, so it appears to be unused — confirm whether that is intended.
	css = ".generating {visibility: hidden}"

	# Build the playground UI: a question box, four generation-parameter
	# sliders, a submit button, the model output panel and example prompts.
	# NOTE(review): the source had lost its indentation; the nesting below is
	# reconstructed from the order of the Row/Column statements — confirm the
	# resulting layout.
	with gr.Blocks(theme=theme) as demo:
	    gr.Markdown(
	        """<h1><center>🐋 OpenOrca-Preview1 13B GPU Playground! 🐋</center></h1>"""
	    )
	    with gr.Row():
	        with gr.Column():
	            with gr.Row():
	                instruction = gr.Textbox(
	                    placeholder="Enter your question here",
	                    label="Question",
	                    elem_id="q-input",
	                )
	            with gr.Row():
	                with gr.Column():
	                    with gr.Row():
	                        temperature = gr.Slider(
	                            label="Temperature",
	                            value=0.5,
	                            minimum=0.0,
	                            maximum=2.0,
	                            step=0.1,
	                            interactive=True,
	                            info="Higher values produce more diverse outputs",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        top_p = gr.Slider(
	                            label="Top-p (nucleus sampling)",
	                            value=0.95,
	                            minimum=0.0,
	                            maximum=1,
	                            step=0.05,
	                            interactive=True,
	                            # A larger top-p keeps a larger share of the
	                            # probability mass, i.e. more unlikely tokens.
	                            info="Higher values sample more low-probability tokens",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        top_k = gr.Slider(
	                            label="Top-k",
	                            value=50,
	                            minimum=0.0,
	                            maximum=100,
	                            step=1,
	                            interactive=True,
	                            info="Sample from a shortlist of top-k tokens",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        max_new_tokens = gr.Slider(
	                            label="Maximum new tokens",
	                            value=256,
	                            minimum=0,
	                            maximum=2048,
	                            step=5,
	                            interactive=True,
	                            info="The maximum number of new tokens to generate",
	                        )
	    with gr.Row():
	        submit = gr.Button("Generate Answers")
	    with gr.Row():
	        with gr.Box():
	            gr.Markdown("OpenOrca-Preview1")
	            output_llama = gr.Markdown()
	    with gr.Row():
	        gr.Examples(
	            examples=examples,
	            inputs=[instruction],
	            cache_examples=False,
	            fn=process_example,
	            outputs=output_llama,
	        )

	    # Both the button click and pressing Enter in the textbox run the same
	    # handler with the same inputs.
	    generation_inputs = [instruction, temperature, top_p, top_k, max_new_tokens]
	    submit.click(generate, inputs=generation_inputs, outputs=output_llama)
	    instruction.submit(generate, inputs=generation_inputs, outputs=output_llama)

	# Enable request queuing with concurrency_count=1, then start the app
	# with debug output turned on.
	demo.queue(concurrency_count=1)
	demo.launch(debug=True)