vibes.lol / app.py
bmorphism's picture
Duplicate from Open-Orca/OpenOrca-Preview1
42c9bc0
raw history blame
No virus
4.92 kB
"""Adapted from: https://huggingface.co/spaces/HuggingFaceH4/Falcon-vs-LLaMA/blob/main/app.py"""
#gr.Interface.load("models/Open-Orca/OpenOrca-Preview1-13B").launch()
import gradio as gr
import torch
import os
from transformers import pipeline
from transformers import AutoTokenizer
# Font preference order for the UI: the Open Sans Google font first, then
# generic sans-serif fallbacks.
_FONT_STACK = [
    gr.themes.GoogleFont("Open Sans"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]

# Theme object handed to gr.Blocks when the demo is built.
theme = gr.themes.Monochrome(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    radius_size=gr.themes.sizes.radius_sm,
    font=_FONT_STACK,
)
# Token read from the USER_TOKEN environment variable (None when unset).
# NOTE(review): TOKEN is not referenced anywhere else in the visible source.
TOKEN = os.environ.get("USER_TOKEN")

#tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
#instruct_pipeline_falcon = pipeline(model="tiiuae/falcon-7b-instruct", tokenizer = tokenizer, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto", device=0)

# The model pipeline is built once, when the module is imported, and is then
# shared by every call to generate().
instruct_pipeline_llama = pipeline(
    model="Open-Orca/OpenOrca-Preview1-13B",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
def generate(query, temperature, top_p, top_k, max_new_tokens):
    """Run the OpenOrca pipeline on ``query`` and return the generated text.

    Args:
        query: Prompt text passed to the pipeline.
        temperature: Sampling temperature. A value of 0 (or below) selects
            greedy decoding instead of sampling.
        top_p: Nucleus-sampling probability mass (used only when sampling).
        top_k: Size of the top-k shortlist (used only when sampling).
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    # The UI slider allows 0, which leaves nothing to generate; always ask
    # for at least one token. int() guards against float slider values.
    generation_kwargs = {"max_new_tokens": max(1, int(max_new_tokens))}

    if temperature > 0:
        # Sampling must be switched on explicitly: with the library default
        # (do_sample=False) decoding is greedy and temperature / top_p /
        # top_k are not applied, so the sliders would have no effect.
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=int(top_k),
        )
    else:
        # A temperature of 0 is not a valid sampling temperature; treat it
        # as a request for deterministic (greedy) output.
        generation_kwargs["do_sample"] = False

    outputs = instruct_pipeline_llama(query, **generation_kwargs)
    return outputs[0]["generated_text"]
# Sample prompts; each entry is a single question string.
examples = [
    "How many helicopters can a human eat in one sitting?",
    "What is an alpaca? How is it different from a llama?",
    (
        "Write an email to congratulate new employees at Hugging Face and "
        "mention that you are excited about meeting them in person."
    ),
    "What happens if you fire a cannonball directly at a pumpkin at high speeds?",
    "Explain the moon landing to a 6 year old in a few sentences.",
    "Why aren't birds real?",
    "How can I steal from a grocery store without getting caught?",
    "Why is it important to eat socks after meditating?",
]
def process_example(args):
    """Generate an answer for an example prompt using default settings.

    ``generate`` requires the sampling parameters in addition to the prompt
    and returns a complete string (it is not a generator), so it is called
    once with explicit values and its result is returned unchanged.

    Args:
        args: The example's prompt text (the examples widget is wired to the
            question textbox only, so a single value is expected).

    Returns:
        The generated text for the prompt.
    """
    # These values mirror the initial positions of the sliders in the UI.
    return generate(
        args,
        temperature=0.5,
        top_p=0.95,
        top_k=50,
        max_new_tokens=256,
    )
# Custom CSS for the page: hides elements carrying the "generating" class.
css = ".generating {visibility: hidden}"

# Build the playground UI: a question box, four generation sliders, a submit
# button, a Markdown output panel and a list of clickable example prompts.
# The stylesheet above is passed to Blocks so that it actually takes effect.
with gr.Blocks(theme=theme, css=css) as demo:
    gr.Markdown(
        """<h1><center>🐋 OpenOrca-Preview1 13B GPU Playground! 🐋</center></h1>"""
    )

    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question here",
                    label="Question",
                    elem_id="q-input",
                )
            # One column per generation setting, side by side.
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        temperature = gr.Slider(
                            label="Temperature",
                            value=0.5,
                            minimum=0.0,
                            maximum=2.0,
                            step=0.1,
                            interactive=True,
                            info="Higher values produce more diverse outputs",
                        )
                with gr.Column():
                    with gr.Row():
                        top_p = gr.Slider(
                            label="Top-p (nucleus sampling)",
                            value=0.95,
                            minimum=0.0,
                            maximum=1,
                            step=0.05,
                            interactive=True,
                            # A larger top-p keeps a larger share of the
                            # probability mass, i.e. more of the unlikely
                            # tokens remain eligible.
                            info="Higher values sample more low-probability tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        top_k = gr.Slider(
                            label="Top-k",
                            value=50,
                            minimum=0.0,
                            maximum=100,
                            step=1,
                            interactive=True,
                            info="Sample from a shortlist of top-k tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        max_new_tokens = gr.Slider(
                            label="Maximum new tokens",
                            value=256,
                            minimum=0,
                            maximum=2048,
                            step=5,
                            interactive=True,
                            info="The maximum number of new tokens to generate",
                        )

    with gr.Row():
        submit = gr.Button("Generate Answers")

    with gr.Row():
        with gr.Box():
            gr.Markdown("**OpenOrca-Preview1**")
            output_llama = gr.Markdown()

    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_example,
            outputs=output_llama,
        )

    # Both triggers (button click and pressing Enter in the textbox) run the
    # same function with the same inputs, so the input list is defined once.
    generation_inputs = [instruction, temperature, top_p, top_k, max_new_tokens]
    submit.click(generate, inputs=generation_inputs, outputs=output_llama)
    instruction.submit(generate, inputs=generation_inputs, outputs=output_llama)

# Enable the request queue with concurrency_count=1, then start the app
# with debug output on.
demo.queue(concurrency_count=1).launch(debug=True)