Spaces:

Locutusque
/

hyperion-preview

Runtime error

File size: 1,996 Bytes

91ae465
 
 
 
 
daecaae
91ae465
 
dec3cbd
 
91ae465
28276a4
91ae465
 
 
 
f99c184
daecaae
91ae465
 
2fb1b1c
91ae465
 
 
 
 
 
 
 
 
92ecee9
 
91ae465
 
 
 
 
c3ce568
07f49e6
991b767
 
 
 
91ae465
c3ce568
f09eac6
 
91ae465

import gradio as gr
from transformers import pipeline
import torch
import subprocess
import spaces
import os


# Install flash-attn at startup.
# FLASH_ATTENTION_SKIP_CUDA_BUILD is presumably read by flash-attn's build to
# skip compiling its CUDA extension -- confirm against the flash-attn docs.
# The variable is merged into a copy of the current environment: a bare dict
# passed as ``env`` REPLACES the whole environment (PATH, HOME, ... are lost),
# which can keep the child process from locating ``pip`` at all.
# The return code is deliberately not checked, so the install stays
# best-effort and a failure here does not abort startup.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
# Initialize the model pipeline
# The Hub token is read from the "HF" environment variable. When the variable
# is not set, ``None`` is passed instead of failing with a KeyError at import.
generator = pipeline(
    'text-generation',
    model='Locutusque/NeuralHyperion-2.0-Mistral-7B',
    torch_dtype=torch.bfloat16,
    token=os.environ.get("HF"),
)
@spaces.GPU
def generate_text(prompt, temperature, top_p, top_k, repetition_penalty, max_length):
    """Generate one assistant reply for ``prompt`` with the module-level pipeline.

    The prompt is wrapped in ``<|im_start|>`` / ``<|im_end|>`` markers as a
    user turn followed by an open assistant turn, then sampled from.

    Args:
        prompt: User text to respond to.
        temperature: Sampling temperature, forwarded to the pipeline.
        top_p: Top-p value, forwarded to the pipeline.
        top_k: Top-k value; coerced to ``int`` before being forwarded.
        repetition_penalty: Repetition penalty, forwarded to the pipeline.
        max_length: Upper bound on newly generated tokens (forwarded as
            ``max_new_tokens``); coerced to ``int`` before being forwarded.

    Returns:
        The ``generated_text`` field of the first pipeline output. The call
        uses ``return_full_text=False``.
    """
    chat_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    try:
        # The model is kept on the CPU between calls and moved to the GPU
        # only while this function runs.
        generator.model.cuda()
        generator.device = torch.device("cuda")
        outputs = generator(
            chat_prompt,
            do_sample=True,
            # UI sliders may deliver whole numbers as floats; these two
            # settings are counts, so pass them on as integers.
            max_new_tokens=int(max_length),
            temperature=temperature,
            top_p=top_p,
            top_k=int(top_k),
            repetition_penalty=repetition_penalty,
            return_full_text=False,
        )
    finally:
        # Always hand the model back to the CPU, even when generation raises,
        # so a failed request does not leave the pipeline in the CUDA state.
        generator.model.cpu()
        generator.device = torch.device("cpu")
    # Extract the generated text and return it
    return outputs[0]['generated_text']
# Create the Gradio interface.
# Sampling controls are described as (label, minimum, maximum, step, default)
# rows, listed in the same order as generate_text's parameters after `prompt`.
slider_specs = (
    ("Temperature", 0.1, 2.0, 0.01, 0.7),
    ("Top p", 0.0, 1.0, 0.01, 0.95),
    ("Top k", 0, 100, 1, 40),
    ("Repetition Penalty", 1.0, 2.0, 0.01, 1.10),
    ("Max Length", 5, 4096, 5, 1024),
)

prompt_box = gr.Textbox(label="Prompt", lines=2, placeholder="Type a prompt...")
sampling_sliders = [
    gr.Slider(minimum=low, maximum=high, step=step, value=default, label=label)
    for label, low, high, step, default in slider_specs
]
result_box = gr.Textbox(label="Generated Text")

iface = gr.Interface(
    fn=generate_text,
    inputs=[prompt_box, *sampling_sliders],
    outputs=result_box,
    title="Hyperion-2.0-Mistral-7B",
    description="Try out the Hyperion-2.0-Mistral-7B model for free! This is a preview version, and the model will be released soon",
)

# Start serving the app.
iface.launch()