# Import necessary libraries
from threading import Thread
import argparse
import torch
import gradio as gr
from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM
from peft import PeftModel
from utils import get_device  # Assumed to already exist in this repo's utils module
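# For reference, a minimal sketch of what utils.get_device is assumed to do
# (the actual implementation ships alongside this script):
#
#     def get_device():
#         return torch.device("cuda" if torch.cuda.is_available() else "cpu")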
# Create the parser
parser = argparse.ArgumentParser(description='Check model usage.')

# Add the arguments
parser.add_argument('--baseonly', action='store_true',
                    help='A boolean switch to indicate base-only mode')

# Execute the parse_args() method
args = parser.parse_args()
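# Example invocations (assuming this file is saved as app.py):
#     python app.py             # base model + eliAI adapter
#     python app.py --baseonly  # skip the adapter, base model only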
# Define model and adapter names and the torch data type
# (no quantization is configured here; the model loads in bfloat16)
model_name = "microsoft/Phi-3-mini-4k-instruct"
adapters_name = "zurd46/eliAI"
torch_dtype = torch.bfloat16  # Set the appropriate torch data type

# Display device and CPU thread information
device = get_device()
print("Running on device:", device)
print("CPU threads:", torch.get_num_threads())

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load base model
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch_dtype)
model.resize_token_embeddings(len(tokenizer))
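# If GPU memory is tight, the base model could instead be loaded in 4-bit via
# bitsandbytes (a hedged sketch, not part of this Space as written):
#
#     from transformers import BitsAndBytesConfig
#     bnb_config = BitsAndBytesConfig(load_in_4bit=True,
#                                     bnb_4bit_compute_dtype=torch.bfloat16)
#     model = AutoModelForCausalLM.from_pretrained(model_name,
#                                                  quantization_config=bnb_config)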
# Load adapter if available and not base-only
usingAdapter = False
if not args.baseonly:
    usingAdapter = True
    model = PeftModel.from_pretrained(model, adapters_name)

model.to(device)
print(f"Model {model_name} loaded successfully on {device}")
# Function to run the text generation process
def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
    # When the adapter is active, wrap the prompt in Phi-3's instruct chat format
    template = "<|user|>\n{}<|end|>\n<|assistant|>"
    model_inputs = tokenizer(template.format(user_text) if usingAdapter else user_text, return_tensors="pt")
    model_inputs = model_inputs.to(device)
    # Generate text in a separate thread: model.generate blocks until it
    # finishes, so it runs in the background while the streamer yields
    # decoded text incrementally on this one
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=model_inputs['input_ids'],
        attention_mask=model_inputs['attention_mask'],
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        temperature=float(temperature),
        top_k=top_k,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    # Accumulate the streamed text and return it once generation finishes
    model_output = ""
    for new_text in streamer:
        model_output += new_text
    return model_output
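# Example call (hypothetical values; assumes the model above finished loading):
#     reply = run_generation("What is nucleus sampling?",
#                            top_p=0.95, temperature=0.8,
#                            top_k=50, max_new_tokens=128)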
# Gradio UI setup
with gr.Blocks(css="""
    div.svelte-sfqy0y {
        display: flex;
        flex-direction: inherit;
        flex-wrap: wrap;
        gap: var(--form-gap-width);
        box-shadow: var(--block-shadow);
        border: var(--block-border-width) solid var(--border-color-primary);
        border-radius: var(--block-radius);
        background: var(--block-background-fill);
        overflow-y: hidden;
        padding: 20px;
    }
    body {
        font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
        background-color: var(--body-background-fill);
        color: #e0e0e0;
        margin: 0;
        padding: 0;
        box-sizing: border-box;
    }
    .gradio-container {
        max-width: 900px;
        margin: auto;
        padding: 20px;
        border-radius: 8px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.5);
        background: var(--body-background-fill);
    }
    .gr-button {
        background-color: var(--block-background-fill);
        color: white;
        border: none;
        border-radius: 4px;
        padding: 10px 24px;
        cursor: pointer;
    }
    .gr-button:hover {
        background-color: #3700b3;
    }
    .gr-slider input[type=range] {
        -webkit-appearance: none;
        width: 100%;
        height: 8px;
        border-radius: 5px;
        background: #333;
        outline: none;
        opacity: 0.9;
        -webkit-transition: .2s;
        transition: opacity .2s;
    }
    .gr-slider input[type=range]:hover {
        opacity: 1;
    }
    .gr-textbox {
        background-color: var(--block-background-fill);
        color: white;
        border: none;
        border-radius: 4px;
        padding: 10px;
    }
    .chatbox {
        max-height: 400px;
        overflow-y: auto;
        margin-bottom: 20px;
    }
""") as demo:
    gr.Markdown(
        """
        <div style="text-align: center; padding: 20px;">
            <h1>🌙 eliAI Text Generation Interface</h1>
            <h3>Model: Phi-3-mini-4k-instruct</h3>
            <h4>Developed by Daniel Zurmühle</h4>
        </div>
        """)

    with gr.Row():
        with gr.Column(scale=3):
            user_text = gr.Textbox(placeholder="Enter your question here", label="User Input", lines=3, elem_classes="gr-textbox")
            button_submit = gr.Button(value="Submit", elem_classes="gr-button")
            max_new_tokens = gr.Slider(minimum=1, maximum=1000, value=1000, step=1, label="Max New Tokens")
            top_p = gr.Slider(minimum=0.05, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)")
            top_k = gr.Slider(minimum=1, maximum=50, value=50, step=1, label="Top-k")
            temperature = gr.Slider(minimum=0.1, maximum=5.0, value=0.8, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            model_output = gr.Chatbot(label="Chatbot Output", height=566)

    # Wrap the generation call so the Chatbot receives a (user, bot) message pair
    def handle_submit(text, top_p, temperature, top_k, max_new_tokens):
        response = run_generation(text, top_p, temperature, top_k, max_new_tokens)
        return [(text, response)]
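    # Note: each submission replaces the chat history with a single exchange.
    # A running conversation would need the history threaded through as an
    # extra input, e.g. (hypothetical, not wired into the UI as written):
    #     def handle_submit_with_history(text, history, *gen_args):
    #         return (history or []) + [(text, run_generation(text, *gen_args))]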
    button_submit.click(handle_submit, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)
    user_text.submit(handle_submit, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)

demo.queue(max_size=32).launch(server_name="0.0.0.0", server_port=7860)