Spaces: Running on Zero
File size: 1,806 Bytes
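# app.py: Gradio demo for ClosedCharacter/Peach-9B-8k-Roleplay. Takes a
# system message and a user message, generates a reply with the model, and
# streams the text back to the UI one character at a time.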
import time
import traceback

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name_or_path = "ClosedCharacter/Peach-9B-8k-Roleplay"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# Use the GPU when available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
    print("GPU not available, using CPU.")

model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to(device)
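# Note: on a ZeroGPU Space ("Running on Zero"), the inference function is
# usually wrapped with the @spaces.GPU decorator from the `spaces` package so
# a GPU is attached on demand; this file assumes a persistent device instead.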
def slow_echo(system_message, user_message):
    """Generate a reply and stream it back one character at a time."""
    try:
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ]
        # Render the chat with the model's template and tokenize it.
        input_ids = tokenizer.apply_chat_template(
            conversation=messages, tokenize=True, return_tensors="pt"
        ).to(device)
        output = model.generate(
            inputs=input_ids,
            do_sample=True,
            temperature=0.3,
            top_p=0.5,
            no_repeat_ngram_size=6,
            repetition_penalty=1.1,
            max_new_tokens=512,
        )
        # Decode only the newly generated tokens, dropping the echoed
        # prompt and any special tokens.
        generated_response = tokenizer.decode(
            output[0][input_ids.shape[1]:], skip_special_tokens=True
        )
        # Yield a growing prefix so Gradio streams the text to the UI.
        for i in range(len(generated_response)):
            time.sleep(0.05)
            yield generated_response[: i + 1]
    except Exception as e:
        error_message = f"An error occurred: {e}\n\nTraceback:\n{traceback.format_exc()}"
        yield error_message
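# Alternative sketch (not wired into the UI below): stream tokens as the
# model produces them using transformers' TextIteratorStreamer, rather than
# replaying finished text with a fixed 0.05 s delay per character. The name
# `token_stream_echo` is illustrative and not part of the original app.
def token_stream_echo(system_message, user_message):
    from threading import Thread

    from transformers import TextIteratorStreamer

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]
    input_ids = tokenizer.apply_chat_template(
        conversation=messages, tokenize=True, return_tensors="pt"
    ).to(device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run generation in a background thread; the streamer yields text
    # fragments as soon as they are decoded.
    Thread(
        target=model.generate,
        kwargs=dict(inputs=input_ids, streamer=streamer, max_new_tokens=512),
    ).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial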
iface = gr.Interface(
    fn=slow_echo,
    inputs=[
        gr.Textbox(label="System Message"),
        gr.Textbox(label="User Message"),
    ],
    outputs=gr.Textbox(label="Generated Response"),
    title="Roleplay Chatbot",
)

if __name__ == "__main__":
    iface.launch()
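# A minimal client-side sketch for calling the deployed Space with
# gradio_client ("owner/space-name" is a placeholder, not the real Space id):
#
#     from gradio_client import Client
#
#     client = Client("owner/space-name")
#     result = client.predict(
#         "You are a friendly roleplay partner.",  # System Message
#         "Hi there!",                             # User Message
#         api_name="/predict",
#     )
#     print(result)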