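# NeonLLM Gradio front end: a ChatInterface that routes chat requests to
# vLLM-served models through per-model OpenAI-compatible clients.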
import os
import json
from typing import List, Tuple
from collections import OrderedDict
import gradio as gr
from shared import Client
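
# CONFIG maps model names to their settings; 'api_url'/'api_key' hold the
# *names* of env vars to read, and the optional 'personas' maps each persona
# to a system prompt (null = no system prompt). Illustrative shape only
# (the values below are hypothetical):
# {
#   "stable": {
#     "name": "neon-llm-2024-06",
#     "api_url": "STABLE_API_URL",
#     "api_key": "STABLE_API_KEY",
#     "personas": {"default": null}
#   }
# }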
config = json.loads(os.environ['CONFIG'])
model_names = list(config.keys())
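
# Build one OpenAI-compatible client per configured model.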
clients = {}
for name in config:
    model_personas = config[name].get("personas", {})
    client = Client(
        api_url=os.environ[config[name]['api_url']],
        api_key=os.environ[config[name]['api_key']],
        personas=model_personas,
    )
    clients[name] = client
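
# All personas across all models, deduplicated while preserving first-seen order.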
personas = list(OrderedDict.fromkeys(
    persona for name in model_names for persona in clients[name].personas
))
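
# One summary line per model for the read-only "info" box.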
info = "\n".join(
    f"{model} ({config[model]['name']}): {list(clients[model].personas.keys())}"
    for model in model_names
)
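

# Chat callback for gr.ChatInterface; the extra parameters arrive in the same
# order as the additional_inputs declared below.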
def respond(
    message,
    history: List[Tuple[str, str]],
    persona,
    model,
    info,
    conversational,
    max_tokens,
):
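    """Build an OpenAI-style message list and query the selected model."""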
    client = clients[model]
    messages = []
    try:
        system_prompt = client.personas[persona]
    except KeyError:
        supported_personas = list(client.personas.keys())
        raise gr.Error(f"Model '{model}' does not support persona '{persona}', only {supported_personas}")
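    # A persona may map to None, in which case no system prompt is sent.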
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})
    if conversational:
        for val in history[-2:]:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})
messages.append({"role": "user", "content": message})
    completion = client.openai.chat.completions.create(
        model=client.vllm_model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    response = completion.choices[0].message.content
    return response
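

# The additional inputs map positionally onto respond()'s extra parameters
# (persona, model, info, conversational, max_tokens).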
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Radio(choices=personas, value="default", label="persona"),
        gr.Radio(choices=model_names, value="stable", label="model"),
        gr.Textbox(value=info, interactive=False, label="info"),
        gr.Checkbox(value=True, label="conversational"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
    additional_inputs_accordion=gr.Accordion(label="Config", open=True),
    title="NeonLLM (v2024-06-17)",
    concurrency_limit=5,
)

if __name__ == "__main__":
    demo.launch()