# NeonLLM / app.py
import os
import json
from typing import List, Tuple
from collections import OrderedDict

import gradio as gr

from shared import Client

# Per-model deployment settings arrive as a JSON blob in the CONFIG env var.
config = json.loads(os.environ['CONFIG'])
model_names = list(config.keys())
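# Illustrative CONFIG shape, inferred from how `config` is read below (all
# names here are hypothetical; the real value lives in the Space's secrets).
# "api_url"/"api_key" hold the *names* of further env vars containing the
# actual endpoint URL and key, and "personas" maps persona name -> system
# prompt (null means no system message):
#   {
#     "stable": {
#       "name": "neon-llm-v1",
#       "api_url": "STABLE_API_URL",
#       "api_key": "STABLE_API_KEY",
#       "personas": {"default": null}
#     }
#   }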

# Build one API client per configured model; endpoint credentials are
# resolved indirectly through the env-var names stored in the config.
clients = {}
for name in config:
    model_personas = config[name].get("personas", {})
    client = Client(
        api_url=os.environ[config[name]['api_url']],
        api_key=os.environ[config[name]['api_key']],
        personas=model_personas
    )
    clients[name] = client

# Deduplicate persona names across models while preserving first-seen order.
personas = list(OrderedDict.fromkeys(persona for name in model_names for persona in clients[name].personas))
# Human-readable summary of each model and the personas it supports.
info = "\n".join([f"{model} ({config[model]['name']}): {list(clients[model].personas.keys())}" for model in model_names])
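# For the hypothetical CONFIG sketched above, these would be (illustrative):
#   personas == ["default"]
#   info == "stable (neon-llm-v1): ['default']"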


def respond(
    message,
    history: List[Tuple[str, str]],
    persona,
    model,
    info,
    conversational,
    max_tokens,
):
    # `info` mirrors the read-only textbox and is intentionally unused here.
    client = clients[model]

    messages = []
    # Resolve the persona's system prompt; surface a UI-visible error if the
    # selected model does not define the requested persona.
    try:
        system_prompt = client.personas[persona]
    except KeyError:
        supported_personas = list(client.personas.keys())
        raise gr.Error(f"Model '{model}' does not support persona '{persona}', only {supported_personas}")
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})
    # In conversational mode, replay only the last two exchanges as context.
    if conversational:
        for val in history[-2:]:
            if val[0]:
                messages.append({"role": "user", "content": val[0]})
            if val[1]:
                messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
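
    # At this point `messages` looks roughly like (illustrative: one prior
    # exchange and a persona with a non-null system prompt):
    #   [{"role": "system", "content": "<persona prompt>"},
    #    {"role": "user", "content": "<earlier question>"},
    #    {"role": "assistant", "content": "<earlier answer>"},
    #    {"role": "user", "content": "<current message>"}]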

    # Deterministic decoding: temperature 0 with vLLM beam search (best_of=5),
    # passed through extra_body of the OpenAI-compatible API.
    completion = client.openai.chat.completions.create(
        model=client.vllm_model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    response = completion.choices[0].message.content
    return response
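
# Hypothetical direct call to respond(), bypassing the UI; it assumes the
# "stable" model and "default" persona from the CONFIG sketch above and a
# reachable vLLM endpoint:
#   reply = respond("Hello!", [], "default", "stable", info, True, 512)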

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Radio(choices=personas, value="default", label="persona"),
        gr.Radio(choices=model_names, value="stable", label="model"),
        gr.Textbox(value=info, interactive=False, label="info"),
        gr.Checkbox(value=True, label="conversational"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
    additional_inputs_accordion=gr.Accordion(label="Config", open=True),
    title="NeonLLM (v2024-06-17)",
    concurrency_limit=5,
)


if __name__ == "__main__":
    demo.launch()