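"""HuggingFace's missing inference widget: a gradio chat UI for trying
hub LLMs via the Featherless OpenAI-compatible inference API."""
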
from openai import OpenAI
import gradio as gr
import os
import json
import functools


# Featherless serves an OpenAI-compatible API, so the stock OpenAI
# client works once base_url points at the Featherless endpoint.
api_key = os.environ.get('FEATHERLESS_API_KEY')
client = OpenAI(
    base_url="https://api.featherless.ai/v1",
    api_key=api_key
)

def respond(message, history, model):
    # Convert gradio's (user, assistant) pair history into the
    # OpenAI chat-completions message format.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model,
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
        max_tokens=2000
    )

    # Stream the reply, yielding the accumulated text so the chat
    # window updates in place as tokens arrive.
    partial_message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message = partial_message + chunk.choices[0].delta.content
            yield partial_message

# Read the logo SVG inline so it can be embedded in the footer HTML
# (the .logo-mark fill is styled via the Blocks css below).
with open('./logo.svg') as f_logo:
    logo = f_logo.read()

# model-cache.json maps each model class to the list of model ids
# served in that class.
with open('./model-cache.json', 'r') as f_model_cache:
    model_cache = json.load(f_model_cache)

def build_model_choices():
    # Flatten the cache into (label, value) pairs for the dropdown,
    # skipping the oversized 70B/72B model classes.
    all_choices = []
    for model_class in model_cache:
        if model_class in ['llama3-70b-8k', 'qwen2-72b-lc']:
            continue
        all_choices += [(f"{model_id} ({model_class})", model_id) for model_id in model_cache[model_class]]

    return all_choices

model_choices = build_model_choices()

def initial_model(referer=None):
    print(f"initial_model({referer})")
    if referer == 'http://127.0.0.1:7860/':
        return 'Sao10K/L3-70B-Euryale-v2.1'
    # When the page is opened from a huggingface.co model page, default
    # to that model if it is in the cache.
    if referer and referer.startswith("https://huggingface.co/"):
        possible_model = referer[len("https://huggingface.co/"):]
        full_model_list = functools.reduce(lambda x, y: x + y, model_cache.values(), [])
        model_is_supported = possible_model in full_model_list
        if model_is_supported:
            return possible_model

    return 'anakin87/yo-Llama-3-8B-Instruct'

title_text = "HuggingFace's missing inference widget"
with gr.Blocks(title=title_text, css='.logo-mark { fill: #ffe184; }') as demo:
    gr.HTML("""
        <h1 align="center">HuggingFace's missing inference widget</h1>
        <p align="center">
            Test any &lt;=15B LLM from the hub.
        </p>
    """)

    # hidden_state = gr.State(value=initial_model)

    # gradio re-evaluates a callable `value` on each page load, so the
    # default model is computed per visit.
    model_selector = gr.Dropdown(
        label="Model",
        choices=model_choices,
        value=initial_model
        # value=hidden_state
    )

    # The dropdown is wired in as an additional input, so its current
    # value arrives in respond() as the `model` argument.
    gr.ChatInterface(
        respond,
        additional_inputs=[model_selector],
        head="""
        <script>console.log("Hello from gradio!")</script>
        """,
    )
    gr.HTML(f"""
        <p align="center">
            Inference by <a href="https://featherless.ai">{logo}</a>
        </p>
    """)
    # On page load, read the request's referer so visitors arriving from
    # a huggingface.co model page start with that model selected.
    def update_initial_model_choice(request: gr.Request):
        return initial_model(request.headers.get('referer'))

    demo.load(update_initial_model_choice, outputs=model_selector)

demo.launch()