import os

import gradio as gr
from huggingface_hub import InferenceClient

# HF Inference Endpoint parameters
endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud"
hf_token = os.getenv("TOKEN_HF")
# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)
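# NOTE: TOKEN_HF must be available in the environment (e.g. as a Space secret);
# os.getenv returns None otherwise, and the client then calls the endpoint unauthenticated.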
# generation parameters
gen_kwargs = dict(
    max_new_tokens=512,
    top_k=30,
    top_p=0.9,
    temperature=0.2,
    repetition_penalty=1.02,
    stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
)
# prompt
prompt = "What can you do in Nuremberg, Germany? Give me 3 tips."
stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
# print each generated token as it arrives
for r in stream:
    # skip special tokens
    if r.token.special:
        continue
    # stop if we encounter a stop sequence
    if r.token.text in gen_kwargs["stop_sequences"]:
        break
    # print the generated token (in a generator, yield it instead)
    print(r.token.text, end="")
    # yield r.token.text
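
# NOTE: the loop above runs once at module import (i.e. at Space startup) and only
# streams to stdout; it is independent of the Gradio UI built below.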
#######################################################################
# Display with Gradio
with open("custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()
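
# NOTE (assumption): small_and_beautiful_theme, title, description_top, description,
# predict, reset_textbox, transfer_input and reset_state are not defined in this
# excerpt; they are assumed to be defined or imported elsewhere in the file.
# customCSS is read above but never used here; gr.Blocks(css=customCSS) would apply it.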
with gr.Blocks(theme=small_and_beautiful_theme) as demo:
    history = gr.State([])
    user_question = gr.State("")
    gr.Markdown("KIs am LI - wähle aus, was du bzgl. KI-Bots ausprobieren möchtest!")
    with gr.Tabs():
        with gr.TabItem("LI-Chat"):
            with gr.Row():
                gr.HTML(title)
                status_display = gr.Markdown("Erfolg", elem_id="status_display")
            gr.Markdown(description_top)
            with gr.Row(scale=1).style(equal_height=True):
                with gr.Column(scale=5):
                    with gr.Row(scale=1):
                        chatbotGr = gr.Chatbot(elem_id="LI_chatbot").style(height="100%")
                    with gr.Row(scale=1):
                        with gr.Column(scale=12):
                            user_input = gr.Textbox(
                                show_label=False, placeholder="Gib deinen Text / Frage ein."
                            ).style(container=False)
                        with gr.Column(min_width=100, scale=1):
                            submitBtn = gr.Button("Absenden")
                        with gr.Column(min_width=100, scale=1):
                            cancelBtn = gr.Button("Stoppen")
                    with gr.Row(scale=1):
                        emptyBtn = gr.Button(
                            "🧹 Neuer Chat",
                        )
                with gr.Column():
                    with gr.Column(min_width=50, scale=1):
                        with gr.Tab(label="Nur zum Testen:"):
                            gr.Markdown("# Parameter")
                            top_p = gr.Slider(
                                minimum=0,
                                maximum=1.0,
                                value=0.95,
                                step=0.05,
                                interactive=True,
                                label="Top-p",
                            )
                            temperature = gr.Slider(
                                minimum=0.1,
                                maximum=2.0,
                                value=1,
                                step=0.1,
                                interactive=True,
                                label="Temperature",
                            )
                            max_length_tokens = gr.Slider(
                                minimum=0,
                                maximum=512,
                                value=512,
                                step=8,
                                interactive=True,
                                label="Max Generation Tokens",
                            )
                            max_context_length_tokens = gr.Slider(
                                minimum=0,
                                maximum=4096,
                                value=2048,
                                step=128,
                                interactive=True,
                                label="Max History Tokens",
                            )
            gr.Markdown(description)
with gr.TabItem("Übersetzungen"):
with gr.Row():
gr.Textbox(
show_label=False, placeholder="Ist noch in Arbeit..."
).style(container=False)
with gr.TabItem("Code-Generierungen"):
with gr.Row():
gr.Textbox(
show_label=False, placeholder="Ist noch in Arbeit..."
).style(container=False)
    predict_args = dict(
        fn=predict,
        inputs=[
            user_question,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens,
        ],
        outputs=[chatbotGr, history, status_display],
        show_progress=True,
    )
    # new chat
    reset_args = dict(
        # fn=reset_chat, inputs=[], outputs=[user_input, status_display]
        fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
    )

    # chatbot: move the submitted text from the textbox into state before predicting
    transfer_input_args = dict(
        fn=transfer_input, inputs=[user_input], outputs=[user_question, user_input, submitBtn], show_progress=True
    )
    # listeners: start on button click or on Return in the textbox
    predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
    predict_event2 = submitBtn.click(**transfer_input_args).then(**predict_args)
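    # NOTE (assumption): cancelBtn is never wired up in this excerpt. Capturing the
    # predict_event1/predict_event2 handles suggests cancellation was intended; a
    # minimal sketch using Gradio's `cancels` parameter could look like:
    # cancelBtn.click(fn=None, inputs=None, outputs=None, cancels=[predict_event1, predict_event2])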
    # listener: on reset, clear chatbot, history and status
    emptyBtn.click(
        reset_state,
        outputs=[chatbotGr, history, status_display],
        show_progress=True,
    )
    emptyBtn.click(**reset_args)
demo.title = "LI Chat"
#demo.queue(concurrency_count=1).launch(share=True)
demo.queue(concurrency_count=1).launch(debug=True)