import os

import gradio as gr
from huggingface_hub import InferenceClient

# HF Inference Endpoint parameters
endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud"
hf_token = os.getenv("TOKEN_HF")
# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)
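# NOTE: TOKEN_HF must be available in the environment (e.g. as a Space secret);
# os.getenv returns None otherwise, and the client then calls the endpoint unauthenticated.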
# generation parameters
gen_kwargs = dict(
    max_new_tokens=512,
    top_k=30,
    top_p=0.9,
    temperature=0.2,
    repetition_penalty=1.02,
    stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
)
# prompt
prompt = "What can you do in Nuremberg, Germany? Give me 3 tips."
stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
# print each generated token as it arrives
for r in stream:
    # skip special tokens
    if r.token.special:
        continue
    # stop if we encounter a stop sequence
    if r.token.text in gen_kwargs["stop_sequences"]:
        break
    # print the generated token (in a generator, yield it instead)
    print(r.token.text, end="")
    # yield r.token.text
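
# NOTE: the loop above runs once at module import (i.e. at Space startup) and only
# streams to stdout; it is independent of the Gradio UI built below.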
#######################################################################
# Display with Gradio
with open("custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()
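
# NOTE (assumption): small_and_beautiful_theme, title, description_top, description,
# predict, reset_textbox, transfer_input and reset_state are not defined in this
# excerpt; they are assumed to be defined or imported elsewhere in the file.
# customCSS is read above but never used here; gr.Blocks(css=customCSS) would apply it.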
with gr.Blocks(theme=small_and_beautiful_theme) as demo:
    history = gr.State([])
    user_question = gr.State("")
    gr.Markdown("KIs am LI - wähle aus, was du bzgl. KI-Bots ausprobieren möchtest!")
    with gr.Tabs():
        with gr.TabItem("LI-Chat"):
            with gr.Row():
                gr.HTML(title)
                status_display = gr.Markdown("Erfolg", elem_id="status_display")
            gr.Markdown(description_top)
            with gr.Row(scale=1).style(equal_height=True):
                with gr.Column(scale=5):
                    with gr.Row(scale=1):
                        chatbotGr = gr.Chatbot(elem_id="LI_chatbot").style(height="100%")
                    with gr.Row(scale=1):
                        with gr.Column(scale=12):
                            user_input = gr.Textbox(
                                show_label=False, placeholder="Gib deinen Text / Frage ein."
                            ).style(container=False)
                        with gr.Column(min_width=100, scale=1):
                            submitBtn = gr.Button("Absenden")
                        with gr.Column(min_width=100, scale=1):
                            cancelBtn = gr.Button("Stoppen")
                    with gr.Row(scale=1):
                        emptyBtn = gr.Button(
                            "🧹 Neuer Chat",
                        )
                with gr.Column():
                    with gr.Column(min_width=50, scale=1):
                        with gr.Tab(label="Nur zum Testen:"):
                            gr.Markdown("# Parameter")
                            top_p = gr.Slider(
                                minimum=0,
                                maximum=1.0,
                                value=0.95,
                                step=0.05,
                                interactive=True,
                                label="Top-p",
                            )
                            temperature = gr.Slider(
                                minimum=0.1,
                                maximum=2.0,
                                value=1,
                                step=0.1,
                                interactive=True,
                                label="Temperature",
                            )
                            max_length_tokens = gr.Slider(
                                minimum=0,
                                maximum=512,
                                value=512,
                                step=8,
                                interactive=True,
                                label="Max Generation Tokens",
                            )
                            max_context_length_tokens = gr.Slider(
                                minimum=0,
                                maximum=4096,
                                value=2048,
                                step=128,
                                interactive=True,
                                label="Max History Tokens",
                            )
            gr.Markdown(description)
with gr.TabItem("Übersetzungen"):
with gr.Row():
gr.Textbox(
show_label=False, placeholder="Ist noch in Arbeit..."
).style(container=False)
with gr.TabItem("Code-Generierungen"):
with gr.Row():
gr.Textbox(
show_label=False, placeholder="Ist noch in Arbeit..."
).style(container=False)
    predict_args = dict(
        fn=predict,
        inputs=[
            user_question,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens,
        ],
        outputs=[chatbotGr, history, status_display],
        show_progress=True,
    )
    # new chat
    reset_args = dict(
        # fn=reset_chat, inputs=[], outputs=[user_input, status_display]
        fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
    )

    # chatbot: move the submitted text from the textbox into state before predicting
    transfer_input_args = dict(
        fn=transfer_input, inputs=[user_input], outputs=[user_question, user_input, submitBtn], show_progress=True
    )
    # listeners: start on button click or on Return in the textbox
    predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
    predict_event2 = submitBtn.click(**transfer_input_args).then(**predict_args)
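    # NOTE (assumption): cancelBtn is never wired up in this excerpt. Capturing the
    # predict_event1/predict_event2 handles suggests cancellation was intended; a
    # minimal sketch using Gradio's `cancels` parameter could look like:
    # cancelBtn.click(fn=None, inputs=None, outputs=None, cancels=[predict_event1, predict_event2])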
    # listener: on reset, clear chatbot, history and status
    emptyBtn.click(
        reset_state,
        outputs=[chatbotGr, history, status_display],
        show_progress=True,
    )
    emptyBtn.click(**reset_args)
demo.title = "LI Chat"
#demo.queue(concurrency_count=1).launch(share=True)
demo.queue(concurrency_count=1).launch(debug=True)