import os

import gradio as gr
from huggingface_hub import InferenceClient

HF_TOKEN = os.environ.get("HF_TOKEN", None)

API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]

client = InferenceClient(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)
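
# query() builds a Falcon-chat-style prompt ("System: ...\nUser: ...\n{bot_name}: "),
# streams tokens from the Inference API, strips any trailing stop sequence,
# and returns the completed reply as a single string.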
def query(bot_name, system_prompt, user_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    print(temperature, max_new_tokens, top_p, repetition_penalty)
    seed = 42

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )

    print(bot_name)
    print(system_prompt)
    print(user_prompt)
    print('-' * 20)

    prompt = f"System: {system_prompt}\nUser: {user_prompt}\n{bot_name}: "

    stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text

        # The model may emit a stop sequence at the end of the generation;
        # trim it (and any trailing whitespace) from the accumulated output.
        for stop_str in STOP_SEQUENCES:
            if output.endswith(stop_str):
                output = output[:-len(stop_str)]
                output = output.rstrip()

    print(output)
    print('-' * 20)
    return output
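
# Minimal direct-call sketch (assumes HF_TOKEN is set and the hosted
# falcon-180B-chat endpoint is reachable; the bot name and prompts below
# are illustrative, not part of the original app):
#
#   reply = query(
#       "Falcon",
#       "You are a helpful and honest assistant.",
#       "What is the capital of France?",
#   )
#   print(reply)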

# The numeric parameters need numeric components; plain text inputs would
# pass strings into text_generation().
iface = gr.Interface(
    query,
    inputs=[
        gr.Textbox(label="Bot name"),
        gr.Textbox(label="System prompt"),
        gr.Textbox(label="User prompt"),
        gr.Slider(0.1, 2.0, value=0.9, label="Temperature"),
        gr.Slider(1, 1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(0.05, 1.0, value=0.95, label="Top-p"),
        gr.Slider(1.0, 2.0, value=1.0, label="Repetition penalty"),
    ],
    outputs="text",
)

iface.queue()
iface.launch()
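
# To serve this Space locally (a sketch; requires gradio and huggingface_hub,
# and assumes this file is saved as app.py):
#   HF_TOKEN=<your token> python app.py
# then open the local URL that Gradio prints.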