File size: 4,148 Bytes
2e74f63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ab75c1
2e74f63
 
 
 
 
 
 
 
a2e12c6
4c87f5c
 
a2e12c6
2e74f63
 
 
 
4c87f5c
20f4682
3ab75c1
20e3a37
82d43cd
ab8e6c2
2e74f63
a2e12c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e74f63
 
3ab75c1
 
2e74f63
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import os
import gradio as gr
from text_generation import Client

# Access token and endpoint URL are read from the environment at import time.
HF_TOKEN = os.getenv("HF_TOKEN")
INFERENCE_ENDPOINT = os.getenv("INFERENCE_ENDPOINT")


# Speaker labels used when the conversation is rendered into a text prompt.
USER_NAME = "User"
BOT_NAME = "Falcon"
# Preamble placed at the start of every prompt (plain string: it has no placeholders).
DEFAULT_INSTRUCTIONS = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.
"""
RETRY_COMMAND = "/retry"
# Marker for the start of the user's next turn; passed as a stop sequence.
STOP_STR = f"\n{USER_NAME}:"
STOP_SUSPECT_LIST = [":", "\n", "User"]


def build_auth_headers(token):
    """Return the HTTP headers for the inference endpoint.

    Args:
        token: The access token, or ``None``/empty when none is configured.

    Returns:
        A dict holding the ``Authorization`` bearer header when ``token`` is
        set, otherwise ``None`` so that no bogus ``"Bearer None"`` header is
        sent.
    """
    if not token:
        return None
    return {"Authorization": f"Bearer {token}"}


# The client stays None when no endpoint is configured, so the module can
# still be imported without INFERENCE_ENDPOINT being set.
client = None
if INFERENCE_ENDPOINT:
    client = Client(INFERENCE_ENDPOINT, headers=build_auth_headers(HF_TOKEN))



def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
    """Render the instructions, past turns and the new message as one prompt.

    Args:
        message: The new user message to append as the final user turn.
        chat_history: Iterable of ``(user_message, bot_message)`` pairs, one
            per completed turn, in conversation order.
        instructions: Preamble text; surrounding spaces and then surrounding
            newlines are trimmed before use.

    Returns:
        The prompt text, ending with the bot's speaker label so the model
        continues with the bot's reply.
    """
    pieces = [instructions.strip(" ").strip("\n")]
    # Each completed turn contributes one user line and one bot line.
    pieces.extend(
        f"\n{USER_NAME}: {user_message}\n{BOT_NAME}: {bot_message}"
        for user_message, bot_message in chat_history
    )
    # Leave the bot label open-ended for the model to complete.
    pieces.append(f"\n{USER_NAME}: {message}\n{BOT_NAME}:")
    return "".join(pieces)



def run_chat(message: str, chat_history):
    """Generate the bot's reply to ``message`` and extend the conversation.

    Args:
        message: The new user message.
        chat_history: Previous turns as ``[user_message, bot_message]`` pairs.
            The list passed in is not modified; an extended copy is returned.

    Returns:
        A ``(response, chat_history)`` tuple: the generated reply text and the
        history with the new ``[message, response]`` turn appended.

    Raises:
        RuntimeError: If no inference client was created because
            ``INFERENCE_ENDPOINT`` was not set when the module was loaded.
    """
    if client is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise RuntimeError(
            "No inference client is configured: set the INFERENCE_ENDPOINT "
            "(and HF_TOKEN) environment variables before using the chat endpoint."
        )

    prompt = format_chat_prompt(message, chat_history, DEFAULT_INSTRUCTIONS)
    generation = client.generate(
        prompt,
        do_sample=True,
        max_new_tokens=1024,
        stop_sequences=[STOP_STR, "<|endoftext|>"],
        temperature=0.8,
        top_p=0.9,
    )
    # Remove the user-turn stop marker so it never shows up in the reply.
    response = generation.generated_text.replace(STOP_STR, "")
    return response, chat_history + [[message, response]]


# Landing page for the Space plus the hidden components that expose `run_chat`
# under the "chat" API name.
with gr.Blocks() as demo:
    gr.Markdown("""
    # Falcon-7b-instruct Discord Bot Powered by Gradio and Hugging Face Endpoints

    Make sure you read the 'Inference Endpoints' section below first! 🦅

    ### First install the `gradio_client`

    ```bash
    pip install gradio_client
    ```

    ### Then deploy to discord in one line! ⚡️

    ```python
    import gradio_client as grc

    secrets = {"HF_TOKEN": "<your-key-here>", "INFERENCE_ENDPOINT": "<endpoint-url>"}
    client = grc.Client.duplicate("gradio-discord-bots/falcon-7b-instruct", private=False, secrets=secrets, sleep_timeout=2880)
    client.deploy_discord(api_names=["chat"])
    ```
    """)
    with gr.Accordion(label="Inference Endpoints", open=False):
        gr.Markdown("""
    ## Setting Up Inference Endpoints 💪
    To deploy this space as a discord bot, you will need to deploy your own Falcon model to Hugging Face Endpoints.
    Don't worry it's super easy!

    1. Go to the [model page](https://huggingface.co/tiiuae/falcon-7b-instruct) 🦅
    2. Click Deploy > Inference Endpoints
    <img src="https://gradio-builds.s3.amazonaws.com/demo-files/discordbots/inference_endpoints/modelpage.png" alt="drawing" width="800" height=400/>
    3. Select your desired cloud provider and region ☁️
    <img src="https://gradio-builds.s3.amazonaws.com/demo-files/discordbots/inference_endpoints/falcon_instruct.png" alt="drawing" width="800" height=400/>
    4. Optional: Set Automatic Scale to Zero. This will pause your endpoint after 15 minutes of inactivity to prevent unwanted billing. 💰
    <img src="https://gradio-builds.s3.amazonaws.com/demo-files/discordbots/inference_endpoints/autoscale.png" alt="drawing" width="800" height=400/>
    5. Create the endpoint! Copy the endpoint URL after it's complete.
    <img src="https://gradio-builds.s3.amazonaws.com/demo-files/discordbots/inference_endpoints/running_model.png" alt="drawing" width="800" height=400/>
    """
    )

    # None of these components are visible; they only provide the inputs,
    # outputs and trigger needed to register `run_chat` as the "chat" endpoint.
    button = gr.Button(visible=False)
    history = gr.State([])
    message = gr.Textbox(visible=False)
    response = gr.Textbox(visible=False)
    button.click(run_chat, [message, history], [response, history], api_name="chat")


# Enable request queuing and start the app.
demo.queue(concurrency_count=70).launch()