import gradio as gr

from backend import get_message_single, get_message_spam, send_single, send_spam
from defaults import (
    ADDRESS_BETTERTRANSFORMER,
    ADDRESS_VANILLA,
    defaults_bt_single,
    defaults_bt_spam,
    defaults_vanilla_single,
    defaults_vanilla_spam,
)
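
# This demo compares two TorchServe deployments of the same sentiment-classification model:
# one serving the vanilla eager-mode 🤗 Transformers model, the other a model converted with
# BetterTransformer. The helpers imported from backend.py presumably POST the input text to
# each server's inference endpoint and return a markdown summary shown in the output boxes below.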

TITLE_IMAGE = """
<div
    style="
        display: block;
        margin-left: auto;
        margin-right: auto;
        width: 50%;
    "
>
<img src="https://huggingface.co/spaces/fxmarty/bettertransformer-demo/resolve/main/header.webp"/>
</div>
"""

TITLE = """
<div
    style="
        display: inline-flex;
        align-items: center;
        text-align: center;
        max-width: 1400px;
        gap: 0.8rem;
        font-size: 2.2rem;
    "
>
<h1 style="font-weight: 700; margin-bottom: 10px; margin-top: 10px;">
    Speed up your inference and support a higher workload with PyTorch's BetterTransformer 🤗
</h1>
</div>
"""

with gr.Blocks() as demo:
    gr.HTML(TITLE_IMAGE)
    gr.HTML(TITLE)

    gr.Markdown(
        """
    Let's try out TorchServe + BetterTransformer!

    BetterTransformer is a stable feature made available with [PyTorch 1.13](https://pytorch.org/blog/PyTorch-1.13-release/) that enables fastpath execution for encoder attention blocks.

    With a one-liner, you can convert your 🤗 Transformers models to BetterTransformer thanks to the [🤗 Optimum](https://huggingface.co/docs/optimum/main/en/index) library:

    ```python
    from optimum.bettertransformer import BetterTransformer

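    # `model` here is assumed to be a supported 🤗 Transformers model already loaded in
    # eager mode, e.g. with AutoModel.from_pretrained(...).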
    better_model = BetterTransformer.transform(model)
    ```

    This Space is a demo of an **end-to-end** deployment of PyTorch eager-mode models, both with and without BetterTransformer. The goal is to see the server-side and client-side benefits of using BetterTransformer.
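
    Under the hood, each button sends an HTTP request to a TorchServe inference endpoint. The snippet below is only an illustration of such a request; the actual address, model name and payload handling used by this Space live in `backend.py`:

    ```python
    import requests

    # TorchServe exposes registered models at /predictions/<model_name>;
    # the address and "my_classifier" below are placeholders, not this Space's real values.
    address = "http://localhost:8080"
    text = "Expectations were low, enjoyment was high"
    response = requests.post(f"{address}/predictions/my_classifier", data=text.encode("utf-8"))
    print(response.text)
    ```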
    
    ## Inference using...
    """
    )

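    # Two columns, one per deployment: each holds a hidden textbox with its server address,
    # a text input, and buttons for a single request and for spamming requests.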
    with gr.Row():
        with gr.Column(scale=50):
            gr.Markdown("### Vanilla Transformers + TorchServe")

            address_input_vanilla = gr.Textbox(
                max_lines=1, label="ip vanilla", value=ADDRESS_VANILLA, visible=False
            )

            input_model_vanilla = gr.Textbox(
                max_lines=1,
                label="Text",
                value="Expectations were low, enjoyment was high",
            )

            btn_single_vanilla = gr.Button("Send single text request")
            output_single_vanilla = gr.Markdown(
                label="Output single vanilla",
                value=get_message_single(**defaults_vanilla_single),
            )

            btn_spam_vanilla = gr.Button(
                "Spam text requests (from sst2 validation set)"
            )
            output_spam_vanilla = gr.Markdown(
                label="Output spam vanilla",
                value=get_message_spam(**defaults_vanilla_spam),
            )

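            # Wire the buttons to the backend request helpers; the hidden textbox above
            # routes each request to the vanilla TorchServe endpoint.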
            btn_single_vanilla.click(
                fn=send_single,
                inputs=[input_model_vanilla, address_input_vanilla],
                outputs=output_single_vanilla,
            )
            btn_spam_vanilla.click(
                fn=send_spam,
                inputs=[address_input_vanilla],
                outputs=output_spam_vanilla,
            )

        with gr.Column(scale=50):
            gr.Markdown("### BetterTransformer + TorchServe")

            address_input_bettertransformer = gr.Textbox(
                max_lines=1,
                label="ip bettertransformer",
                value=ADDRESS_BETTERTRANSFORMER,
                visible=False,
            )

            input_model_bettertransformer = gr.Textbox(
                max_lines=1,
                label="Text",
                value="Expectations were low, enjoyment was high",
            )

            btn_single_bt = gr.Button("Send single text request")
            output_single_bt = gr.Markdown(
                label="Output single bt", value=get_message_single(**defaults_bt_single)
            )

            btn_spam_bt = gr.Button("Spam text requests (from sst2 validation set)")
            output_spam_bt = gr.Markdown(
                label="Output spam bt", value=get_message_spam(**defaults_bt_spam)
            )

            btn_single_bt.click(
                fn=send_single,
                inputs=[input_model_bettertransformer, address_input_bettertransformer],
                outputs=output_single_bt,
            )

            btn_spam_bt.click(
                fn=send_spam,
                inputs=[address_input_bettertransformer],
                outputs=output_spam_bt,
            )

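# Process queued requests with a single worker so runs against the two servers do not overlap.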
demo.queue(concurrency_count=1)
demo.launch()