from __future__ import print_function, division, unicode_literals

import gradio as gr

import os

import json
import numpy as np

from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
from emoji import emojize

from huggingface_hub import hf_hub_download

HF_TOKEN = os.getenv('HF_TOKEN')
# dataset saver for flagged examples (currently unused; see the flagging FIXME below)
hf_writer = gr.HuggingFaceDatasetSaver(
    HF_TOKEN,
    "crowdsourced-deepmoji-flags",
    private=True,
    separate_dirs=False
)

# download the pretrained torchMoji weights and vocabulary from the Hugging Face Hub
model_name = "Uberduck/torchmoji"
model_path = hf_hub_download(repo_id=model_name, filename="pytorch_model.bin")
vocab_path = hf_hub_download(repo_id=model_name, filename="vocabulary.json")

# emoji id (as a string) -> ":alias:" mapping bundled with the app
with open('./data/emoji_codes.json', 'r') as f:
    emoji_codes = json.load(f)

maxlen = 30  # maximum number of tokens per sentence

with open(vocab_path, 'r') as f:
    vocabulary = json.load(f)

st = SentenceTokenizer(vocabulary, maxlen)
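
# Per the torchMoji API (and the unpacking in predict below), st.tokenize_sentences
# returns a (token_ids, infos, stats) tuple; token_ids is a numpy array of shape
# (n_sentences, maxlen) of vocabulary indices, which is what the model expects.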

# pretrained torchMoji model; its forward pass returns a probability
# distribution over the 64 DeepMoji emoji classes
model = torchmoji_emojis(model_path)

# unused: thin wrapper kept alongside the disabled flagging callback below
def pre_hf_writer(*args):
    return hf_writer(args)

def top_elements(array, k):
    # indices of the k largest values, ordered from highest to lowest
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]
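
# Quick sanity check of top_elements (not executed by the app):
#   top_elements(np.array([0.1, 0.5, 0.2, 0.9]), 2)  ->  array([3, 1])
# i.e. the index of the largest value first, then the second largest.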

def predict(deepmoji_analysis, emoji_count):
    if deepmoji_analysis.strip() == '':
        # dotted face emoji
        return {"πŸ«₯":1}

    return_label = {}
    # tokenize input text
    tokenized, _, _ = st.tokenize_sentences([deepmoji_analysis])

    if len(tokenized) == 0:
        # dotted face emoji
        return {"πŸ«₯":1}

    # the model returns one probability vector (64 emoji classes) per sentence
    prob = model(tokenized)[0]

    # Find the top emojis for the sentence. Emoji ids (0-63) correspond to the
    # mapping in emoji_overview.png at the root of the torchMoji repo.
    ind_top_ids = top_elements(prob, emoji_count)

    for ind in ind_top_ids:
        # unicode emoji + :alias:
        label_emoji = emojize(emoji_codes[str(ind)], language="alias")
        label_name = label_emoji + emoji_codes[str(ind)]
        # probability for this emoji
        label_prob = prob[ind]
        return_label[label_name] = label_prob

    if len(return_label) == 0:
        # dotted face emoji
        return {"πŸ«₯":1}

    return return_label
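
# Example call outside Gradio (a sketch; actual scores depend on the model):
#   predict("My flight is delayed.. amazing.", 3)
# returns a dict mapping "<emoji>:alias:" labels to probabilities, which
# gr.Label renders as a ranked list.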

default_input = "This is the shit!"

input_textbox = gr.Textbox(
    label="English Text",
    info="ignores: emojis, emoticons, numbers, URLs",
    lines=1,
    value=default_input,
    autofocus=True
)
slider = gr.Slider(1, 64, value=5, step=1, label="Top # Emoji", info="Choose between 1 and 64 top emojis to show")

gradio_app = gr.Interface(
    predict,
    [
        input_textbox,
        slider,
    ],
    outputs=gr.Label(
        label="Suitable Emoji",
        # hard-coded example output (could not be auto-selected)
        value={
            "🎧:headphones:": 0.10912112891674042,
            "🎶:notes:": 0.10073345899581909,
            "👌:ok_hand:": 0.05672002583742142,
            "👏:clap:": 0.0559493824839592,
            "👍:thumbsup:": 0.05157269537448883
        }
    ),
    examples=[
        ["This is shit!", 5],
        ["You love hurting me, huh?", 5],
        ["I know good movies, this ain't one", 5],
        ["It was fun, but I'm not going to miss you", 5],
        ["My flight is delayed.. amazing.", 5],
        ["What is happening to me??", 5],
    ],
    cache_examples=True,
    live=True,
    title="🎭 DeepMoji 🎭",
    allow_duplication=True,
    # flagged examples would be saved to the HF dataset via hf_writer
    # FIXME: gradio sends the output as a saveable filename, which crashes flagging
    # allow_flagging="manual",
    # flagging_options=["🚩 sarcasm / innuendo 😏", "🚩 unsuitable / other"],
    # flagging_callback=hf_writer
)

if __name__ == "__main__":
    gradio_app.launch()
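
# Running locally is a sketch under these assumptions: this file is saved as app.py,
# gradio / torchmoji / huggingface_hub / emoji are installed, and
# ./data/emoji_codes.json exists next to it:
#   python app.py
# launch() prints a local URL; gradio_app.launch(share=True) would also create a public link.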