Spaces:
Running
Running
File size: 3,843 Bytes
df3b007 ebb2014 df3b007 5ff1a7c df3b007 11c4f8e df3b007 0fd5020 df3b007 b32681b 6755c50 5ff1a7c 1c3f925 acf2478 1c3f925 5ff1a7c 0eefaf8 6755c50 df3b007 f631ef8 df3b007 6755c50 d264d99 20106f5 df3b007 92eafc3 809daf0 80f1d19 b74a958 9aba162 b74a958 404d045 b74a958 df3b007 404d045 df3b007 f631ef8 b32681b df3b007 b74a958 f631ef8 ebb2014 3504435 0eefaf8 b74a958 0eefaf8 3504435 630326f 0eefaf8 f631ef8 2a22f13 8e4d744 809daf0 0eefaf8 809daf0 2a22f13 8e4d744 630326f 87b3c09 0eefaf8 2bfa3bf 12ea991 5ff1a7c 92eafc3 8e4d744 ebb2014 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
from __future__ import print_function, division, unicode_literals
import gradio as gr
import sys
import os
from os.path import abspath, dirname
import json
import numpy as np
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
from emoji import emojize
from huggingface_hub import hf_hub_download
# Hugging Face access token for the flagging dataset saver; None when the
# HF_TOKEN environment variable is not set.
HF_TOKEN = os.getenv('HF_TOKEN')

# Dataset saver intended as the flagging callback of the interface.
hf_writer = gr.HuggingFaceDatasetSaver(
    HF_TOKEN,
    "crowdsourced-deepmoji-flags",
    private=True,
    separate_dirs=False
)

# Model weights and vocabulary are fetched from the Hugging Face Hub.
model_name = "Uberduck/torchmoji"
model_path = hf_hub_download(repo_id=model_name, filename="pytorch_model.bin")
vocab_path = hf_hub_download(repo_id=model_name, filename="vocabulary.json")

# Lookup table indexed with str(emoji_id) by predict(); each value is passed
# to emojize(..., language="alias").
# The encoding is given explicitly so the JSON is read the same way on every
# platform instead of depending on the locale's default encoding.
with open('./data/emoji_codes.json', 'r', encoding='utf-8') as f:
    emoji_codes = json.load(f)

# Length limit handed to the sentence tokenizer.
maxlen = 30

with open(vocab_path, 'r', encoding='utf-8') as f:
    vocabulary = json.load(f)

st = SentenceTokenizer(vocabulary, maxlen)
model = torchmoji_emojis(model_path)
def pre_hf_writer(*args):
    """Forward every positional argument to ``hf_writer`` as one tuple.

    The arguments are not unpacked again: ``hf_writer`` receives a single
    positional argument, the tuple of everything passed in.
    """
    # NOTE(review): confirm that the saver instance can be called directly;
    # flagging callbacks are normally driven through their own methods.
    collected = args
    return hf_writer(collected)
def top_elements(array, k):
    """Return the indices of the ``k`` largest values, largest first.

    Args:
        array: One-dimensional sequence of scores (NumPy array or anything
            ``np.asarray`` accepts).
        k: Number of indices wanted. Coerced with ``int()`` and clamped to
            ``[0, len(array)]``, so out-of-range requests never raise.

    Returns:
        A NumPy integer array of at most ``k`` indices into ``array``,
        ordered by descending score. Empty when ``k`` clamps to 0.
    """
    array = np.asarray(array)
    k = max(0, min(int(k), len(array)))
    if k == 0:
        # ``[-0:]`` would select the whole array, so answer explicitly.
        return np.array([], dtype=np.intp)
    # Partition first so only the k winners need a full sort.
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]
# U+1FAE5 (dotted line face): the single label returned when nothing can be
# ranked. Written as an escape so it survives re-encoding of this file.
_FALLBACK_EMOJI = "\U0001FAE5"


def predict(deepmoji_analysis, emoji_count):
    """Rank the emojis that best match a piece of text.

    Args:
        deepmoji_analysis: Text to analyse. ``None``, empty and
            whitespace-only input yield the fallback result.
        emoji_count: How many top emojis to return. Coerced with ``int()``
            and clamped to the number of scores the model produced.

    Returns:
        A dict mapping ``"<emoji><:alias:>"`` labels to their score as a
        plain ``float``, or ``{"\U0001FAE5": 1}`` when the text is blank,
        cannot be tokenized, or no emoji was selected.
    """
    if not deepmoji_analysis or not deepmoji_analysis.strip():
        return {_FALLBACK_EMOJI: 1}

    # Tokenize the input text; the tokenizer works on a list of sentences.
    tokenized, _, _ = st.tokenize_sentences([deepmoji_analysis])
    if len(tokenized) == 0:
        return {_FALLBACK_EMOJI: 1}

    # Exactly one sentence went in, so only the first row of scores matters.
    # Emoji ids (0-63) correspond to the mapping in emoji_overview.png at the
    # root of the torchMoji repo.
    sentence_probs = model(tokenized)[0]

    wanted = min(int(emoji_count), len(sentence_probs))
    if wanted < 1:
        return {_FALLBACK_EMOJI: 1}

    return_label = {}
    for ind in top_elements(sentence_probs, wanted):
        alias = emoji_codes[str(ind)]
        # Label is the unicode emoji followed by its :alias:.
        label_name = emojize(alias, language="alias") + alias
        # Plain float keeps the result serialisable regardless of the
        # scalar type the model returns.
        return_label[label_name] = float(sentence_probs[ind])

    if not return_label:
        return {_FALLBACK_EMOJI: 1}
    return return_label
# Text shown in the input box when the page first loads.
default_input = "This is the shit!"

input_textbox = gr.Textbox(
    label="English Text",
    info="ignores: emojis, emoticons, numbers, URLs",
    lines=1,
    value=default_input,
    autofocus=True
)

# Upper bound of 64 matches the emoji ids (0-63) the model distinguishes.
slider = gr.Slider(1, 64, value=5, step=1, label="Top # Emoji", info="Choose between 1 and 64 top emojis to show")

# NOTE(review): the emoji in `title` (and the second one in the commented-out
# flagging options) arrived mis-encoded as "π" and cannot be recovered from
# this file; restore the intended characters.
gradio_app = gr.Interface(
    predict,
    [
        input_textbox,
        slider,
    ],
    outputs=gr.Label(
        label="Suitable Emoji",
        # could not auto select example output, so a fixed one is shown;
        # keys follow predict()'s "<emoji><:alias:>" label format
        value={
            "🎧:headphones:": 0.10912112891674042,
            "🎶:notes:": 0.10073345899581909,
            "👌:ok_hand:": 0.05672002583742142,
            "👏:clap:": 0.0559493824839592,
            "👍:thumbsup:": 0.05157269537448883
        }
    ),
    examples=[
        ["This is shit!", 5],
        ["You love hurting me, huh?", 5],
        ["I know good movies, this ain't one", 5],
        ["It was fun, but I'm not going to miss you", 5],
        ["My flight is delayed.. amazing.", 5],
        ["What is happening to me??", 5],
    ],
    cache_examples=True,
    live=True,
    title="π DeepMoji π",
    allow_duplication=True,
    # flagged saved to hf dataset
    # FIXME: gradio sends output as a saveable filename, crashing flagging
    # allow_flagging="manual",
    # flagging_options=["'💩 sarcasm / innuendo π'", "'💩 unsuitable / other'"],
    # flagging_callback=hf_writer
)

if __name__ == "__main__":
    gradio_app.launch()