File size: 3,679 Bytes
58627d4
38cf82b
 
 
 
 
58627d4
38cf82b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
955a6c8
38cf82b
58627d4
 
 
38cf82b
 
 
 
 
 
ec3ec5c
38cf82b
58627d4
38cf82b
58627d4
 
17692e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import gradio as gr
import asyncio
import torch.nn.functional as F
from torch import nn
import os
# NOTE(review): an empty CURL_CA_BUNDLE is commonly used as a workaround to
# make HTTP clients skip TLS certificate verification -- confirm this is
# intentional, since it would weaken the security of the model download below.
os.environ['CURL_CA_BUNDLE'] = ''


from sentence_transformers import SentenceTransformer
# Sentence-embedding model; its output is concatenated with the emoji
# probabilities to form the classifier input (see `inf`).
sentencemodel = SentenceTransformer('johnpaulbin/toxic-gte-small-3')

# When True, the emoji model and its tokenized input are moved to CUDA.
USE_GPU = False


""" Use torchMoji to predict emojis from a single text input
"""

import numpy as np
import emoji, json
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
import torch

# Emoji map in emoji_overview.png
# Ordered list of emoji aliases; `deepmojify` indexes into it with the
# positions of the highest-scoring model outputs.
EMOJIS = (
    ":joy: :unamused: :weary: :sob: :heart_eyes: "
    ":pensive: :ok_hand: :blush: :heart: :smirk: "
    ":grin: :notes: :flushed: :100: :sleeping: "
    ":relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: "
    ":sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: "
    ":neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: "
    ":v: :sunglasses: :rage: :thumbsup: :cry: "
    ":sleepy: :yum: :triumph: :hand: :mask: "
    ":clap: :eyes: :gun: :persevere: :smiling_imp: "
    ":sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: "
    ":wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: "
    ":angry: :no_good: :muscle: :facepunch: :purple_heart: "
    ":sparkling_heart: :blue_heart: :grimacing: :sparkles:"
).split(' ')

def top_elements(array, k):
    """Return the indices of the ``k`` largest values, largest first.

    Args:
        array: One-dimensional sequence or NumPy array of scores.
        k: Number of indices to return. Values larger than the number of
            elements are clamped; values ``<= 0`` yield an empty result.

    Returns:
        A NumPy integer array of indices into ``array``, ordered by
        descending value.
    """
    array = np.asarray(array)
    # Clamp so argpartition never sees an out-of-range kth.
    k = min(int(k), array.size)
    if k <= 0:
        # Without this guard the slice `[-0:]` would select every index.
        return np.array([], dtype=np.intp)
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]


# Load the token vocabulary used by the torchMoji tokenizer. The encoding is
# given explicitly so decoding does not depend on the platform default.
with open("vocabulary.json", encoding="utf-8") as f:
    vocabulary = json.load(f)

# NOTE(review): the second argument (100) is presumably the fixed maximum
# sentence length in tokens -- confirm against SentenceTokenizer.
st = SentenceTokenizer(vocabulary, 100)

# Emoji-prediction model restored from the local weights file.
emojimodel = torchmoji_emojis("pytorch_model.bin")

if USE_GPU:
    emojimodel.to("cuda:0")

def deepmojify(sentence, top_n=5, prob_only=False):
    """Score a single sentence with the torchMoji emoji model.

    Args:
        sentence: Text to score.
        top_n: Number of highest-scoring emojis to render.
        prob_only: When true, return only the score tensor.

    Returns:
        The score tensor alone when ``prob_only`` is true. Otherwise a tuple
        ``(list_emojis, prob)``, where ``list_emojis`` is a one-element list
        holding the ``top_n`` emojis joined by spaces, highest score first.
    """
    tokenized, _, _ = st.tokenize_sentences([sentence])
    # The tokenizer output is cast to integer token ids for the model.
    tokenized = np.array(tokenized).astype(int)
    if USE_GPU:
        tokenized = torch.tensor(tokenized).cuda()

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        prob = emojimodel.forward(tokenized)[0]
    if not USE_GPU:
        # NOTE(review): presumably the model returns a NumPy array when fed a
        # NumPy array, hence the conversion to a tensor here -- confirm.
        prob = torch.tensor(prob)
    if prob_only:
        return prob

    # Use the module-level helper rather than a private copy of it.
    emoji_ids = top_elements(prob.cpu().numpy(), top_n)
    aliases = ' '.join(EMOJIS[i] for i in emoji_ids)
    list_emojis = [emoji.emojize(aliases, language='alias')]
    return list_emojis, prob


def _build_classifier():
    """Assemble the feed-forward classifier head.

    Four Linear -> ReLU -> BatchNorm1d stages (448 -> 300 -> 300 -> 200 -> 125)
    are followed by a 2-way output layer and a light dropout. Layer order is
    significant: the positions inside the Sequential name the parameters.
    """
    # NOTE(review): 448 presumably equals the emoji-score vector length plus
    # the sentence-embedding size -- confirm against the feature builder.
    widths = (448, 300, 300, 200, 125)
    layers = []
    for n_in, n_out in zip(widths, widths[1:]):
        layers.extend((nn.Linear(n_in, n_out), nn.ReLU(), nn.BatchNorm1d(n_out)))
    layers.append(nn.Linear(widths[-1], 2))
    layers.append(nn.Dropout(0.05))
    return nn.Sequential(*layers)


model = _build_classifier()

# Restore the trained classifier weights, mapping all tensors onto the CPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source, and consider `weights_only=True` if the installed torch
# version supports it -- confirm.
model.load_state_dict(torch.load("large.pth", map_location=torch.device('cpu')))
# Switch to evaluation mode for inference (affects the Dropout and
# BatchNorm1d layers).
model.eval()


def inf(inpt):
    """Classify a piece of text as toxic or not toxic.

    The lower-cased text is turned into a feature vector by concatenating the
    emoji scores from ``deepmojify`` with the sentence embedding. That vector
    is then scored by the classifier ``model``.

    Args:
        inpt: Text to classify.

    Returns:
        ``"Toxic! <p>"`` when the softmax probability of class 1 exceeds
        0.62, otherwise ``"Not toxic <p>"`` with the probability of class 0.
        ``<p>`` is the string form of the scalar tensor.
    """
    text = inpt.lower()
    emoji_probs = deepmojify(text, prob_only=True)
    embedding = sentencemodel.encode(text, convert_to_tensor=True)
    features = torch.cat((emoji_probs, embedding))

    # Inference only: skipping autograd also keeps `grad_fn=...` out of the
    # text shown to the user.
    with torch.no_grad():
        output = F.softmax(model(features.view(1, -1)), dim=1)

    not_toxic_p, toxic_p = output[0][0], output[0][1]
    # A NaN score fails this comparison and is reported as "Not toxic",
    # the same outcome as the earlier negated form.
    if toxic_p > 0.62:
        return "Toxic! " + str(toxic_p)
    return "Not toxic " + str(not_toxic_p)

# Expose `inf` as a text-in / text-out Gradio app.
iface = gr.Interface(fn=inf, inputs="text", outputs="text")
# Enable request queuing, then start the server.
# NOTE(review): `concurrency_count` may not be accepted by newer Gradio
# releases -- confirm against the pinned Gradio version.
iface.queue(concurrency_count=500).launch()