beanbox-apis / app.py
johnpaulbin's picture
First model version
3affa92
raw
history blame
3.87 kB
from flask import Flask, request, jsonify
import asyncio
from hypercorn.asyncio import serve
from hypercorn.config import Config
from setfit import SetFitModel
import torch.nn.functional as F
from torch import nn
# Flask application object; the HTTP routes below are registered on it.
app = Flask(__name__)
from sentence_transformers import SentenceTransformer
# Sentence-embedding model used by the /infer route to encode the request text.
sentencemodel = SentenceTransformer('johnpaulbin/toxic-gte-small-3')
# When True, the emoji model and its tokenized input are moved to CUDA.
USE_GPU = False
# NOTE: the literal below is a free-standing string expression, not a module
# docstring (it is not the first statement of the file). It introduces the
# torchMoji section that follows.
""" Use torchMoji to predict emojis from a single text input
"""
import numpy as np
import emoji, json
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
import torch
# Emoji aliases as laid out in emoji_overview.png. The list is indexed by the
# emoji model's output position, so the order must not change.
EMOJIS = (
    ":joy: :unamused: :weary: :sob: :heart_eyes: "
    ":pensive: :ok_hand: :blush: :heart: :smirk: "
    ":grin: :notes: :flushed: :100: :sleeping: "
    ":relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: "
    ":sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: "
    ":neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: "
    ":v: :sunglasses: :rage: :thumbsup: :cry: "
    ":sleepy: :yum: :triumph: :hand: :mask: "
    ":clap: :eyes: :gun: :persevere: :smiling_imp: "
    ":sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: "
    ":wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: "
    ":angry: :no_good: :muscle: :facepunch: :purple_heart: "
    ":sparkling_heart: :blue_heart: :grimacing: :sparkles:"
).split(" ")
def top_elements(array, k):
    """Return the indices of the ``k`` largest values, largest first.

    Args:
        array: One-dimensional array-like of comparable numbers.
        k: How many indices to return. Values above the number of elements
            are clamped to it; zero or negative values yield an empty result.

    Returns:
        A 1-D NumPy integer array of indices into ``array``, ordered from the
        largest value to the smallest of the selected ones.
    """
    values = np.asarray(array)
    # Clamp k: argpartition raises when k exceeds the length, and k == 0 would
    # make the ``[-k:]`` slice below select *every* element instead of none.
    k = min(int(k), values.size)
    if k <= 0:
        return np.empty(0, dtype=np.intp)
    # Partial selection first (cheap), then sort only the k survivors.
    ind = np.argpartition(values, -k)[-k:]
    return ind[np.argsort(values[ind])][::-1]
# Load the token vocabulary. The encoding is stated explicitly so decoding the
# JSON file does not depend on the platform's default locale.
with open("vocabulary.json", 'r', encoding="utf-8") as f:
    vocabulary = json.load(f)

# Tokenizer built from that vocabulary. The second argument (100) is passed
# straight to SentenceTokenizer — presumably the fixed sentence length; confirm.
st = SentenceTokenizer(vocabulary, 100)

# Emoji-prediction model restored from the local weights file.
emojimodel = torchmoji_emojis("pytorch_model.bin")
if USE_GPU:
    emojimodel.to("cuda:0")
def deepmojify(sentence, top_n=5, prob_only=False):
    """Score one sentence with the emoji model and optionally render emoji.

    Args:
        sentence: Text to score.
        top_n: Number of highest-scoring emoji to render.
        prob_only: When true, return only the model's scores and skip the
            emoji rendering step.

    Returns:
        ``prob`` alone when ``prob_only`` is true. Otherwise a tuple
        ``(list_emojis, prob)`` where ``list_emojis`` is a one-element list
        holding the ``top_n`` emoji joined by spaces, best match first.
    """
    tokenized, _, _ = st.tokenize_sentences([sentence])
    # Token ids as an integer array.
    tokenized = np.array(tokenized).astype(int)
    if USE_GPU:
        # Move the ids onto the GPU as a tensor.
        tokenized = torch.tensor(tokenized).cuda()

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        prob = emojimodel.forward(tokenized)[0]
    if prob_only:
        return prob

    # NOTE(review): the model may hand back either a tensor or a NumPy array
    # depending on the input type — accept both before ranking. Confirm
    # against the installed torchmoji version.
    if torch.is_tensor(prob):
        scores = prob.detach().cpu().numpy()
    else:
        scores = np.asarray(prob)

    # Uses the module-level top_elements helper (the former nested copy was
    # an exact duplicate).
    emoji_ids = top_elements(scores, top_n)
    aliases = ' '.join(EMOJIS[i] for i in emoji_ids)
    list_emojis = [emoji.emojize(aliases, language='alias')]
    return list_emojis, prob
# Feed-forward classifier used by the /infer route: 448 input features,
# four hidden stages (Linear -> ReLU -> BatchNorm) and two output logits.
model = nn.Sequential(
    nn.Linear(448, 300),
    nn.ReLU(),
    nn.BatchNorm1d(300),
    nn.Linear(300, 300),
    nn.ReLU(),
    nn.BatchNorm1d(300),
    nn.Linear(300, 200),
    nn.ReLU(),
    nn.BatchNorm1d(200),
    nn.Linear(200, 125),
    nn.ReLU(),
    nn.BatchNorm1d(125),
    nn.Linear(125, 2),
    nn.Dropout(0.05),  # inactive once the model is switched to eval mode
)

# Restore the saved weights when a checkpoint exists. Previously the freshly
# initialised (random) weights were written over 'large.pth' on every start,
# so the service never used stored weights and destroyed any existing
# checkpoint. The file is now only created when it is missing.
try:
    _state = torch.load('large.pth', map_location='cpu')
except FileNotFoundError:
    torch.save(model.state_dict(), 'large.pth')
else:
    model.load_state_dict(_state)
    del _state

# Inference mode: BatchNorm uses running statistics and Dropout is disabled.
model.eval()
@app.route('/infer', methods=['POST'])
def translate():
    """Classify the posted text and answer with ``"true"`` or ``"false"``.

    Expects a JSON body with a string ``text`` field. The lower-cased text is
    scored by the emoji model and encoded by the sentence model; both vectors
    are concatenated and fed to the classifier ``model``.

    Returns:
        ``"false"`` when the classifier's first output outranks the second,
        otherwise ``"true"`` (presumably "is toxic" — confirm with the
        training code). A 400 JSON error is returned when the body is not
        JSON or lacks a string ``text`` field.
    """
    data = request.get_json(silent=True)
    text = data.get('text') if isinstance(data, dict) else None
    if not isinstance(text, str):
        return jsonify({"error": "JSON body must contain a string 'text' field"}), 400
    text = text.lower()

    with torch.no_grad():
        # The classifier is never moved off the CPU, so bring both feature
        # vectors there as float tensors before joining them.
        probs = torch.as_tensor(deepmojify(text, prob_only=True)).float().cpu()
        embedding = sentencemodel.encode(text, convert_to_tensor=True).float().cpu()
        features = torch.cat((probs, embedding))
        scores = F.softmax(model(features.view(1, -1)), dim=1)

    return "false" if scores[0][0] > scores[0][1] else "true"
# Define more routes for other operations like download_model, etc.
# Script entry point: serve the Flask app through Hypercorn's asyncio worker.
if __name__ == "__main__":
    server_config = Config()
    # Listen on all interfaces, port 7860; change host/port here if needed.
    server_config.bind = ["0.0.0.0:7860"]
    asyncio.run(serve(app, server_config))