# Source: beanbox-apis / app.py
# Last commit: "Update app.py" (c807e55, verified) by johnpaulbin
# File size: 7.38 kB
from flask import Flask, request, jsonify
from flask_caching import Cache
import time
import asyncio
from hypercorn.asyncio import serve
from hypercorn.config import Config
import os
# NOTE(review): an empty CURL_CA_BUNDLE is commonly used to switch off TLS
# certificate verification in HTTP client libraries — confirm this is intended
# and acceptable for the deployment before keeping it.
os.environ['CURL_CA_BUNDLE'] = ''
#from googletranslate import translate
import json
import random
import re
import numpy as np
import emoji, json
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
import torch
# Emoji aliases (64 entries), in the order shown in emoji_overview.png.
# deepmojify() indexes this list with the positions of the highest-scoring
# model outputs, so the order must not be changed.
# The continuation lines must stay unindented: the trailing backslashes join
# them inside the string literal, and any leading whitespace would produce
# empty entries when the string is split on single spaces.
EMOJIS = ":joy: :unamused: :weary: :sob: :heart_eyes: \
:pensive: :ok_hand: :blush: :heart: :smirk: \
:grin: :notes: :flushed: :100: :sleeping: \
:relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: \
:sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: \
:neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: \
:v: :sunglasses: :rage: :thumbsup: :cry: \
:sleepy: :yum: :triumph: :hand: :mask: \
:clap: :eyes: :gun: :persevere: :smiling_imp: \
:sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: \
:wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: \
:angry: :no_good: :muscle: :facepunch: :purple_heart: \
:sparkling_heart: :blue_heart: :grimacing: :sparkles:".split(' ')
def top_elements(array, k):
    """Return the indices of the ``k`` largest values, largest first.

    Args:
        array: One-dimensional sequence of comparable numbers (anything
            ``np.asarray`` accepts).
        k: How many indices to return. Values larger than the number of
            elements are clamped; zero or negative values yield an empty
            result.

    Returns:
        A NumPy integer array of at most ``k`` indices into ``array``,
        ordered from the highest value to the lowest.
    """
    array = np.asarray(array)
    # Clamp so an oversized k returns everything instead of raising.
    k = min(int(k), array.size)
    if k <= 0:
        # Without this guard ``[-0:]`` would select the whole array.
        return np.empty(0, dtype=np.intp)
    # Partition first (cheap), then sort only the k selected candidates.
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]
# Load the tokenizer vocabulary and the emoji model once, at import time, so
# every call to deepmojify() reuses them.
# The encoding is given explicitly so the JSON file is read the same way on
# every platform instead of depending on the locale default.
with open("vocabulary.json", 'r', encoding="utf-8") as f:
    vocabulary = json.load(f)

# The second argument (100) is presumably the fixed token length sentences are
# padded/truncated to — TODO confirm against SentenceTokenizer.
st = SentenceTokenizer(vocabulary, 100)
emojimodel = torchmoji_emojis("pytorch_model.bin")
def deepmojify(sentence, top_n=5, prob_only=False):
    """Predict the emojis that best match ``sentence``.

    Args:
        sentence: Text to analyse; it is tokenized with the module-level
            ``st`` tokenizer.
        top_n: Number of highest-scoring emojis to render.
        prob_only: When true, skip emoji rendering and return only the
            score tensor.

    Returns:
        ``prob`` alone when ``prob_only`` is true. Otherwise a tuple
        ``(list_emojis, prob)`` where ``list_emojis`` is a one-element list
        holding the ``top_n`` emojis joined by spaces, and ``prob`` is the
        score tensor for the sentence.
    """
    # USE_GPU is not defined anywhere in this module, so referencing it
    # directly raised NameError. Honour it if some other code sets it as a
    # module global, and default to the CPU path otherwise.
    use_gpu = bool(globals().get("USE_GPU", False))

    tokenized, _, _ = st.tokenize_sentences([sentence])
    # Token ids must be integers before they are handed to the model.
    tokenized = np.array(tokenized).astype(int)
    if use_gpu:
        tokenized = torch.tensor(tokenized).cuda()

    # Only one sentence was submitted, so keep the first (only) result row.
    prob = emojimodel.forward(tokenized)[0]
    if not use_gpu:
        # Presumably the model hands back a NumPy array for NumPy input;
        # normalise to a tensor so both paths return the same type.
        prob = torch.tensor(prob)
    if prob_only:
        return prob

    # detach() is a no-op for plain tensors and avoids the error .numpy()
    # raises if the model output is still attached to the autograd graph.
    emoji_ids = top_elements(prob.detach().cpu().numpy(), top_n)
    aliases = ' '.join(EMOJIS[index] for index in emoji_ids)
    list_emojis = [emoji.emojize(aliases, language='alias')]
    return list_emojis, prob
# Flask application plus a cache whose entries use a 60-second default
# timeout; the /translate route stores its results in it.
app = Flask(__name__)
cache = Cache(
    app,
    config={
        'CACHE_TYPE': 'simple',
        'CACHE_DEFAULT_TIMEOUT': 60,
    },
)
@app.route('/', methods=['GET'])
def home():
    """Landing endpoint: reply with a short hint pointing at /translate."""
    greeting = "HI! Use /translate POST"
    return greeting
def load_json_data(file_path):
    """Read a JSON file and return its parsed content.

    Args:
        file_path: Path of the JSON file to load.

    Returns:
        Whatever ``json.load`` produces for the file (list, dict, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: the data holds accented Spanish text and
    # must not depend on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
# Sentence pairs, loaded once at import time and shared by the /spanish and
# /fillgame routes. The code that consumes it assumes a list of dictionaries
# with "english" and "spanish" entries.
json_data = load_json_data('englishspanishpairs.json')
@app.route('/spanish')
def random_spanish_pair1():
    """Serve one randomly chosen entry of ``json_data`` as a JSON response."""
    return jsonify(random.choice(json_data))
def is_word(s):
    """
    Tell whether ``s`` is made up solely of alphabetic characters.

    Empty strings and strings containing digits, spaces or punctuation
    are rejected.
    """
    only_letters = s.isalpha()
    return only_letters
# Vocabulary pools, one per language, holding every purely alphabetic token
# found in the sentence pairs (tokens with digits or underscores are dropped).
english_words = {
    word
    for pair in json_data
    if "english" in pair
    for word in re.findall(r'\b\w+\b', pair.get("english", ""))
    if is_word(word)
}
spanish_words = {
    word
    for pair in json_data
    if "spanish" in pair
    for word in re.findall(r'\b\w+\b', pair.get("spanish", ""))
    if is_word(word)
}
def get_distractors(target_word, all_words, num_distractors=3):
    """
    Get distractor words from the same language.

    Args:
        target_word: The correct answer; it is never returned as a
            distractor (comparison ignores case).
        all_words: Iterable of candidate words to draw from.
        num_distractors: How many distractors are wanted.

    Returns:
        A list of distinct words picked at random from ``all_words``. It
        holds ``num_distractors`` items when enough candidates exist and
        fewer (possibly none) otherwise, instead of looping forever.
    """
    target_key = target_word.lower()
    # Build the eligible pool once; going through a set removes duplicates
    # in case ``all_words`` is not already a set.
    candidates = list({word for word in all_words if word.lower() != target_key})
    wanted = max(0, min(num_distractors, len(candidates)))
    return random.sample(candidates, wanted)
@app.route('/fillgame')
def random_spanish_pair2():
    """Build a fill-in-the-blank question from a random sentence pair.

    A random pair and a random language are chosen, one alphabetic word of
    that sentence is blanked out, and distractors are drawn from the same
    language's vocabulary.

    Returns:
        A JSON object with ``sentence`` (the text with the word replaced by
        ``_____``), ``options`` (correct word plus distractors, shuffled),
        ``correctWord`` and ``language``. If the chosen sentence has no
        alphabetic word, a JSON ``error`` object with status 404 is
        returned instead of failing with an unhandled exception.
    """
    random_pair = random.choice(json_data)

    # Choose either English or Spanish for the fill-in-the-blank game.
    if random.choice([True, False]):
        language, word_set = 'english', english_words
    else:
        language, word_set = 'spanish', spanish_words
    sentence = random_pair.get(language, "")

    # Keep only purely alphabetic tokens as candidates for the blank.
    words = [word for word in re.findall(r'\b\w+\b', sentence) if is_word(word)]
    if not words:
        # Missing or empty sentence: random.choice would raise IndexError.
        return jsonify({'error': 'No usable word found in the selected sentence'}), 404

    blank_word = random.choice(words)
    # Replace whole-word occurrences only; a plain str.replace would also
    # blank the word where it is merely a substring of a longer word.
    blank_pattern = r'\b' + re.escape(blank_word) + r'\b'
    sentence_with_blank = re.sub(blank_pattern, "_____", sentence)

    # Combine the correct word with same-language distractors and shuffle.
    options = [blank_word] + get_distractors(blank_word, word_set)
    random.shuffle(options)

    return jsonify({
        'sentence': sentence_with_blank,
        'options': options,
        'correctWord': blank_word,
        'language': language,
    })
"""
@app.route('/translate', methods=['POST'])
def dotranslate():
data = request.get_json()
txt = data.get('txt')
src = data.get('src', 'en')
dest = data.get('dest', 'es')
if txt:
cache_key = f"{txt}_{src}_{dest}"
translation = cache.get(cache_key)
if translation is None:
translation = translate(txt, dest=dest, src=src)
cache.set(cache_key, translation)
return jsonify({'translation': translation}), 200
else:
return jsonify({'error': 'No text provided'}), 400
"""
from transformers import M2M100ForConditionalGeneration
from tokenization_small100 import SMALL100Tokenizer
# Translation model and tokenizer used by the /translate route. Both are
# loaded once at import time from the same pretrained checkpoint name so that
# requests can reuse them.
model_name = "alirezamsh/small100"
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
tokenizer = SMALL100Tokenizer.from_pretrained(model_name)
@app.route('/translate', methods=['POST'])
def dotranslate():
    """Translate the text given in a JSON request body.

    Expected JSON fields:
        txt: Text to translate (required, non-empty string).
        src: Source language code, defaults to ``'en'``.
        dest: Target language code, defaults to ``'es'``.

    Returns:
        ``{'translation': ...}`` with status 200 on success, or
        ``{'error': 'No text provided'}`` with status 400 when the body is
        missing, is not a JSON object, or carries no usable ``txt``.
        Results are stored in the module-level cache, keyed by text and
        language pair.
    """
    # silent=True yields None instead of aborting on a missing/invalid JSON
    # body; anything that is not a JSON object is treated as "no input" so
    # the .get() calls below cannot fail.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    txt = data.get('txt')
    src = data.get('src', 'en')
    dest = data.get('dest', 'es')

    if not isinstance(txt, str) or not txt:
        return jsonify({'error': 'No text provided'}), 400

    cache_key = f"{txt}_{src}_{dest}"
    translation = cache.get(cache_key)
    if translation is None:
        # Set the source and target languages on the shared tokenizer.
        # NOTE(review): this mutates module-level state per request —
        # confirm the server never handles translations concurrently.
        tokenizer.src_lang = src
        tokenizer.tgt_lang = dest

        # Tokenize, generate and decode the translation.
        encoded = tokenizer(txt, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded,
            forced_bos_token_id=tokenizer.get_lang_id(dest)
        )
        translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

        # Cache the translation for subsequent identical requests.
        cache.set(cache_key, translation)

    return jsonify({'translation': translation}), 200
if __name__ == "__main__":
    # Run the app under Hypercorn's asyncio worker, listening on every
    # interface at port 7860.
    server_config = Config()
    server_config.bind = ["0.0.0.0:7860"]
    asyncio.run(serve(app, server_config))