from flask import Flask, request, jsonify
from flask_caching import Cache
import time
import asyncio
from hypercorn.asyncio import serve
from hypercorn.config import Config
import os

# Blank out the curl CA bundle (commonly used to work around SSL certificate issues)
os.environ['CURL_CA_BUNDLE'] = ''

#from googletranslate import translate
import json
import random
import re
import numpy as np
import emoji
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
import torch
# Emoji map in emoji_overview.png
EMOJIS = ":joy: :unamused: :weary: :sob: :heart_eyes: \
:pensive: :ok_hand: :blush: :heart: :smirk: \
:grin: :notes: :flushed: :100: :sleeping: \
:relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: \
:sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: \
:neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: \
:v: :sunglasses: :rage: :thumbsup: :cry: \
:sleepy: :yum: :triumph: :hand: :mask: \
:clap: :eyes: :gun: :persevere: :smiling_imp: \
:sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: \
:wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: \
:angry: :no_good: :muscle: :facepunch: :purple_heart: \
:sparkling_heart: :blue_heart: :grimacing: :sparkles:".split(' ')
def top_elements(array, k):
    """Indices of the k largest values in `array`, sorted from highest to lowest."""
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]
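# Illustrative check of top_elements (not in the original source):
#   top_elements(np.array([0.1, 0.5, 0.2, 0.9]), 2)  -> array([3, 1])
# i.e. the indices of the two largest scores, best first.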
# Load the torchMoji vocabulary and pretrained weights from local files.
with open("vocabulary.json", 'r') as f:
    vocabulary = json.load(f)

st = SentenceTokenizer(vocabulary, 100)
emojimodel = torchmoji_emojis("pytorch_model.bin")

# USE_GPU is referenced in deepmojify() but was never defined in the original;
# deriving it from CUDA availability is an assumption.
USE_GPU = torch.cuda.is_available()
def deepmojify(sentence, top_n=5, prob_only=False):
    list_emojis = []
    tokenized, _, _ = st.tokenize_sentences([sentence])
    tokenized = np.array(tokenized).astype(int)  # token ids as integers
    if USE_GPU:
        tokenized = torch.tensor(tokenized).cuda()  # move the input to the GPU
    prob = emojimodel.forward(tokenized)[0]
    if not USE_GPU:
        prob = torch.tensor(prob)  # forward() returns a numpy array on CPU
    if prob_only:
        return prob
    # Map the top_n highest-scoring classes to their emoji aliases
    emoji_ids = top_elements(prob.cpu().numpy(), top_n)
    emojis = map(lambda x: EMOJIS[x], emoji_ids)
    list_emojis.append(emoji.emojize(f"{' '.join(emojis)}", language='alias'))
    # Return the emojis as a list, together with the raw scores
    return list_emojis, prob
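# Illustrative usage (output values depend on the model weights):
#   emojis, prob = deepmojify("This is amazing!")
#   emojis -> a one-element list such as ['😍 😁 👌 ❤ 💕']
#   prob   -> a 64-dim tensor of scores, one per entry in EMOJIS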
app = Flask(__name__)
# Simple in-process cache; entries expire after 60 seconds by default.
cache = Cache(app, config={'CACHE_TYPE': 'simple', 'CACHE_DEFAULT_TIMEOUT': 60})
@app.route('/')  # decorator missing in the source; the root route is inferred from the message below
def home():
    return "HI! Use /translate POST"
# Load the JSON data into memory
def load_json_data(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

# The JSON structure is a list of {"english": ..., "spanish": ...} dictionaries
json_data = load_json_data('englishspanishpairs.json')
@app.route('/random_pair')  # route path assumed; the decorator is missing in the source
def random_spanish_pair1():
    # Select a random English-Spanish pair
    random_pair = random.choice(json_data)
    return jsonify(random_pair)
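# Illustrative response shape (keys come from englishspanishpairs.json):
#   {"english": "The cat sleeps.", "spanish": "El gato duerme."}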
def is_word(s):
    """
    Check if the string 's' is a word (contains only alphabetic characters).
    """
    return s.isalpha()
# Sets holding English and Spanish words separately
english_words = set()
spanish_words = set()

# Populate the word sets
for pair in json_data:
    if "english" in pair:
        # Extract words from the English sentence and filter out non-alphabetic tokens
        english_words.update(filter(is_word, re.findall(r'\b\w+\b', pair.get("english", ""))))
    if "spanish" in pair:
        # Extract words from the Spanish sentence and filter out non-alphabetic tokens
        spanish_words.update(filter(is_word, re.findall(r'\b\w+\b', pair.get("spanish", ""))))
def get_distractors(target_word, all_words, num_distractors=3):
    """
    Get distractor words from the same language.
    """
    # Exclude the target word up front; the original while-loop could spin
    # forever when the pool had fewer candidates than num_distractors.
    candidates = [w for w in all_words if w.lower() != target_word.lower()]
    return random.sample(candidates, min(num_distractors, len(candidates)))
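# Illustrative usage (actual words are random draws from the pool):
#   get_distractors("gato", spanish_words)  -> e.g. ['perro', 'casa', 'libro']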
@app.route('/fill_blank')  # route path assumed; the decorator is missing in the source
def random_spanish_pair2():
    # Select a random English-Spanish pair
    random_pair = random.choice(json_data)
    # Choose either English or Spanish for the fill-in-the-blank game
    if random.choice([True, False]):
        sentence = random_pair.get('english', "")
        language = 'english'
        word_set = english_words
    else:
        sentence = random_pair.get('spanish', "")
        language = 'spanish'
        word_set = spanish_words
    # Split the sentence into words and filter out non-words
    words = list(filter(is_word, re.findall(r'\b\w+\b', sentence)))
    # Choose a random word to replace with a blank
    blank_word = random.choice(words)
    # Note: str.replace blanks every occurrence of the word, including substrings
    sentence_with_blank = sentence.replace(blank_word, "_____")
    # Get distractors from the same language
    distractors = get_distractors(blank_word, word_set)
    # Combine the correct word with the distractors and shuffle
    options = [blank_word] + distractors
    random.shuffle(options)
    # Return the sentence with a blank, the options, and the correct word
    return jsonify({
        'sentence': sentence_with_blank,
        'options': options,
        'correctWord': blank_word,
        'language': language
    })
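# Illustrative response shape (values depend on the random pair chosen):
#   {"sentence": "El _____ duerme.", "options": ["gato", "perro", "casa", "libro"],
#    "correctWord": "gato", "language": "spanish"}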
""" | |
@app.route('/translate', methods=['POST']) | |
def dotranslate(): | |
data = request.get_json() | |
txt = data.get('txt') | |
src = data.get('src', 'en') | |
dest = data.get('dest', 'es') | |
if txt: | |
cache_key = f"{txt}_{src}_{dest}" | |
translation = cache.get(cache_key) | |
if translation is None: | |
translation = translate(txt, dest=dest, src=src) | |
cache.set(cache_key, translation) | |
return jsonify({'translation': translation}), 200 | |
else: | |
return jsonify({'error': 'No text provided'}), 400 | |
""" | |
from transformers import M2M100ForConditionalGeneration
from tokenization_small100 import SMALL100Tokenizer

# SMALL-100: a compact multilingual MT model with an M2M100 architecture;
# its tokenizer prepends the target-language code to the input.
model_name = "alirezamsh/small100"
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
tokenizer = SMALL100Tokenizer.from_pretrained(model_name)
@app.route('/translate', methods=['POST'])  # same route as the disabled version above
def dotranslate():
    data = request.get_json()
    txt = data.get('txt')
    src = data.get('src', 'en')
    dest = data.get('dest', 'es')
    if txt:
        cache_key = f"{txt}_{src}_{dest}"
        translation = cache.get(cache_key)
        if translation is None:
            # Set the source and target languages
            tokenizer.src_lang = src
            tokenizer.tgt_lang = dest
            # Tokenize the input text
            encoded = tokenizer(txt, return_tensors="pt")
            with torch.no_grad():
                # Generate the translation
                generated_tokens = model.generate(
                    **encoded,
                    forced_bos_token_id=tokenizer.get_lang_id(dest)
                )
            # Decode the generated tokens
            translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
            # Cache the translation
            cache.set(cache_key, translation)
        return jsonify({'translation': translation}), 200
    else:
        return jsonify({'error': 'No text provided'}), 400
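# Example request against the route above (response text is illustrative):
#   curl -X POST http://localhost:7860/translate \
#        -H "Content-Type: application/json" \
#        -d '{"txt": "Hello, how are you?", "src": "en", "dest": "es"}'
#   -> {"translation": "Hola, ¿cómo estás?"}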
if __name__ == "__main__":
    config = Config()
    config.bind = ["0.0.0.0:7860"]  # host and port for the server
    # Note: Flask is a WSGI app; recent Hypercorn versions expect ASGI and may
    # need the app wrapped (e.g. hypercorn.middleware.AsyncioWSGIMiddleware).
    asyncio.run(serve(app, config))