from flask import Flask, request, jsonify
from flask_caching import Cache
import time
import asyncio
from hypercorn.asyncio import serve
from hypercorn.config import Config
import os
os.environ['CURL_CA_BUNDLE'] = ''  # empty value disables TLS certificate verification for outbound requests
#from googletranslate import translate
import json
import random
import re
import numpy as np
import emoji
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
import torch
# Emoji map in emoji_overview.png
EMOJIS = ":joy: :unamused: :weary: :sob: :heart_eyes: \
:pensive: :ok_hand: :blush: :heart: :smirk: \
:grin: :notes: :flushed: :100: :sleeping: \
:relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: \
:sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: \
:neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: \
:v: :sunglasses: :rage: :thumbsup: :cry: \
:sleepy: :yum: :triumph: :hand: :mask: \
:clap: :eyes: :gun: :persevere: :smiling_imp: \
:sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: \
:wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: \
:angry: :no_good: :muscle: :facepunch: :purple_heart: \
:sparkling_heart: :blue_heart: :grimacing: :sparkles:".split(' ')
def top_elements(array, k):
    """Return the indices of the k largest values in array, sorted in descending order."""
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]

with open("vocabulary.json", 'r') as f:
    vocabulary = json.load(f)

st = SentenceTokenizer(vocabulary, 100)  # tokenizer with a max sequence length of 100
emojimodel = torchmoji_emojis("pytorch_model.bin")
USE_GPU = False  # the model is loaded on the CPU; set True (and move the model to CUDA) to use a GPU

def deepmojify(sentence, top_n=5, prob_only=False):
    """Return the top_n most likely emojis for a sentence, plus the full probability vector."""
    list_emojis = []
    tokenized, _, _ = st.tokenize_sentences([sentence])
    tokenized = np.array(tokenized).astype(int)  # token ids as integers
    if USE_GPU:
        tokenized = torch.tensor(tokenized).cuda()  # move the input to the GPU
    prob = emojimodel.forward(tokenized)[0]
    if not USE_GPU:
        prob = torch.tensor(prob)
    if prob_only:
        return prob
    emoji_ids = top_elements(prob.cpu().numpy(), top_n)
    emojis = map(lambda x: EMOJIS[x], emoji_ids)
    list_emojis.append(emoji.emojize(' '.join(emojis), language='alias'))
    # Return the rendered emojis as a list, along with the raw probabilities
    return list_emojis, prob
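
# A usage sketch (hypothetical output; the exact emojis depend on the trained weights):
#   emojis, prob = deepmojify("I love this!", top_n=3)
#   # emojis might look like ['😍 ❤️ 😊']; prob holds the per-class probabilities.
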
app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple', 'CACHE_DEFAULT_TIMEOUT': 60})

@app.route('/', methods=['GET'])
def home():
    return "HI! Use /translate POST"

# Load the JSON data into memory
def load_json_data(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

# Assuming the JSON structure is a list of dictionaries
json_data = load_json_data('englishspanishpairs.json')

@app.route('/spanish')
def random_spanish_pair1():
    # Select a random English-Spanish pair
    random_pair = random.choice(json_data)
    return jsonify(random_pair)
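
# Example response (hypothetical pair; the shape mirrors englishspanishpairs.json):
#   {"english": "How are you?", "spanish": "¿Cómo estás?"}
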
def is_word(s):
    """Check if the string 's' is a word (contains only alphabetic characters)."""
    return s.isalpha()

# Sets of English and Spanish words, stored separately
english_words = set()
spanish_words = set()

# Populate the word sets
for pair in json_data:
    if "english" in pair:
        # Extract words from the English sentence, dropping non-alphabetic tokens
        english_words.update(filter(is_word, re.findall(r'\b\w+\b', pair.get("english", ""))))
    if "spanish" in pair:
        # Extract words from the Spanish sentence, dropping non-alphabetic tokens
        spanish_words.update(filter(is_word, re.findall(r'\b\w+\b', pair.get("spanish", ""))))

def get_distractors(target_word, all_words, num_distractors=3):
    """Get distractor words from the same language."""
    distractors = set()
    word_list = list(all_words)  # materialize once instead of on every draw
    while len(distractors) < num_distractors:
        distractor = random.choice(word_list)
        if distractor.lower() != target_word.lower():
            distractors.add(distractor)
    return list(distractors)
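
# Hypothetical usage: get_distractors("gato", spanish_words) might return
# three words such as ['perro', 'casa', 'sol'], none equal to the target.
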
@app.route('/fillgame')
def random_spanish_pair2():
    # Select a random English-Spanish pair
    random_pair = random.choice(json_data)

    # Choose either English or Spanish for the fill-in-the-blank game
    if random.choice([True, False]):
        sentence = random_pair.get('english', "")
        language = 'english'
        word_set = english_words
    else:
        sentence = random_pair.get('spanish', "")
        language = 'spanish'
        word_set = spanish_words

    # Split the sentence into words and filter out non-words
    words = list(filter(is_word, re.findall(r'\b\w+\b', sentence)))

    # Choose a random word to replace with a blank
    blank_word = random.choice(words)
    sentence_with_blank = sentence.replace(blank_word, "_____")

    # Get distractors from the same language
    distractors = get_distractors(blank_word, word_set)

    # Combine the correct word with the distractors and shuffle
    options = [blank_word] + distractors
    random.shuffle(options)

    # Return the sentence with a blank, the options, and the correct word
    return jsonify({
        'sentence': sentence_with_blank,
        'options': options,
        'correctWord': blank_word,
        'language': language
    })
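
# Example response (hypothetical values):
#   {"sentence": "El _____ duerme.", "options": ["gato", "sol", "casa", "perro"],
#    "correctWord": "gato", "language": "spanish"}
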
"""
@app.route('/translate', methods=['POST'])
def dotranslate():
data = request.get_json()
txt = data.get('txt')
src = data.get('src', 'en')
dest = data.get('dest', 'es')
if txt:
cache_key = f"{txt}_{src}_{dest}"
translation = cache.get(cache_key)
if translation is None:
translation = translate(txt, dest=dest, src=src)
cache.set(cache_key, translation)
return jsonify({'translation': translation}), 200
else:
return jsonify({'error': 'No text provided'}), 400
"""
from transformers import M2M100ForConditionalGeneration
from tokenization_small100 import SMALL100Tokenizer
model_name = "alirezamsh/small100"
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
tokenizer = SMALL100Tokenizer.from_pretrained(model_name)
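
# SMALL100 (alirezamsh/small100) ships a custom tokenizer that is not bundled with
# transformers, so tokenization_small100.py is expected to sit next to this file.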

@app.route('/translate', methods=['POST'])
def dotranslate():
    data = request.get_json(silent=True) or {}  # tolerate missing or invalid JSON bodies
    txt = data.get('txt')
    src = data.get('src', 'en')
    dest = data.get('dest', 'es')
    if txt:
        cache_key = f"{txt}_{src}_{dest}"
        translation = cache.get(cache_key)
        if translation is None:
            # Set the source and target languages
            tokenizer.src_lang = src
            tokenizer.tgt_lang = dest
            # Tokenize the input text
            encoded = tokenizer(txt, return_tensors="pt")
            with torch.no_grad():
                # Generate the translation
                generated_tokens = model.generate(
                    **encoded,
                    forced_bos_token_id=tokenizer.get_lang_id(dest)
                )
            # Decode the generated tokens
            translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
            # Cache the translation
            cache.set(cache_key, translation)
        return jsonify({'translation': translation}), 200
    else:
        return jsonify({'error': 'No text provided'}), 400
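
# Example request (hypothetical values; assumes the server is listening on port 7860):
#   curl -X POST http://localhost:7860/translate \
#        -H "Content-Type: application/json" \
#        -d '{"txt": "Hello", "src": "en", "dest": "es"}'
# Expected response shape: {"translation": "Hola"}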

if __name__ == "__main__":
    config = Config()
    config.bind = ["0.0.0.0:7860"]  # specify the host and port here; 7860 is the Spaces default
    asyncio.run(serve(app, config))