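"""Flask service combining several language-learning endpoints: torchMoji
emoji prediction, random English-Spanish sentence pairs, a fill-in-the-blank
word game, and SMALL-100 machine translation. Served with Hypercorn on
port 7860."""
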
from flask import Flask, request, jsonify
from flask_caching import Cache
import asyncio
from hypercorn.asyncio import serve
from hypercorn.config import Config
import os

# Clear the curl CA bundle path so model downloads do not fail on hosts with
# a broken certificate store; note this disables TLS verification for
# libraries that honor this variable.
os.environ['CURL_CA_BUNDLE'] = ''
#from googletranslate import translate
import json
import random
import re

import numpy as np
import emoji
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
import torch

# Emoji map in emoji_overview.png
EMOJIS = ":joy: :unamused: :weary: :sob: :heart_eyes: \
:pensive: :ok_hand: :blush: :heart: :smirk: \
:grin: :notes: :flushed: :100: :sleeping: \
:relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: \
:sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: \
:neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: \
:v: :sunglasses: :rage: :thumbsup: :cry: \
:sleepy: :yum: :triumph: :hand: :mask: \
:clap: :eyes: :gun: :persevere: :smiling_imp: \
:sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: \
:wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: \
:angry: :no_good: :muscle: :facepunch: :purple_heart: \
:sparkling_heart: :blue_heart: :grimacing: :sparkles:".split(' ')
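
# The order of EMOJIS must match torchMoji's 64 output classes: index i in
# the model's probability vector corresponds to EMOJIS[i].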

def top_elements(array, k):
    """Return the indices of the k largest values in array, sorted descending."""
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]


with open("vocabulary.json", 'r') as f:
    vocabulary = json.load(f)

st = SentenceTokenizer(vocabulary, 100)

# Run emoji inference on the GPU when one is available, keeping the model and
# the input tensors on the same device. (USE_GPU is checked by deepmojify()
# below.)
USE_GPU = torch.cuda.is_available()

emojimodel = torchmoji_emojis("pytorch_model.bin")
if USE_GPU:
    emojimodel = emojimodel.cuda()

def deepmojify(sentence, top_n=5, prob_only=False):
    """Return the top_n emoji for a sentence, or just the probability vector."""
    list_emojis = []

    tokenized, _, _ = st.tokenize_sentences([sentence])
    tokenized = np.array(tokenized).astype(int)  # token ids as an integer array
    if USE_GPU:
        tokenized = torch.tensor(tokenized).cuda()  # move the ids to the GPU

    prob = emojimodel(tokenized)[0]  # __call__ runs forward plus any hooks
    if not USE_GPU:
        prob = torch.tensor(prob)
    if prob_only:
        return prob
    emoji_ids = top_elements(prob.cpu().numpy(), top_n)
    emojis = map(lambda x: EMOJIS[x], emoji_ids)
    list_emojis.append(emoji.emojize(' '.join(emojis), language='alias'))
    # Return the rendered emojis plus the raw probability vector
    return list_emojis, prob
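
# Illustrative call (hypothetical input):
#   emojis, prob = deepmojify("I love this!", top_n=3)
#   # emojis -> e.g. ['😍 ❤️ 💕'], prob -> 64-way probability vector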

app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple', 'CACHE_DEFAULT_TIMEOUT': 60})
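# 'simple' is an in-process dictionary cache: entries expire after 60 seconds
# and are not shared between worker processes.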

@app.route('/', methods=['GET'])
def home():
    return "HI! Use /translate POST"

# Load the JSON data into memory
def load_json_data(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

# Assuming your JSON structure is a list of dictionaries
json_data = load_json_data('englishspanishpairs.json')
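# e.g. [{"english": "...", "spanish": "..."}, ...] -- the routes below rely
# on those two keys.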

@app.route('/spanish')
def random_spanish_pair1():
    # Select a random English-Spanish pair
    random_pair = random.choice(json_data)
    return jsonify(random_pair)

def is_word(s):
    """
    Check if the string 's' is a word (contains only alphabetic characters).
    """
    return s.isalpha()

# Lists to store English and Spanish words separately
english_words = set()
spanish_words = set()

# Populate the word lists
for pair in json_data:
    if "english" in pair:
        # Extract words from the English sentence and filter out numbers
        english_words.update(filter(is_word, re.findall(r'\b\w+\b', pair.get("english", ""))))
    if "spanish" in pair:
        # Extract words from the Spanish sentence and filter out numbers
        spanish_words.update(filter(is_word, re.findall(r'\b\w+\b', pair.get("spanish", ""))))
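
# Note: \b\w+\b also matches digits and underscores, which is why is_word()
# filters the extraction down to purely alphabetic tokens.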

def get_distractors(target_word, all_words, num_distractors=3):
    """
    Get distractor words from the same language, excluding the target word.
    """
    # Filter out the target word up front so a small vocabulary cannot make
    # random sampling loop forever.
    candidates = [w for w in all_words if w.lower() != target_word.lower()]
    return random.sample(candidates, min(num_distractors, len(candidates)))

@app.route('/fillgame')
def random_spanish_pair2():
    # Select a random English-Spanish pair
    random_pair = random.choice(json_data)

    # Choose either English or Spanish for the fill-in-the-blank game
    if random.choice([True, False]):
        sentence = random_pair.get('english', "")
        language = 'english'
        word_set = english_words
    else:
        sentence = random_pair.get('spanish', "")
        language = 'spanish'
        word_set = spanish_words

    # Split the sentence into words and filter out non-words
    words = list(filter(is_word, re.findall(r'\b\w+\b', sentence)))

    # Choose a random word to blank out; replace on word boundaries so that
    # substrings of longer words are left intact
    blank_word = random.choice(words)
    sentence_with_blank = re.sub(rf'\b{re.escape(blank_word)}\b', "_____", sentence, count=1)

    # Get distractors from the same language
    distractors = get_distractors(blank_word, word_set)

    # Combine correct word with distractors and shuffle
    options = [blank_word] + distractors
    random.shuffle(options)

    # Return the sentence with a blank, options, and the correct word
    return jsonify({
        'sentence': sentence_with_blank,
        'options': options,
        'correctWord': blank_word,
        'language': language
    })

"""
@app.route('/translate', methods=['POST'])
def dotranslate():
    data = request.get_json()

    txt = data.get('txt')
    src = data.get('src', 'en')
    dest = data.get('dest', 'es')

    if txt:
        cache_key = f"{txt}_{src}_{dest}"
        translation = cache.get(cache_key)
        if translation is None:
            translation = translate(txt, dest=dest, src=src)
            cache.set(cache_key, translation)
        return jsonify({'translation': translation}), 200
    else:
        return jsonify({'error': 'No text provided'}), 400
"""


from transformers import M2M100ForConditionalGeneration
from tokenization_small100 import SMALL100Tokenizer

model_name = "alirezamsh/small100"
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
tokenizer = SMALL100Tokenizer.from_pretrained(model_name)
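
# Per the SMALL-100 model card, the output language is selected through the
# tokenizer's target-language prefix (tokenizer.tgt_lang) rather than only
# through forced_bos_token_id as in plain M2M-100.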

@app.route('/translate', methods=['POST'])
def dotranslate():
    data = request.get_json(silent=True) or {}  # tolerate non-JSON bodies
    txt = data.get('txt')
    src = data.get('src', 'en')
    dest = data.get('dest', 'es')

    if txt:
        cache_key = f"{txt}_{src}_{dest}"
        translation = cache.get(cache_key)
        if translation is None:
            # Set the source and target languages
            tokenizer.src_lang = src
            tokenizer.tgt_lang = dest

            # Tokenize the input text
            encoded = tokenizer(txt, return_tensors="pt")
            with torch.no_grad():
                # Generate translation
                generated_tokens = model.generate(
                    **encoded,
                    forced_bos_token_id=tokenizer.get_lang_id(dest)
                )

            # Decode the generated tokens
            translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

            # Cache the translation
            cache.set(cache_key, translation)
        return jsonify({'translation': translation}), 200
    else:
        return jsonify({'error': 'No text provided'}), 400
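
# Example request (hypothetical values):
#   curl -X POST http://localhost:7860/translate \
#        -H "Content-Type: application/json" \
#        -d '{"txt": "Hello world", "src": "en", "dest": "es"}'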

if __name__ == "__main__":
    config = Config()
    config.bind = ["0.0.0.0:7860"]  # listen on all interfaces, port 7860
    asyncio.run(serve(app, config))