"""Flask service with emoji prediction, a Spanish practice game and translation.

Routes:
    GET  /           -- short usage hint.
    GET  /spanish    -- a random English/Spanish sentence pair.
    GET  /fillgame   -- a fill-in-the-blank question built from a random pair.
    POST /translate  -- translate ``txt`` from ``src`` to ``dest`` (SMALL100).
"""
import asyncio
import json
import os
import random
import re
import time

# NOTE(review): an empty CURL_CA_BUNDLE is a commonly used workaround that
# makes some versions of `requests` skip TLS certificate verification.  It is
# kept (and kept ahead of the ML imports, as in the original) to preserve
# behaviour, but it weakens security -- confirm it is still required.
os.environ['CURL_CA_BUNDLE'] = ''

import emoji
import numpy as np
import torch
from flask import Flask, jsonify, request
from flask_caching import Cache
from hypercorn.asyncio import serve
from hypercorn.config import Config
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.model_def import torchmoji_emojis
from torchmoji.sentence_tokenizer import SentenceTokenizer
from transformers import M2M100ForConditionalGeneration

from tokenization_small100 import SMALL100Tokenizer

# Emoji map in emoji_overview.png
EMOJIS = (
    ":joy: :unamused: :weary: :sob: :heart_eyes: "
    ":pensive: :ok_hand: :blush: :heart: :smirk: "
    ":grin: :notes: :flushed: :100: :sleeping: "
    ":relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: "
    ":sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: "
    ":neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: "
    ":v: :sunglasses: :rage: :thumbsup: :cry: "
    ":sleepy: :yum: :triumph: :hand: :mask: "
    ":clap: :eyes: :gun: :persevere: :smiling_imp: "
    ":sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: "
    ":wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: "
    ":angry: :no_good: :muscle: :facepunch: :purple_heart: "
    ":sparkling_heart: :blue_heart: :grimacing: :sparkles:"
).split(' ')

# Whether emoji inference runs on CUDA.  The original code read this name
# without ever defining it (NameError on every call); it defaults to CPU,
# which is where the model is loaded.
USE_GPU = False

# Matches runs of word characters; shared by the word index and /fillgame.
_WORD_RE = re.compile(r'\b\w+\b')

# Upper bound on re-draws when a randomly chosen sentence has no usable word.
_MAX_SENTENCE_ATTEMPTS = 20


def top_elements(array, k):
    """Return the indices of the ``k`` largest values of ``array``, largest first."""
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]


with open("vocabulary.json", 'r', encoding="utf-8") as f:
    vocabulary = json.load(f)

st = SentenceTokenizer(vocabulary, 100)
emojimodel = torchmoji_emojis("pytorch_model.bin")
if USE_GPU:
    # Inputs are moved to CUDA in deepmojify(), so the model must live there too.
    emojimodel = emojimodel.cuda()


def deepmojify(sentence, top_n=5, prob_only=False):
    """Predict the emojis that best match ``sentence``.

    Args:
        sentence: Text to analyse.
        top_n: Number of highest-scoring emojis to return.
        prob_only: When true, return only the score tensor.

    Returns:
        The score tensor if ``prob_only`` is true; otherwise a tuple
        ``(list_emojis, prob)`` where ``list_emojis`` is a one-element list
        holding the ``top_n`` emojis joined by spaces.
    """
    tokenized, _, _ = st.tokenize_sentences([sentence])
    tokenized = np.array(tokenized).astype(int)  # integer token ids
    if USE_GPU:
        tokenized = torch.tensor(tokenized).cuda()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        prob = emojimodel.forward(tokenized)[0]
    # Normalise to a tensor whatever type the model handed back.
    if not isinstance(prob, torch.Tensor):
        prob = torch.tensor(prob)

    if prob_only:
        return prob

    emoji_ids = top_elements(prob.detach().cpu().numpy(), top_n)
    aliases = ' '.join(EMOJIS[i] for i in emoji_ids)
    # returning the emojis as a list named as list_emojis
    list_emojis = [emoji.emojize(aliases, language='alias')]
    return list_emojis, prob


app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple', 'CACHE_DEFAULT_TIMEOUT': 60})


@app.route('/', methods=['GET'])
def home():
    """Return a short usage hint."""
    return "HI! Use /translate POST"


def load_json_data(file_path):
    """Load and return the JSON document stored at ``file_path``."""
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(file_path, 'r', encoding="utf-8") as file:
        return json.load(file)


# Assuming your JSON structure is a list of dictionaries
json_data = load_json_data('englishspanishpairs.json')


@app.route('/spanish')
def random_spanish_pair1():
    """Return a random English-Spanish pair as JSON (503 if there are none)."""
    if not json_data:
        return jsonify({'error': 'No sentence pairs available'}), 503
    return jsonify(random.choice(json_data))


def is_word(s):
    """Check if the string 's' is a word (contains only alphabetic characters)."""
    return s.isalpha()


def _extract_words(sentence):
    """Return the purely alphabetic tokens of ``sentence``, in order."""
    return [token for token in _WORD_RE.findall(sentence) if is_word(token)]


# Vocabulary of each language, used as the distractor pool for /fillgame.
english_words = set()
spanish_words = set()

for pair in json_data:
    english_words.update(_extract_words(pair.get("english", "")))
    spanish_words.update(_extract_words(pair.get("spanish", "")))


def get_distractors(target_word, all_words, num_distractors=3):
    """Pick distinct distractor words from the same language.

    Args:
        target_word: The correct answer; excluded case-insensitively.
        all_words: Pool of candidate words.
        num_distractors: Number of distractors wanted.

    Returns:
        A list of up to ``num_distractors`` distinct words.  Fewer are
        returned when the pool is too small (the previous implementation
        looped forever in that case).
    """
    target = target_word.lower()
    candidates = list({word for word in all_words if word.lower() != target})
    count = max(0, min(num_distractors, len(candidates)))
    return random.sample(candidates, count)


@app.route('/fillgame')
def random_spanish_pair2():
    """Build a fill-in-the-blank question from a random sentence pair.

    Returns JSON with the blanked sentence, the shuffled options, the correct
    word and the language, or a 503 error when no usable sentence exists.
    """
    if not json_data:
        return jsonify({'error': 'No sentence pairs available'}), 503

    # Re-draw when the chosen sentence has no alphabetic word to blank out
    # (e.g. the pair lacks that language or contains only digits).
    for _ in range(_MAX_SENTENCE_ATTEMPTS):
        random_pair = random.choice(json_data)
        if random.choice([True, False]):
            language, word_set = 'english', english_words
        else:
            language, word_set = 'spanish', spanish_words
        sentence = random_pair.get(language, "")
        words = _extract_words(sentence)
        if words:
            break
    else:
        return jsonify({'error': 'No suitable sentence found'}), 503

    blank_word = random.choice(words)
    # Blank whole-word matches only, so that choosing "a" does not also
    # mangle words that merely contain the letter.
    blank_pattern = rf'\b{re.escape(blank_word)}\b'
    sentence_with_blank = re.sub(blank_pattern, "_____", sentence)

    # Combine correct word with distractors and shuffle
    options = [blank_word] + get_distractors(blank_word, word_set)
    random.shuffle(options)

    return jsonify({
        'sentence': sentence_with_blank,
        'options': options,
        'correctWord': blank_word,
        'language': language,
    })


# NOTE: an earlier googletranslate-based /translate route was replaced by the
# SMALL100 implementation below.
model_name = "alirezamsh/small100"
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
tokenizer = SMALL100Tokenizer.from_pretrained(model_name)


@app.route('/translate', methods=['POST'])
def dotranslate():
    """Translate the posted text.

    Expects a JSON object with ``txt`` and optional ``src`` (default ``en``)
    and ``dest`` (default ``es``).  Responds with ``{'translation': ...}`` on
    success or ``{'error': ...}`` with status 400 on bad input.  Results are
    cached per (text, source, destination).
    """
    # silent=True: a missing or malformed body yields None instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    txt = data.get('txt')
    src = data.get('src', 'en')
    dest = data.get('dest', 'es')

    if not txt or not isinstance(txt, str):
        return jsonify({'error': 'No text provided'}), 400

    cache_key = f"{txt}_{src}_{dest}"
    translation = cache.get(cache_key)
    if translation is None:
        try:
            # Set the source and target languages
            tokenizer.src_lang = src
            tokenizer.tgt_lang = dest
            target_lang_id = tokenizer.get_lang_id(dest)
        except KeyError:
            # NOTE(review): assumes the tokenizer signals an unknown language
            # code with KeyError -- confirm against tokenization_small100.
            return jsonify({'error': 'Unsupported language'}), 400

        encoded = tokenizer(txt, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded,
            forced_bos_token_id=target_lang_id,
        )
        translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
        cache.set(cache_key, translation)

    return jsonify({'translation': translation}), 200


if __name__ == "__main__":
    config = Config()
    config.bind = ["0.0.0.0:7860"]  # You can specify the host and port here
    asyncio.run(serve(app, config))