# beanbox-apis / app.py
from flask import Flask, request, jsonify
from flask_caching import Cache
import asyncio
from hypercorn.asyncio import serve
from hypercorn.config import Config
import os

# Empty the curl CA bundle so HTTPS requests made by downstream libraries
# skip certificate verification (needed in some sandboxed environments).
os.environ['CURL_CA_BUNDLE'] = ''

# from googletranslate import translate  # retired; see the disabled route below
import json
import random
import re
import numpy as np
import emoji
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
import torch
# Emoji map in emoji_overview.png
EMOJIS = ":joy: :unamused: :weary: :sob: :heart_eyes: \
:pensive: :ok_hand: :blush: :heart: :smirk: \
:grin: :notes: :flushed: :100: :sleeping: \
:relieved: :relaxed: :raised_hands: :two_hearts: :expressionless: \
:sweat_smile: :pray: :confused: :kissing_heart: :heartbeat: \
:neutral_face: :information_desk_person: :disappointed: :see_no_evil: :tired_face: \
:v: :sunglasses: :rage: :thumbsup: :cry: \
:sleepy: :yum: :triumph: :hand: :mask: \
:clap: :eyes: :gun: :persevere: :smiling_imp: \
:sweat: :broken_heart: :yellow_heart: :musical_note: :speak_no_evil: \
:wink: :skull: :confounded: :smile: :stuck_out_tongue_winking_eye: \
:angry: :no_good: :muscle: :facepunch: :purple_heart: \
:sparkling_heart: :blue_heart: :grimacing: :sparkles:".split(' ')
def top_elements(array, k):
    """Return the indices of the k largest values, sorted descending."""
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]
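# Example (illustrative): argpartition finds the k largest entries without a
# full sort, and the final argsort orders just those k, largest first.
#
#   >>> top_elements(np.array([0.1, 0.5, 0.2, 0.9]), 2)
#   array([3, 1])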
with open("vocabulary.json", 'r') as f:
    vocabulary = json.load(f)

# Tokenizer capped at 100 tokens per sentence, matching the torchmoji setup
st = SentenceTokenizer(vocabulary, 100)
emojimodel = torchmoji_emojis("pytorch_model.bin")

# deepmojify references USE_GPU below; define it from the runtime so the
# model input is moved to CUDA only when a GPU is actually available.
USE_GPU = torch.cuda.is_available()
def deepmojify(sentence, top_n=5, prob_only=False):
    """Return the top_n emoji for `sentence`, plus the raw probabilities."""
    list_emojis = []
    tokenized, _, _ = st.tokenize_sentences([sentence])
    tokenized = np.array(tokenized).astype(int)  # token ids as integers
    if USE_GPU:
        tokenized = torch.tensor(tokenized).cuda()  # move input to the GPU
    prob = emojimodel.forward(tokenized)[0]
    if not USE_GPU:
        prob = torch.tensor(prob)
    if prob_only:
        return prob
    emoji_ids = top_elements(prob.cpu().numpy(), top_n)
    emojis = map(lambda x: EMOJIS[x], emoji_ids)
    list_emojis.append(emoji.emojize(f"{' '.join(emojis)}", language='alias'))
    # returns the rendered emoji as a one-element list, plus the probabilities
    return list_emojis, prob
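# Example (a minimal sketch; the exact emoji depend on the model weights):
#
#   emojis, prob = deepmojify("This is amazing!", top_n=3)
#   print(emojis[0])   # e.g. three space-separated emoji rendered by emojize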
app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple', 'CACHE_DEFAULT_TIMEOUT': 60})

@app.route('/', methods=['GET'])
def home():
    return "HI! Use /translate POST"
# Load the JSON data into memory
def load_json_data(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

# The JSON file is a list of {"english": ..., "spanish": ...} dictionaries
json_data = load_json_data('englishspanishpairs.json')
@app.route('/spanish')
def random_spanish_pair1():
    # Select a random English-Spanish pair
    random_pair = random.choice(json_data)
    return jsonify(random_pair)
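# Example request (assuming the default bind of 0.0.0.0:7860 at the bottom of
# this file; the response keys follow englishspanishpairs.json):
#
#   curl http://localhost:7860/spanish
#   # -> {"english": "...", "spanish": "..."}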
def is_word(s):
    """
    Check if the string 's' is a word (contains only alphabetic characters).
    """
    return s.isalpha()

# Sets of English and Spanish words, kept separate per language
english_words = set()
spanish_words = set()

# Populate the word sets from every sentence pair
for pair in json_data:
    if "english" in pair:
        # Extract words from the English sentence and filter out numbers
        english_words.update(filter(is_word, re.findall(r'\b\w+\b', pair.get("english", ""))))
    if "spanish" in pair:
        # Extract words from the Spanish sentence and filter out numbers
        spanish_words.update(filter(is_word, re.findall(r'\b\w+\b', pair.get("spanish", ""))))
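# Example (illustrative): for the pair {"english": "The cat sleeps."},
# re.findall(r'\b\w+\b', ...) yields ["The", "cat", "sleeps"], and is_word
# keeps all three while dropping tokens such as "42" or "foo_bar".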
def get_distractors(target_word, all_words, num_distractors=3):
    """
    Get distractor words from the same language.
    """
    # Sample without replacement, capped by the pool size so a small word
    # pool can never make this loop forever
    pool = [w for w in all_words if w.lower() != target_word.lower()]
    return random.sample(pool, min(num_distractors, len(pool)))
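# Example (illustrative): get_distractors("gato", {"gato", "perro", "casa", "sol"})
# returns up to three words drawn from {"perro", "casa", "sol"}.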
@app.route('/fillgame')
def random_spanish_pair2():
    # Select a random English-Spanish pair
    random_pair = random.choice(json_data)

    # Choose either English or Spanish for the fill-in-the-blank game
    if random.choice([True, False]):
        sentence = random_pair.get('english', "")
        language = 'english'
        word_set = english_words
    else:
        sentence = random_pair.get('spanish', "")
        language = 'spanish'
        word_set = spanish_words

    # Split the sentence into words and keep only alphabetic tokens
    words = list(filter(is_word, re.findall(r'\b\w+\b', sentence)))
    if not words:
        return jsonify({'error': 'No suitable word to blank out'}), 500

    # Choose a random word and blank out its first whole-word occurrence
    # (re.sub with \b avoids clobbering substrings of longer words)
    blank_word = random.choice(words)
    sentence_with_blank = re.sub(r'\b' + re.escape(blank_word) + r'\b', "_____", sentence, count=1)

    # Get distractors from the same language
    distractors = get_distractors(blank_word, word_set)

    # Combine the correct word with distractors and shuffle
    options = [blank_word] + distractors
    random.shuffle(options)

    # Return the sentence with a blank, options, and the correct word
    return jsonify({
        'sentence': sentence_with_blank,
        'options': options,
        'correctWord': blank_word,
        'language': language
    })
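# Example response (shape only; values vary per request):
#
#   {"sentence": "El _____ duerme.", "options": ["gato", "sol", "casa", "perro"],
#    "correctWord": "gato", "language": "spanish"}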
"""
@app.route('/translate', methods=['POST'])
def dotranslate():
data = request.get_json()
txt = data.get('txt')
src = data.get('src', 'en')
dest = data.get('dest', 'es')
if txt:
cache_key = f"{txt}_{src}_{dest}"
translation = cache.get(cache_key)
if translation is None:
translation = translate(txt, dest=dest, src=src)
cache.set(cache_key, translation)
return jsonify({'translation': translation}), 200
else:
return jsonify({'error': 'No text provided'}), 400
"""
from transformers import M2M100ForConditionalGeneration
from tokenization_small100 import SMALL100Tokenizer

# SMALL-100: a compact many-to-many translation model with an M2M-100 head
model_name = "alirezamsh/small100"
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
tokenizer = SMALL100Tokenizer.from_pretrained(model_name)
@app.route('/translate', methods=['POST'])
def dotranslate():
    data = request.get_json()
    txt = data.get('txt')
    src = data.get('src', 'en')
    dest = data.get('dest', 'es')
    if txt:
        cache_key = f"{txt}_{src}_{dest}"
        translation = cache.get(cache_key)
        if translation is None:
            # Set the source and target languages on the tokenizer
            tokenizer.src_lang = src
            tokenizer.tgt_lang = dest

            # Tokenize the input text
            encoded = tokenizer(txt, return_tensors="pt")
            with torch.no_grad():
                # Generate the translation
                generated_tokens = model.generate(
                    **encoded,
                    forced_bos_token_id=tokenizer.get_lang_id(dest)
                )

            # Decode the generated tokens
            translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

            # Cache the translation
            cache.set(cache_key, translation)
        return jsonify({'translation': translation}), 200
    else:
        return jsonify({'error': 'No text provided'}), 400
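# Example request (a sketch; src/dest must be language codes SMALL-100 knows):
#
#   curl -X POST http://localhost:7860/translate \
#        -H "Content-Type: application/json" \
#        -d '{"txt": "Hello, world!", "src": "en", "dest": "es"}'
#   # -> {"translation": "..."}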
if __name__ == "__main__":
config = Config()
config.bind = ["0.0.0.0:7860"] # You can specify the host and port here
asyncio.run(serve(app, config))