Spaces:
Paused
Paused
#!/usr/bin/env python3.10 | |
# """ | |
# Semantrix Game Module | |
# This module defines the Semantrix class, which implements a word guessing game using word embeddings. The game can be configured to use either a Word2Vec model or a SentenceTransformer model for word embeddings. The game supports multiple languages and difficulty levels. | |
# Classes: | |
# Semantrix: A class that implements the Semantrix word guessing game. | |
# Semantrix.DictWrapper: A helper class to wrap configuration dictionaries. | |
# Functions: | |
# __init__(self, lang=0, model_type="SentenceTransformer", session_hash=None): Initializes the Semantrix game with the specified language, model type and session hash (the hash is used to name the ranking file). | |
# prepare_game(self, secret_word_used, difficulty): Prepares the game with the secret word at index secret_word_used and the selected difficulty level. | |
# gen_rank(self, repeated): Generates the ranking file based on the scores. | |
# play_game(self, word, model_class): Plays the game with the selected word, using the models held by model_class, and returns feedback. | |
# curiosity(self): Generates a curiosity hint about the secret word once the game is over. | |
# Attributes: | |
# model (KeyedVectors): The word embeddings model. | |
# config_file_path (str): Path to the configuration file. | |
# secret_file_path (str): Path to the secret words file. | |
# data_path (str): Path to the data directory. | |
# Config_full (dict): Full configuration data. | |
# secret (dict): Secret words data. | |
# lang (int): Language of the game (0 for Spanish, 1 for English). | |
# model_type (str): Type of the model ("word2vec" or "SentenceTransformer"). | |
# Config (DictWrapper): Configuration data for the selected language. | |
# secret_dict (dict): Secret words for the selected language. | |
# secret_list (list): List of secret words for the selected difficulty. | |
# words (list): List of words guessed by the player. | |
# scores (list): List of scores for the guessed words. | |
# win (bool): Indicates if the player has won the game. | |
# n (int): Number of hints given. | |
# recent_hint (int): Counter for recent hints. | |
# f_dev_avg (float): Moving average of the tendency slope. | |
# last_hint (int): Index of the last hint given. | |
# difficulty (int): Difficulty level of the game. | |
# """ | |
import os | |
import sys | |
import json | |
import uuid | |
import random | |
from datetime import datetime | |
import time | |
from tqdm import tqdm | |
import numpy as np | |
from gensim.models import KeyedVectors | |
from hints import curiosity, hint | |
from tracking import ( | |
calculate_moving_average, | |
calculate_tendency_slope, | |
) | |
from sentence_transformers import SentenceTransformer, util | |
import warnings | |
from huggingface_hub import snapshot_download | |
warnings.filterwarnings(action="ignore", category=UserWarning, module="gensim") | |
class Model_class:
    """Load the embedding models used by the game.

    Attributes set on the instance:
        model: ``KeyedVectors`` holding the pre-computed word vectors for the
            selected language and model type. It is only set when
            ``model_type`` is one of the keys of ``_MODEL_SOURCES``.
        model_st: the multilingual ``SentenceTransformer`` model, always
            loaded.
    """

    base_path = os.path.dirname(os.path.abspath(__file__))

    # model_type -> (Hugging Face repo id, local directory relative to
    # base_path, infix used in the stored model file names).
    _MODEL_SOURCES = {
        "SentenceTransformer": (
            "Jsevisal/strans_models",
            "config/strans_models/",
            "strans",
        ),
        "word2vec": (
            "Jsevisal/w2v_models",
            "config/w2v_models/",
            "w2v",
        ),
    }

    @classmethod
    def _model_filename(cls, lang, model_type):
        """Return the stored vectors file name, or None for an unknown type.

        ``lang == 1`` selects the English ("eng") file; any other value
        selects the Spanish ("esp") one.
        """
        source = cls._MODEL_SOURCES.get(model_type)
        if source is None:
            return None
        prefix = "eng" if lang == 1 else "esp"
        return f"{prefix}_{source[2]}_model"

    def __init__(self, lang=0, model_type="SentenceTransformer"):
        """Load the word vectors for ``lang``/``model_type`` and the encoder.

        Args:
            lang: 1 for English, anything else for Spanish.
            model_type: "SentenceTransformer" or "word2vec". For any other
                value no word-vector model is loaded and ``self.model`` is
                left unset.
        """
        filename = self._model_filename(lang, model_type)
        if filename is not None:
            repo_id, dest_path, _ = self._MODEL_SOURCES[model_type]
            local_dir = os.path.join(self.base_path, dest_path)
            # Use the local copy only when the needed file is really there;
            # otherwise fetch the repository that matches the model type.
            # model_path is therefore always bound before it is used.
            if os.path.exists(os.path.join(local_dir, filename)):
                model_path = local_dir
            else:
                model_path = snapshot_download(repo_id=repo_id)
            self.model = KeyedVectors.load(
                os.path.join(model_path, filename),
                mmap="r",
            )

        self.model_st = SentenceTransformer(
            "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        )
# Define the class Semantrix | |
class Semantrix:
    """Word-guessing game scored by embedding similarity to a secret word.

    The player submits words; each known word gets a score of
    ``similarity * 10`` against the secret word, and a ranking of all
    guesses is kept both in memory and in a per-session text file.
    """

    # Paths of the configuration files and of the data directory.
    base_path = os.path.dirname(os.path.abspath(__file__))
    config_file_path = os.path.join(base_path, "config/lang.json")
    secret_file_path = os.path.join(base_path, "config/secret.json")
    data_path = os.path.join(base_path, "data/")

    class DictWrapper:
        """Expose the keys of a dictionary as instance attributes."""

        def __init__(self, data_dict):
            self.__dict__.update(data_dict)

    def __init__(self, lang=0, model_type="SentenceTransformer", session_hash=None):
        """Load the configuration and create an empty ranking file.

        Args:
            lang: 1 selects the "ENG" configuration, anything else "SPA".
            model_type: stored as ``self.model_type``.
            session_hash: used to build the ranking file name.
        """
        # Load the language configuration.
        with open(self.config_file_path, "r", encoding="utf-8") as file:
            self.Config_full = json.load(file)

        # Load the file where the secret words are stored.
        with open(self.secret_file_path, "r", encoding="utf-8") as file:
            self.secret = json.load(file)

        self.lang = lang
        self.session_id = str(uuid.uuid4().hex)
        self.model_type = model_type
        self.session_hash = session_hash

        self.ranking_path = "rankings/ranking_" + str(self.session_hash) + ".txt"
        self.ranking_data = []
        self.ranking_msg = ""

        lang_key = self._lang_key()
        self.Config = self.DictWrapper(self.Config_full[lang_key]["Game"])
        self.secret_dict = self.secret[lang_key]
        self.secret_list = self.secret_dict["basic"]

        # Create the ranking file (and its directory, which may not exist yet).
        ranking_file = self._ranking_file()
        os.makedirs(os.path.dirname(ranking_file), exist_ok=True)
        with open(ranking_file, "w+", encoding="utf-8") as file:
            file.write("---------------------------")

    def _lang_key(self):
        """Return the configuration key for the current language."""
        return "ENG" if self.lang == 1 else "SPA"

    def _hint_config(self):
        """Return the "Hint" configuration of the current language."""
        return self.DictWrapper(self.Config_full[self._lang_key()]["Hint"])

    def _ranking_file(self):
        """Return the full path of this session's ranking file."""
        return self.data_path + self.ranking_path

    def reset_game(self):
        """Start a new session id and reload the secret words.

        ``prepare_game`` replaces ``self.secret`` with the chosen word, so the
        secret-word data is read again here. The list is taken for the
        language the game was created with.
        """
        self.session_id = str(uuid.uuid4().hex)

        with open(self.secret_file_path, "r", encoding="utf-8") as file:
            self.secret = json.load(file)

        self.secret_dict = self.secret[self._lang_key()]
        self.secret_list = self.secret_dict["basic"]

    def generate_gensim_model(self, model_class, batch_size=32):
        """Encode the vocabulary with the SentenceTransformer and save it.

        Every key of ``model_class.model`` is encoded in batches with
        ``model_class.model_st`` and stored in a new 768-dimensional
        ``KeyedVectors`` (``self.model_trans``), which is then saved to
        ``config/strans_models/esp_strans_model_prompt`` (a path relative to
        the current working directory).

        Args:
            model_class: object exposing ``model`` and ``model_st``.
            batch_size: number of words encoded per call.
        """
        from tqdm import tqdm

        self.model_trans = KeyedVectors(768)
        # NOTE(review): init_sims appears to be deprecated in gensim 4 -
        # confirm before upgrading.
        self.model_trans.init_sims(replace=True)

        words = list(model_class.model.key_to_index.keys())

        for start_index in tqdm(range(0, len(words), batch_size)):
            batch_words = words[start_index : start_index + batch_size]

            # Encode the batch of words.
            encoded_vectors = model_class.model_st.encode(
                batch_words,
                convert_to_tensor=True,
                prompt="Encuentra el valor semántico de la palabra: ",
            ).tolist()

            # Add the vectors to the model.
            self.model_trans.add_vectors(batch_words, encoded_vectors)

        self.model_trans.save("config/strans_models/esp_strans_model_prompt")

    def prepare_game(self, secret_word_used, difficulty):
        """Pick the secret word and reset the per-game state.

        Args:
            secret_word_used: index into ``self.secret_list``.
            difficulty: stored as ``self.difficulty``; the value 1 starts the
                hint counter ``self.n`` at 3 instead of 0.
        """
        self.secret = self.secret_list[secret_word_used]
        self.secret = self.secret.lower()

        self.init_time = time.time()

        # Index 0 of words/scores is a placeholder for the secret word.
        self.words = [self.Config.secret_word]
        self.scores = [10]

        self.win = False
        self.n = 0
        self.recent_hint = 0
        self.f_dev_avg = 0
        self.last_hint = -1
        self.difficulty = difficulty

        if self.difficulty == 1:
            self.n = 3

    def gen_rank(self, repeated):
        """Rebuild the ranking, write it to the ranking file and cache it.

        Args:
            repeated: index of the guess to show on the first line, or -1 to
                show the most recent one.

        The first entry is the selected guess, followed by a separator line
        and all guesses sorted by descending score (index 0, the secret-word
        placeholder, is skipped). The same text is stored in
        ``self.ranking_msg``.
        """
        descending_indices = list(np.argsort(self.scores)[::-1])

        self.ranking_data.clear()
        k = repeated if repeated != -1 else len(self.words) - 1

        self.ranking_data.append(["#" + str(k), self.words[k], self.scores[k]])
        self.ranking_data.append("---------------------------")
        for i in descending_indices:
            if i == 0:
                continue
            self.ranking_data.append(["#" + str(i), self.words[i], self.scores[i]])

        ranking_text = "".join(f"{item}\n" for item in self.ranking_data)

        with open(self._ranking_file(), "w+", encoding="utf-8") as file:
            file.write(ranking_text)

        self.ranking_msg = ranking_text

    def play_game(self, word, model_class):
        """Score one guess and return the feedback text.

        Args:
            word: the player's input; it is lower-cased and stripped.
            model_class: object exposing ``model`` (vocabulary and
                ``similarity``) and ``model_st`` (passed on to ``hint``).

        Returns:
            The feedback message. It starts with "[lose]" when the input is
            "give_up" and with "[win]" when the score reaches 10; a "[hint]"
            section and a "[rank]" section may be appended.
        """
        word = word.lower().strip()

        # The player gives up: reveal the secret word.
        if word == "give_up":
            return (
                "[lose]"
                + str(self.Config.Feedback_9)
                + self.secret
                + "\n\n"
                + self.Config.Feedback_10
            )

        # Reject unknown words before touching the guess list, so that an
        # already-listed word missing from the vocabulary cannot remove a
        # previous guess.
        if word not in model_class.model.key_to_index:
            feedback = (
                "I don't know that word. Try another word."
                if self.lang == 1
                else "No conozco esa palabra. Prueba con otra palabra."
            )
            feedback += "[rank]" + self.ranking_msg if len(self.words) > 1 else "\n\n"
            return feedback

        # Detect repeated guesses; only new words are added to the list.
        if word in self.words:
            repeated = self.words.index(word)
        else:
            repeated = -1
            self.words.append(word)

        similarity = model_class.model.similarity(self.secret, word)
        # The score is computed the same way for every model type.
        score = np.round(similarity * 10, 2)

        # A repeated word already has its score stored.
        if repeated == -1:
            self.scores.append(score)

        # Choose the feedback message from the score band.
        if score <= 2.5:
            feedback = self.Config.Feedback_0 + str(score)
        elif score <= 6.0:
            feedback = self.Config.Feedback_1 + str(score)
        elif score <= 7.0:
            feedback = self.Config.Feedback_2 + str(score)
        elif score <= 8:
            feedback = self.Config.Feedback_3 + str(score)
        elif score <= 9.0:
            feedback = self.Config.Feedback_4 + str(score)
        elif score < 10.0:
            feedback = self.Config.Feedback_5 + str(score)
        else:
            # Any other score ends the game as a win.
            self.win = True
            feedback = "[win]" + self.Config.Feedback_8
            self.words[0] = self.secret
            # The winning word now lives at index 0; drop the entry that was
            # appended for it (nothing was appended for a repeated word).
            if repeated == -1:
                self.words.pop(len(self.words) - 1)
                self.scores.pop(len(self.scores) - 1)

        # Compare against the second-to-last stored score.
        if not self.win:
            previous_score = self.scores[len(self.scores) - 2]
            if score > previous_score:
                feedback += "\n" + self.Config.Feedback_6
            elif score < previous_score:
                feedback += "\n" + self.Config.Feedback_7

        ## Hint generation
        # Hints are skipped for difficulty 4.
        if self.difficulty != 4 and len(self.scores) > 1:
            mov_avg = calculate_moving_average(self.scores[1:], 5)

            if len(mov_avg) > 1 and not self.win:
                f_dev = calculate_tendency_slope(mov_avg)
                f_dev_avg = calculate_moving_average(f_dev, 3)

                # Negative trend and no hint given in the last rounds.
                if f_dev_avg[len(f_dev_avg) - 1] < 0 and self.recent_hint == 0:
                    i = random.randint(0, len(self.Config.hint_intro) - 1)
                    feedback += "\n\n[hint]" + self.Config.hint_intro[i]

                    hint_text, self.n, self.last_hint = hint(
                        self.secret,
                        self.n,
                        model_class.model_st,
                        self.last_hint,
                        self.lang,
                        self._hint_config(),
                    )
                    feedback += "\n" + hint_text
                    self.recent_hint = 3

                # NOTE(review): the nesting level of this countdown is
                # assumed; confirm against the upstream file.
                if self.recent_hint != 0:
                    self.recent_hint -= 1

        # Refresh the ranking and attach it to the feedback.
        self.gen_rank(repeated)
        feedback += "[rank]" + self.ranking_msg if len(self.words) > 1 else "\n\n"

        return feedback

    def curiosity(self):
        """Return a curiosity about the secret word and archive the play.

        The ranking file (minus its first two lines) is stored together with
        session metadata as ``<session_id>-<secret>.json`` inside the
        ``plays/`` folder of the data directory.
        """
        # Generate a curiosity about the secret word.
        feedback = curiosity(self.secret, self._hint_config())

        # Collect the plays of the user from the ranking file.
        with open(self._ranking_file(), "r", encoding="utf-8") as original_file:
            file_content = original_file.readlines()[2:]

        new_file_name = f"{self.session_id}-{self.secret}.json"

        play_data = {
            "session_id": self.session_id,
            "datetime": str(datetime.now()),
            "time": time.time() - self.init_time,
            "data": file_content,
            "win": self.win,
            "secret": self.secret,
            "number_of_hints": self.n,
        }

        plays_dir = self.data_path + "plays/"
        os.makedirs(plays_dir, exist_ok=True)
        with open(plays_dir + new_file_name, "w") as new_file:
            json.dump(play_data, new_file, indent=4)

        return feedback

    def get_session_id(self):
        """Return the identifier of the current session."""
        return self.session_id