Spaces:

Jsevisal
/

semantrix_cond0

Paused

semantrix_cond0 / game.py

Javierss

Clone old repo

65ca37b about 4 hours ago

16.2 kB

	#!/usr/bin/env python3.10

	# """
	# Semantrix Game Module

	# This module defines the Semantrix class, which implements a word guessing game using word embeddings. The game can be configured to use either a Word2Vec model or a SentenceTransformer model for word embeddings. The game supports multiple languages and difficulty levels.

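	# Classes:
	# Model_class: Loads and holds the embedding models (a gensim KeyedVectors model and a SentenceTransformer encoder).
	# Semantrix: A class that implements the Semantrix word guessing game.
	# Semantrix.DictWrapper: A helper class to wrap configuration dictionaries.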

	# Functions:
	# __init__(self, lang=0, model_type="SentenceTransformer", session_hash=None): Initializes the Semantrix game with the specified language, model type and session hash.
	# reset_game(self): Starts a new session id and reloads the secret words.
	# prepare_game(self, secret_word_used, difficulty): Prepares the game with the secret word at the given index and the selected difficulty level.
	# gen_rank(self, repeated): Generates the ranking file based on the scores.
	# play_game(self, word, model_class): Plays the game with the selected word and returns feedback.
	# curiosity(self): Generates a curiosity hint about the secret word once the game is over.

	# Attributes:
	# model (KeyedVectors): The word embeddings model (held by Model_class and passed to play_game).
	# config_file_path (str): Path to the configuration file.
	# secret_file_path (str): Path to the secret words file.
	# data_path (str): Path to the data directory.
	# Config_full (dict): Full configuration data.
	# secret (dict): Secret words data.
	# lang (int): Language of the game (0 for Spanish, 1 for English).
	# model_type (str): Type of the model ("word2vec" or "SentenceTransformer").
	# Config (DictWrapper): Configuration data for the selected language.
	# secret_dict (dict): Secret words for the selected language.
	# secret_list (list): List of secret words for the selected difficulty.
	# words (list): List of words guessed by the player.
	# scores (list): List of scores for the guessed words.
	# win (bool): Indicates if the player has won the game.
	# n (int): Number of hints given.
	# recent_hint (int): Counter for recent hints.
	# f_dev_avg (float): Moving average of the tendency slope.
	# last_hint (int): Index of the last hint given.
	# difficulty (int): Difficulty level of the game.
	# """

	import os
	import sys
	import json
	import uuid
	import random
	from datetime import datetime
	import time
	from tqdm import tqdm
	import numpy as np
	from gensim.models import KeyedVectors
	from hints import curiosity, hint
	from tracking import (
	calculate_moving_average,
	calculate_tendency_slope,
	)
	from sentence_transformers import SentenceTransformer, util
	import warnings
	from huggingface_hub import snapshot_download


	# Suppress UserWarning messages issued from gensim modules so they do not
	# clutter the game's output.
	warnings.filterwarnings(action="ignore", category=UserWarning, module="gensim")


	class Model_class:
	    """Load and hold the embedding models used by the game.

	    Attributes:
	        model: gensim ``KeyedVectors`` with the vocabulary for the selected
	            language and model type (memory-mapped read-only).
	        model_st: ``SentenceTransformer`` encoder
	            ("paraphrase-multilingual-mpnet-base-v2").
	    """

	    base_path = os.path.dirname(os.path.abspath(__file__))

	    # model_type -> (Hub repo id, local directory relative to base_path,
	    #                suffix of the model file name)
	    # NOTE(review): the original code always downloaded
	    # "Jsevisal/strans_models" but declared "Jsevisal/w2v_models" for the
	    # word2vec case without using it; the per-type repo is used here —
	    # confirm the w2v repo actually hosts the *_w2v_model files.
	    _MODEL_SOURCES = {
	        "SentenceTransformer": (
	            "Jsevisal/strans_models",
	            "config/strans_models/",
	            "strans_model",
	        ),
	        "word2vec": (
	            "Jsevisal/w2v_models",
	            "config/w2v_models/",
	            "w2v_model",
	        ),
	    }

	    def __init__(self, lang=0, model_type="SentenceTransformer"):
	        """Load the KeyedVectors model and the SentenceTransformer encoder.

	        Args:
	            lang: 1 selects the English model; any other value selects the
	                Spanish one.
	            model_type: "SentenceTransformer" or "word2vec".

	        Raises:
	            ValueError: if ``model_type`` is not one of the supported values
	                (previously this silently left ``self.model`` undefined).
	        """
	        # Validate first so a bad argument fails fast, before any download.
	        if model_type not in self._MODEL_SOURCES:
	            raise ValueError(
	                f"Unsupported model_type {model_type!r}; expected one of "
	                f"{sorted(self._MODEL_SOURCES)}"
	            )

	        repo_id, dest_path, suffix = self._MODEL_SOURCES[model_type]
	        model_file = ("eng_" if lang == 1 else "esp_") + suffix

	        # Prefer a local copy of the exact model file; otherwise fetch the
	        # repository snapshot. The original only assigned ``model_path`` when
	        # a directory was missing, which raised UnboundLocalError when both
	        # local directories already existed.
	        local_dir = os.path.join(self.base_path, dest_path)
	        if os.path.exists(os.path.join(local_dir, model_file)):
	            model_path = local_dir
	        else:
	            model_path = snapshot_download(repo_id=repo_id)

	        self.model = KeyedVectors.load(
	            os.path.join(model_path, model_file),
	            mmap="r",
	        )

	        self.model_st = SentenceTransformer(
	            "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
	        )


	# Define the class Semantrix
	class Semantrix:
	    """Word-guessing game scored by embedding similarity to a secret word.

	    The embedding models are not owned by this class: a loader object
	    exposing ``model`` (gensim KeyedVectors) and ``model_st``
	    (SentenceTransformer) is passed to the methods that need it.
	    """

	    # Paths for the configuration files and the data directory
	    base_path = os.path.dirname(os.path.abspath(__file__))
	    config_file_path = os.path.join(base_path, "config/lang.json")
	    secret_file_path = os.path.join(base_path, "config/secret.json")
	    data_path = os.path.join(base_path, "data/")

	    class DictWrapper:
	        """Expose the keys of a configuration dict as attributes."""

	        def __init__(self, data_dict):
	            self.__dict__.update(data_dict)

	    def __init__(self, lang=0, model_type="SentenceTransformer", session_hash=None):
	        """Load configuration and secrets and create the session ranking file.

	        Args:
	            lang: 1 for English; any other value selects Spanish.
	            model_type: "word2vec" or "SentenceTransformer" (stored only).
	            session_hash: identifier used in the ranking file name.
	        """
	        # The JSON files hold accented text, so do not rely on the
	        # platform default encoding.
	        with open(self.config_file_path, "r", encoding="utf-8") as file:
	            self.Config_full = json.load(file)

	        with open(self.secret_file_path, "r", encoding="utf-8") as file:
	            self.secret = json.load(file)

	        self.lang = lang
	        self.session_id = str(uuid.uuid4().hex)
	        self.model_type = model_type

	        self.session_hash = session_hash
	        self.ranking_path = "rankings/ranking_" + str(self.session_hash) + ".txt"

	        self.ranking_data = []
	        self.ranking_msg = ""

	        lang_key = self._lang_key()
	        self.Config = self.DictWrapper(self.Config_full[lang_key]["Game"])
	        self.secret_dict = self.secret[lang_key]
	        self.secret_list = self.secret_dict["basic"]

	        # Create the ranking file
	        with open(self._ranking_file(), "w+", encoding="utf-8") as file:
	            file.write("---------------------------")

	    def _lang_key(self):
	        """Return the configuration key ("ENG" or "SPA") for ``self.lang``."""
	        return "ENG" if self.lang == 1 else "SPA"

	    def _hint_config(self):
	        """Return the "Hint" configuration section for the current language."""
	        return self.DictWrapper(self.Config_full[self._lang_key()]["Hint"])

	    def _ranking_file(self):
	        """Return the ranking file path, creating its directory if missing."""
	        path = self.data_path + self.ranking_path
	        folder = os.path.dirname(path)
	        if folder:
	            os.makedirs(folder, exist_ok=True)
	        return path

	    def reset_game(self):
	        """Start a new session id and reload the secret words.

	        ``prepare_game`` overwrites ``self.secret`` with the chosen word, so
	        the secrets file is read again here.
	        """
	        self.session_id = str(uuid.uuid4().hex)
	        with open(self.secret_file_path, "r", encoding="utf-8") as file:
	            self.secret = json.load(file)
	        # Use the session language; this was hard-coded to "SPA", which gave
	        # English sessions Spanish secret words after a reset.
	        self.secret_dict = self.secret[self._lang_key()]
	        self.secret_list = self.secret_dict["basic"]

	    def generate_gensim_model(self, model_class, batch_size=32):
	        """Re-encode the vocabulary with the SentenceTransformer and save it.

	        Every key of ``model_class.model`` is encoded in batches with
	        ``model_class.model_st`` and stored in a new 768-dimensional
	        KeyedVectors, saved as ``config/strans_models/esp_strans_model_prompt``
	        under ``base_path``.

	        Args:
	            model_class: loader exposing ``model`` and ``model_st``.
	            batch_size: number of words encoded per call.
	        """
	        self.model_trans = KeyedVectors(768)

	        # NOTE(review): init_sims is deprecated in gensim 4 — kept as in the
	        # original; confirm whether it is still needed on an empty model.
	        self.model_trans.init_sims(replace=True)
	        words = list(model_class.model.key_to_index.keys())

	        for start_index in tqdm(range(0, len(words), batch_size)):
	            batch_words = words[start_index : start_index + batch_size]

	            # Encode the batch of words
	            encoded_vectors = model_class.model_st.encode(
	                batch_words,
	                convert_to_tensor=True,
	                prompt="Encuentra el valor semántico de la palabra: ",
	            ).tolist()

	            # Add vectors to the model
	            self.model_trans.add_vectors(batch_words, encoded_vectors)

	        # Save next to the other models instead of relative to the current
	        # working directory, matching how the rest of the file builds paths.
	        out_dir = os.path.join(self.base_path, "config/strans_models")
	        os.makedirs(out_dir, exist_ok=True)
	        self.model_trans.save(os.path.join(out_dir, "esp_strans_model_prompt"))

	    def prepare_game(self, secret_word_used, difficulty):
	        """Select the secret word and reset the per-game state.

	        Args:
	            secret_word_used: index into ``self.secret_list``.
	            difficulty: difficulty level; 1 starts the hint counter at 3 and
	                4 disables hint generation in ``play_game``.
	        """
	        # From here on ``self.secret`` is the lower-cased secret word.
	        self.secret = self.secret_list[secret_word_used].lower()

	        self.init_time = time.time()

	        # Index 0 of both lists is a placeholder for the secret word
	        self.words = [self.Config.secret_word]
	        self.scores = [10]

	        # Initialize the game variables
	        self.win = False
	        self.recent_hint = 0
	        self.f_dev_avg = 0
	        self.last_hint = -1
	        self.difficulty = difficulty

	        # Hint counter depends on the difficulty
	        self.n = 3 if self.difficulty == 1 else 0

	    def gen_rank(self, repeated):
	        """Rebuild the ranking, write it to the ranking file and cache it.

	        The first entry is the word just played (``repeated`` if it is not
	        -1, otherwise the last word), followed by a separator and all guesses
	        in descending score order (placeholder at index 0 skipped).

	        Args:
	            repeated: index of the word if it had been played before, else -1.
	        """
	        current = repeated if repeated != -1 else len(self.words) - 1
	        by_score_desc = np.argsort(self.scores)[::-1]

	        self.ranking_data.clear()
	        self.ranking_data.append(
	            ["#" + str(current), self.words[current], self.scores[current]]
	        )
	        self.ranking_data.append("---------------------------")
	        for i in by_score_desc:
	            if i == 0:
	                continue
	            self.ranking_data.append(["#" + str(i), self.words[i], self.scores[i]])

	        lines = [f"{item}\n" for item in self.ranking_data]
	        with open(self._ranking_file(), "w+", encoding="utf-8") as file:
	            file.writelines(lines)

	        self.ranking_msg = "".join(lines)

	    def play_game(self, word, model_class):
	        """Score a guess and return the feedback message.

	        The returned text may contain the tags "[lose]", "[win]", "[hint]"
	        and "[rank]" used by the caller to split the message.

	        Args:
	            word: the guess, or "give_up" to surrender.
	            model_class: loader exposing ``model`` and ``model_st``.

	        Returns:
	            The feedback string for this round.
	        """
	        word = word.lower().strip()

	        # Check if the user wants to give up
	        if word == "give_up":
	            return (
	                "[lose]"
	                + str(self.Config.Feedback_9)
	                + self.secret
	                + "\n\n"
	                + self.Config.Feedback_10
	            )

	        # Reject unknown words before touching the game state. The original
	        # appended first and then popped the last entry, which removed a
	        # legitimate guess when the unknown word was already in the list
	        # (e.g. the placeholder at index 0).
	        if word not in model_class.model.key_to_index:
	            feedback = (
	                "I don't know that word. Try another word."
	                if self.lang == 1
	                else "No conozco esa palabra. Prueba con otra palabra."
	            )
	            feedback += (
	                "[rank]" + self.ranking_msg if len(self.words) > 1 else "\n\n"
	            )
	            return feedback

	        # Check if the word is repeated
	        if word in self.words:
	            repeated = self.words.index(word)
	        else:
	            repeated = -1
	            self.words.append(word)

	        similarity = model_class.model.similarity(self.secret, word)

	        # Both model types use the same 0-10 scale. Converting to a plain
	        # float keeps NumPy scalar reprs out of the ranking file.
	        score = round(float(similarity) * 10, 2)

	        # Only new words get a score entry
	        if repeated == -1:
	            self.scores.append(score)

	        # Generate the feedback message depending on the score
	        if score <= 2.5:
	            feedback = self.Config.Feedback_0 + str(score)
	        elif score <= 6.0:
	            feedback = self.Config.Feedback_1 + str(score)
	        elif score <= 7.0:
	            feedback = self.Config.Feedback_2 + str(score)
	        elif score <= 8:
	            feedback = self.Config.Feedback_3 + str(score)
	        elif score <= 9.0:
	            feedback = self.Config.Feedback_4 + str(score)
	        elif score < 10.0:
	            feedback = self.Config.Feedback_5 + str(score)
	        else:
	            # A score of 10 means the user wins: the secret replaces the
	            # placeholder at index 0.
	            self.win = True
	            feedback = "[win]" + self.Config.Feedback_8
	            self.words[0] = self.secret
	            # Drop the entries appended above; nothing was appended for a
	            # repeated word, so popping then would delete real guesses.
	            if repeated == -1:
	                self.words.pop()
	                self.scores.pop()

	        # Compare with the previous score.
	        # NOTE(review): on the first guess this is the placeholder score 10,
	        # so the "worse" message is always appended — confirm this is intended.
	        if not self.win:
	            previous = self.scores[len(self.scores) - 2]
	            if score > previous:
	                feedback += "\n" + self.Config.Feedback_6
	            elif score < previous:
	                feedback += "\n" + self.Config.Feedback_7

	        ## Hint generation
	        # Unless the difficulty is 4, track the trend of the scores
	        if self.difficulty != 4 and len(self.scores) > 1:
	            mov_avg = calculate_moving_average(self.scores[1:], 5)

	            if len(mov_avg) > 1 and not self.win:
	                f_dev = calculate_tendency_slope(mov_avg)
	                f_dev_avg = calculate_moving_average(f_dev, 3)

	                # Give a hint when the smoothed slope is negative and no
	                # hint was given in the recent rounds
	                if f_dev_avg[len(f_dev_avg) - 1] < 0 and self.recent_hint == 0:

	                    # Random hint intro from the configured list
	                    i = random.randint(0, len(self.Config.hint_intro) - 1)
	                    feedback += "\n\n[hint]" + self.Config.hint_intro[i]

	                    # Generate a dynamic hint
	                    hint_text, self.n, self.last_hint = hint(
	                        self.secret,
	                        self.n,
	                        model_class.model_st,
	                        self.last_hint,
	                        self.lang,
	                        self._hint_config(),
	                    )
	                    feedback += "\n" + hint_text
	                    self.recent_hint = 3

	            if self.recent_hint != 0:
	                self.recent_hint -= 1

	        # Generate the ranking file and append it to the feedback
	        self.gen_rank(repeated)
	        feedback += "[rank]" + self.ranking_msg if len(self.words) > 1 else "\n\n"

	        return feedback

	    def curiosity(self):
	        """Return a curiosity about the secret word and archive the play.

	        The ranking file (minus its first two lines) is stored together with
	        session metadata as ``plays/<session_id>-<secret>.json`` under
	        ``data_path``.
	        """
	        # Generate a curiosity about the secret word
	        feedback = curiosity(self.secret, self._hint_config())

	        # Skip the "current word" line and the separator
	        with open(
	            self.data_path + self.ranking_path, "r", encoding="utf-8"
	        ) as original_file:
	            file_content = original_file.readlines()[2:]

	        play_data = {
	            "session_id": self.session_id,
	            "datetime": str(datetime.now()),
	            "time": time.time() - self.init_time,
	            "data": file_content,
	            "win": self.win,
	            "secret": self.secret,
	            "number_of_hints": self.n,
	        }

	        plays_dir = self.data_path + "plays/"
	        os.makedirs(plays_dir, exist_ok=True)
	        new_file_name = f"{self.session_id}-{self.secret}.json"
	        with open(plays_dir + new_file_name, "w", encoding="utf-8") as new_file:
	            json.dump(play_data, new_file, indent=4)

	        return feedback

	    def get_session_id(self):
	        """Return the current session id."""
	        return self.session_id