Upload folder using huggingface_hub
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"id":"Rli_enT6lBDT","executionInfo":{"status":"ok","timestamp":1737395007014,"user_tz":-540,"elapsed":5212,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}}},"outputs":[],"source":["##%%\n","import os\n","import pickle\n","import json\n","# import random\n","# import torch\n","# import numpy as np\n","# import argparse\n","# import cohere\n","# from openai import OpenAI\n"]},{"cell_type":"code","source":["##%%\n","# import hashlib\n","from tqdm import tqdm\n","from itertools import product\n","# from collections import Counter\n","\n","# from transformers import LlamaForCausalLM, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM\n","from transformers import AutoTokenizer, AutoModelForCausalLM\n","from textgames import GAME_NAMES, LEVEL_IDS, game_filename, _game_class_from_name\n"],"metadata":{"id":"dp1F32B8oSfD","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1737395010583,"user_tz":-540,"elapsed":3547,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}},"outputId":"e9adeb5f-70eb-4ca9-dcbb-428e4b28ab41"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stderr","text":["/home/is/frederikus-h/miniconda3/envs/textgame/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n"," from .autonotebook import tqdm as notebook_tqdm\n"]}]},{"cell_type":"code","source":["os.environ.setdefault(\"TEXTGAMES_OUTPUT_DIR\", \"user_outputs\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2wEu1V1wvxn0","executionInfo":{"status":"ok","timestamp":1737395010664,"user_tz":-540,"elapsed":67,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}},"outputId":"cdcad20f-e357-4009-9f4f-0d4495ebd894"},"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":["'user_outputs'"]},"metadata":{},"execution_count":3}]},{"cell_type":"code","source":["##%%\n","gen_model_checkpoint = \"google/gemma-2-9b-it\"\n","quantize = True"],"metadata":{"id":"jZF8bkUcojTX","executionInfo":{"status":"ok","timestamp":1737395010678,"user_tz":-540,"elapsed":13,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","source":["kwargs = {\n"," \"device_map\": \"auto\",\n","} if quantize else {}"],"metadata":{"id":"VAF5sR9arYzS","executionInfo":{"status":"ok","timestamp":1737395010683,"user_tz":-540,"elapsed":2,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}}},"execution_count":5,"outputs":[]},{"cell_type":"code","source":["##%%\n","gen_model = AutoModelForCausalLM.from_pretrained(gen_model_checkpoint, **kwargs)\n","tokenizer = AutoTokenizer.from_pretrained(gen_model_checkpoint, **kwargs)"],"metadata":{"id":"tzqldl8ooRVL","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1737395038547,"user_tz":-540,"elapsed":27859,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}},"outputId":"902b638c-e6ce-4f8a-bba2-e9f7241c9a27"},"execution_count":6,"outputs":[{"output_type":"stream","name":"stderr","text":["Loading checkpoint shards: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:24<00:00, 6.19s/it]\n"]}]},{"cell_type":"code","source":["gen_model.device"],"metadata":{"id":"FeBUXdkWsWrL","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1737395038552,"user_tz":-540,"elapsed":3,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}},"outputId":"6437d1b7-02f8-47f5-d519-e979cefde795"},"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["device(type='cuda', index=0)"]},"metadata":{},"execution_count":7}]},{"cell_type":"code","source":["def get_gemma_response(text):\n"," # global gen_model, tokenizer\n"," messages = [\n"," {\"role\": \"user\", \"content\": text},\n"," ]\n","\n"," input_ids = tokenizer.apply_chat_template(\n"," messages,\n"," add_generation_prompt=True,\n"," return_tensors=\"pt\"\n"," ).to(gen_model.device)\n","\n"," terminators = [\n"," tokenizer.eos_token_id,\n"," tokenizer.convert_tokens_to_ids(\"<|eot_id|>\")\n"," ]\n","\n"," outputs = gen_model.generate(\n"," input_ids,\n"," max_new_tokens=100,\n"," eos_token_id=terminators,\n"," do_sample=True,\n"," temperature=.001,\n"," top_p=1,\n"," )\n","\n"," response = outputs[0][input_ids.shape[-1]:]\n"," return tokenizer.decode(response, skip_special_tokens=True)"],"metadata":{"id":"R5D4K-P2sPaj","executionInfo":{"status":"ok","timestamp":1737395038554,"user_tz":-540,"elapsed":1,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}}},"execution_count":8,"outputs":[]},{"cell_type":"markdown","source":["---\n","Example Call"],"metadata":{"id":"s5FEwOOvxf4h"}},{"cell_type":"code","source":["# @title\n","text = \\\n","\"\"\"\n","Given a set of rules to calculate point, sort the set of words in decreasing order.\n","When there 2 or more words with same point, sort lexicographically.\n","\n","Rules:\n","- every pair of 
consecutive consonant gets 5 points\n","- every pair of consecutive vowel gets 3 points\n","- add 1 point if there exists exactly 1 'g' in the word\n","- word less than 5 characters gets 10 points\n","- word starts with gen gets 100 points\n","- word ends with ta gets -1000 point\n","\n","Words:\n","- genta\n","- winata\n","- hudi\n","- alham\n","- aji\n","- ruochen\n","\n","Print only the answer.\n","\"\"\"\n","\n","print(text)"],"metadata":{"id":"T_tk4hTGsxsR","colab":{"base_uri":"https://localhost:8080/"},"cellView":"form","executionInfo":{"status":"ok","timestamp":1737392776367,"user_tz":-540,"elapsed":27,"user":{"displayName":"Frederikus Hudi","userId":"06160664103998835801"}},"outputId":"d5ea884f-d0fa-4134-ecd9-690eab51c976"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","Given a set of rules to calculate point, sort the set of words in decreasing order.\n","When there 2 or more words with same point, sort lexicographically.\n","\n","Rules:\n","- every pair of consecutive consonant gets 5 points\n","- every pair of consecutive vowel gets 3 points\n","- add 1 point if there exists exactly 1 'g' in the word\n","- word less than 5 characters gets 10 points\n","- word starts with gen gets 100 points\n","- word ends with ta gets -1000 point\n","\n","Words:\n","- genta\n","- winata\n","- hudi\n","- alham\n","- aji\n","- ruochen\n","\n","Print only the answer.\n","\n"]}]},{"cell_type":"code","source":["# Gold Answer:\n","# - aji 10\n","# - hudi 10\n","# - ruochen 5 3\n","# - alham 5\n","# - genta 5 1 100 -1000\n","# - winata -1000"],"metadata":{"id":"G-5yS4S-rdsN"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(get_gemma_response(text))"],"metadata":{"id":"05OI36v6vGoY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1737392724119,"user_tz":-540,"elapsed":6741,"user":{"displayName":"Frederikus 
Hudi","userId":"06160664103998835801"}},"outputId":"fe5d6ed2-d063-4f1c-b2e1-b3af8dbc456e"},"execution_count":9,"outputs":[{"output_type":"stream","name":"stdout","text":["genta\n","winata\n","ruochen\n","hudi\n","alham\n","aji \n","\n"]}]},{"cell_type":"markdown","source":["---\n","Automate run all sessions"],"metadata":{"id":"cxJ4WqHpxi75"}},{"cell_type":"code","source":["for game_name, difficulty_level in product([GAME_NAMES[4], *GAME_NAMES[:4], *GAME_NAMES[5:]], LEVEL_IDS[:3]):\n"," game_cls = _game_class_from_name(game_name)\n"," with open(f\"problemsets/{game_filename(game_name)}_{difficulty_level}.json\", \"r\", encoding=\"utf8\") as f:\n"," sid_prompt_dict = json.load(f)\n","\n"," correct_cnt = 0\n"," for sid, prompt in tqdm(list(sid_prompt_dict.items()), desc=f\"{game_filename(game_name)}_-_{difficulty_level}\"):\n"," cur_game = game_cls()\n"," cur_game.load_game(prompt)\n"," response = get_gemma_response(cur_game.get_prompt()).strip()\n"," solved, val_msg = cur_game.validate(response)\n"," with open(f\"model_outputs/results_gemma_2_9B_it.pkl\", \"ab\") as o:\n"," pickle.dump((f\"{game_filename(game_name)}_{difficulty_level}\", sid, response, (solved, val_msg)), o)\n"," if solved:\n"," correct_cnt += 1\n","\n"," print(f\"{game_name}_-_{difficulty_level}\")\n"," print(f\" Acc.: {correct_cnt / len(sid_prompt_dict):.2%}\")"],"metadata":{"id":"hCTXYpXa1UQ6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"GC-zkVI52IJX"},"execution_count":null,"outputs":[]}]}
712 |
713 |
# %%
714 |
def check_to_start_new_game(game_name, level, user=None, uid=None, sid=None):
715 |
print(game_name, level, uid, sid)
716 |
if game_name is None or level is None:
717 |
raise gr.Error("please choose both Game & Level")
718 |
fp = _get_file_output(game_name, LEVEL_IDS[LEVELS.index(level)], f"{uid}_{sid}")
719 |
if os.path.exists(fp):
720 |
# raise gr.Error(f"You have done this game already.<br/>{game_name} - {level}")
721 |
gr.Warning("You have done this game already. Only first attempt is recorded in the scoreboard.")
722 |
if user is None:
723 |
gr.Warning("no user, game will be generated randomly")
724 |
# else:
730 |
731 |
732 |
# %%
733 |
def check_played_game(solved_games, user, progress=gr.Progress()):
734 |
uid = user['email']
735 |
sid = user.get('sid', None)
736 |
matches = _files.list(
737 |
q=f"'{_folder_id}' in parents and mimeType='application/octet-stream' and name contains '{uid}_-_'",
738 |
fields=f"files(name, id, {_cksm_methods_str})",
739 |
740 |
matches = matches['files']
741 |
ret = dict()
742 |
for game_name in solved_games.keys():
743 |
cur = []
744 |
for level, level_id in zip(LEVELS, LEVEL_IDS):
745 |
fp_out = _get_file_output(game_name, level_id, f"{uid}_{sid}")
746 |
_matches = list(filter(lambda m: fp_out.endswith(m['name']), matches))
747 |
if os.path.exists(fp_out):
748 |
upload_to_drive(fp_out, _matches)
750 |
download_from_drive(fp_out, _matches)
751 |
if os.path.exists(fp_out):
752 |
753 |
ret[game_name] = cur or '∅'
754 |
return ret, gr.update()
755 |
756 |
@@ -130,7 +130,7 @@ with gr.Blocks(title="TextGames") as login_demo:
130 |
app = gr.mount_gradio_app(app, login_demo, path="/login")
131 |
132 |
with gr.Blocks(title="TextGames", css=css, delete_cache=(3600, 3600)) as demo:
133 |
((m, logout_btn, solved_games_df, game_radio, level_radio, new_game_btn, render_toggle),
134 |
(session_state, is_solved, solved_games, user_state, uid_state),
135 |
) = declare_components(demo, greet)
136 |
130 |
app = gr.mount_gradio_app(app, login_demo, path="/login")
131 |
132 |
with gr.Blocks(title="TextGames", css=css, delete_cache=(3600, 3600)) as demo:
133 |
((m, logout_btn, solved_games_df, game_radio, level_radio, new_game_btn, render_toggle, reset_sid_btn),
134 |
(session_state, is_solved, solved_games, user_state, uid_state),
135 |
) = declare_components(demo, greet)
136 |
@@ -0,0 +1,132 @@
1 |
from __future__ import annotations
2 |
3 |
4 |
import os
5 |
# os.environ.setdefault("GRADIO_SERVER_PORT", "1080")
6 |
# os.environ.setdefault("TEXTGAMES_SHOW_HIDDEN_LEVEL", "1")
7 |
os.environ.setdefault("TEXTGAMES_LOADGAME_DIR", "problemsets")
8 |
os.environ.setdefault("TEXTGAMES_LOADGAME_ID", "42")
9 |
os.environ.setdefault("TEXTGAMES_MOCKUSER", "")
10 |
os.environ.setdefault("TEXTGAMES_OUTPUT_DIR", "user_outputs")
11 |
favicon_path = "textgames-scrabble-black2-ss.png"
12 |
13 |
14 |
from play_helper import css, declare_components, start_new_game, check_played_game, download_from_drive, upload_to_drive, _leaderboards
15 |
import pandas as pd
16 |
import gradio as gr
17 |
import random
18 |
import json
19 |
from textgames import GAME_NAMES
20 |
21 |
22 |
23 |
os.makedirs(os.getenv('TEXTGAMES_OUTPUT_DIR', '.'), exist_ok=True)
24 |
25 |
26 |
27 |
def generate_sid(fp):
28 |
rand_int = random.randint(0, 1000)
29 |
with open(fp, "w", encoding="utf8") as f:
30 |
31 |
upload_to_drive(fp, mime_type="text/plain", update=True)
32 |
33 |
34 |
35 |
def get_sid(uid, force_generate_sid=False):
36 |
fp = f"{os.getenv('TEXTGAMES_OUTPUT_DIR')}/{uid}_sid.txt"
37 |
if force_generate_sid:
38 |
39 |
if not os.path.exists(fp):
40 |
download_from_drive(fp, mime_type="text/plain", compare_checksum=False)
41 |
if not os.path.exists(fp):
42 |
43 |
with open(fp, "r", encoding="utf8") as f:
44 |
sid = [_ for _ in f][-1]
45 |
return sid.strip()
46 |
47 |
48 |
49 |
def greet(request: gr.OAuthProfile | None):
50 |
user = {'email': os.getenv('TEXTGAMES_MOCKUSER', ''), 'name': ""}
51 |
if request is not None:
52 |
user = {'email': request.username, 'name': request.name, 'sid': get_sid(request.username)}
53 |
return f"""
54 |
Welcome to TextGames, {user['name'] or 'please login'}!
55 |
""", user, user['email']
56 |
57 |
58 |
59 |
with gr.Blocks(title="TextGames", css=css, delete_cache=(3600, 3600)) as demo:
60 |
((m, logout_btn, solved_games_df, game_radio, level_radio, new_game_btn, render_toggle, reset_sid_btn),
61 |
(session_state, is_solved, solved_games, user_state, uid_state),
62 |
) = declare_components(demo, greet, use_login_button=True)
63 |
64 |
65 |
reset_sid_checkbox = gr.Checkbox(False, visible=False, interactive=False)
66 |
67 |
lambda: [gr.update(interactive=False)]*2, None, [reset_sid_btn, new_game_btn]
68 |
69 |
lambda x: x, [reset_sid_checkbox], [reset_sid_checkbox],
70 |
js="(x) => confirm('Reset Progress? (cannot be undone)')"
71 |
72 |
lambda: [gr.update(interactive=True)]*2, None, [reset_sid_btn, new_game_btn]
73 |
74 |
75 |
def _resetting(confirmed, user):
    """Issue a fresh session id for the current user after a confirmed reset.

    Args:
        confirmed: Truthy when the reset was confirmed (value of
            ``reset_sid_checkbox`` in the event wiring of this file).
        user: The current user state. Only a dict is inspected; any other
            value (e.g. ``None``) is treated as "not logged in".

    Returns:
        tuple: ``(user, False)``. ``user`` is the same object that was passed
        in, with ``user['sid']`` replaced when a reset actually happened; the
        second element is always ``False``.
    """
    uid = user.get('email') if isinstance(user, dict) else None
    # `greet` gives an anonymous visitor the TEXTGAMES_MOCKUSER default, which
    # is an empty string rather than None. Checking falsiness keeps such a
    # visitor from generating (and uploading) a sid file for an empty uid.
    if not uid:
        gr.Warning("You need to log in first!")
    elif confirmed:
        user['sid'] = get_sid(uid, force_generate_sid=True)
    return user, False
82 |
83 |
lambda: [gr.update(interactive=False)]*3, None, [logout_btn, reset_sid_btn, new_game_btn]
84 |
85 |
_resetting, [reset_sid_checkbox, user_state], [user_state, reset_sid_checkbox]
86 |
87 |
check_played_game, [solved_games, user_state], [solved_games, solved_games_df]
88 |
89 |
lambda: [gr.update(interactive=True)]*3, None, [logout_btn, reset_sid_btn, new_game_btn]
90 |
91 |
92 |
93 |
@gr.render(inputs=[game_radio, level_radio, user_state, session_state, uid_state], triggers=[render_toggle.change])
def _start_new_game(game_name, level, user, _session_state, _uid_state):
    """Render callback fired on ``render_toggle.change``.

    Delegates to ``start_new_game`` only while the session state value is 1
    or 2; for any other value nothing is rendered.
    NOTE(review): the meaning of states 1 and 2 is defined outside this view.
    """
    if _session_state not in (1, 2):
        return
    # The component objects (session_state, is_solved, solved_games) are passed
    # on as-is; only `user` and `uid` come from the callback's input values.
    start_new_game(game_name, level, session_state, is_solved, solved_games, user=user, uid=_uid_state)
97 |
98 |
99 |
with demo.route("Leaderboards", "/leaderboard") as demo_leaderboard:
100 |
gr.Markdown("Under Construction. Will be available soon.")
101 |
leaderboards = []
102 |
for tab in ["🚅 Easy", "🚀 Medium", "🛸 Hard"]:
103 |
with gr.Tab(tab):
104 |
105 |
106 |
# if os.path.exists(_leaderboards):
107 |
# datas = []
108 |
# with open(_leaderboards, "r", encoding="utf8") as f:
109 |
# for line in f:
110 |
# datas.append(json.loads(line))
111 |
# concat = [{'Level': d['difficulty_level'], 'User': d['uid'], 'Game': d['game_name'].split('\t', 1)[0], 'Attempts': d['turns'],
112 |
# "Time": d['ed'] - d['st']} for d in datas]
113 |
# else:
114 |
def add_dummies():
115 |
return pd.DataFrame({
116 |
'User': ['dummy'],
117 |
'Solved': [' '.join([g.split('\t', 1)[0] for g in GAME_NAMES])],
118 |
'Attempts': [8],
119 |
'Time': [7200.8],
120 |
121 |
for l in leaderboards:
122 |
demo_leaderboard.load(add_dummies, None, [l])
123 |
124 |
125 |
126 |
# demo.launch()
127 |
128 |
favicon_path=favicon_path if os.path.exists(favicon_path) else None,
129 |
130 |
131 |
132 |
problemsets/Anagram Scribble_1.json
The diff for this file is too large to render.
See raw diff
problemsets/Anagram Scribble_2.json
The diff for this file is too large to render.
See raw diff
problemsets/Anagram Scribble_3.json
The diff for this file is too large to render.
See raw diff
problemsets/Bracket Game_1.json
The diff for this file is too large to render.
See raw diff
problemsets/Bracket Game_2.json
The diff for this file is too large to render.
See raw diff
problemsets/Bracket Game_3.json
The diff for this file is too large to render.
See raw diff
problemsets/Crossword Arranger_1.json
The diff for this file is too large to render.
See raw diff
problemsets/Crossword Arranger_2.json
The diff for this file is too large to render.
See raw diff
problemsets/Crossword Arranger_3.json
The diff for this file is too large to render.
See raw diff
@@ -0,0 +1,87 @@
1 |
import os
2 |
import json
3 |
from tqdm import tqdm
4 |
from textgames import GAME_NAMES, game_filename, _game_class_from_name
5 |
from pathlib import Path
6 |
7 |
8 |
PROBLEMSET_DIR = Path(os.getenv("TG_PROBLEMSET_DIR", "problemsets"))
9 |
MODEL_OUTPUT_DIR = Path(os.getenv("TG_MODEL_OUTPUT_DIR", "model_outputs"))
10 |
11 |
# "results_gemma-2-9b-it.1s.jsonl",
12 |
# "results_gemma-2-9b-it.zs.jsonl",
13 |
# "results_gemma-2-27b-it.1s.jsonl",
14 |
# "results_gemma-2-27b-it.zs.jsonl",
15 |
16 |
# "results_llama-3.1-8b-instruct.1s.jsonl",
17 |
# "results_llama-3.1-8b-instruct.zs.jsonl",
18 |
# "results_llama-3.1-70b-instruct.1s.jsonl",
19 |
# "results_llama-3.1-70b-instruct.zs.jsonl",
20 |
# "results_llama-3.3-70b-instruct.1s.jsonl",
21 |
# "results_llama-3.3-70b-instruct.zs.jsonl",
22 |
23 |
# "results_qwen2-5-7b-instruct.1s.jsonl",
24 |
# "results_qwen2-5-7b-instruct.zs.jsonl",
25 |
# "results_qwen2-5-14b-instruct.1s.jsonl",
26 |
# "results_qwen2-5-14b-instruct.zs.jsonl",
27 |
# "results_qwen2-5-32b-instruct.1s.jsonl",
28 |
# "results_qwen2-5-32b-instruct.zs.jsonl",
29 |
# "results_qwen2-5-72b-instruct.1s.jsonl",
30 |
# "results_qwen2-5-72b-instruct.zs.jsonl",
31 |
32 |
# "results_deepseek-r1-distill-14b.1s.jsonl",
33 |
# "results_deepseek-r1-distill-14b.zs.jsonl",
34 |
# "results_deepseek-r1-distill-14b.rerun.1s.jsonl",
35 |
36 |
# "results_chatgpt-4o-mini.zs.jsonl",
37 |
# "results_chatgpt-o3-mini.zs.jsonl",
38 |
39 |
# "results_qwen2-5-7b-instruct_sp.1s.jsonl",
40 |
# "results_qwen2-5-7b-instruct_sp.zs.jsonl",
41 |
42 |
# "results_deepseek-r1-distill-8b.1s.jsonl",
43 |
44 |
45 |
46 |
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
47 |
# !!! Must run reval_bracket_rerun.py first !!!
48 |
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
49 |
50 |
51 |
def revalidate_anagram_3(fp, reval_dir="revalidate_anagram_3", source_dir="prior_revalidate"):
52 |
os.makedirs(MODEL_OUTPUT_DIR/reval_dir, exist_ok=True)
53 |
count_pos, count_neg = 0, 0
54 |
with (open(MODEL_OUTPUT_DIR/source_dir/fp, "r", encoding="utf8") as i,
55 |
open(MODEL_OUTPUT_DIR/reval_dir/fp, "w", encoding="utf8") as o,
56 |
tqdm(total=1000, desc=fp) as pbar,
57 |
58 |
for line in i:
59 |
res = json.loads(line)
60 |
if (res['game'] == f"{game_filename(GAME_NAME)}_3"):
61 |
if (res['turn'] == 1):
62 |
cur_sid = res["session"]
63 |
prompt = sid_prompt_dict[cur_sid]
64 |
cur_game = game_cls()
65 |
66 |
67 |
elif solved == True:
68 |
69 |
70 |
assert cur_sid == res["session"]
71 |
solved, _ = cur_game.validate(res["response"])
72 |
if solved and not res["solved"]:
73 |
count_pos += 1
74 |
elif not solved and res["solved"]:
75 |
count_neg += 1
76 |
res["solved"] = solved
77 |
78 |
79 |
return count_pos, count_neg
80 |
81 |
82 |
if __name__ == "__main__":
83 |
game_cls = _game_class_from_name(GAME_NAME)
84 |
with open(f"{PROBLEMSET_DIR}/{game_filename(GAME_NAME)}_3.json", "r", encoding="utf8") as f:
85 |
sid_prompt_dict = json.load(f)
86 |
87 |
@@ -0,0 +1,94 @@
1 |
import os
2 |
import json
3 |
from tqdm import tqdm
4 |
from textgames import GAME_NAMES, game_filename, _game_class_from_name
5 |
from pathlib import Path
6 |
7 |
8 |
PROBLEMSET_DIR = Path(os.getenv("TG_PROBLEMSET_DIR", "problemsets"))
9 |
MODEL_OUTPUT_DIR = Path(os.getenv("TG_MODEL_OUTPUT_DIR", "model_outputs"))
10 |
11 |
# "results_gemma-2-9b-it.1s.jsonl",
12 |
# "results_gemma-2-9b-it.zs.jsonl",
13 |
# "results_gemma-2-27b-it.1s.jsonl",
14 |
# "results_gemma-2-27b-it.zs.jsonl",
15 |
16 |
# "results_llama-3.1-8b-instruct.1s.jsonl",
17 |
# "results_llama-3.1-8b-instruct.zs.jsonl",
18 |
# "results_llama-3.1-70b-instruct.1s.jsonl",
19 |
# "results_llama-3.1-70b-instruct.zs.jsonl",
20 |
# "results_llama-3.3-70b-instruct.1s.jsonl",
21 |
# "results_llama-3.3-70b-instruct.zs.jsonl",
22 |
23 |
# "results_qwen2-5-7b-instruct.1s.jsonl",
24 |
# "results_qwen2-5-7b-instruct.zs.jsonl",
25 |
# "results_qwen2-5-14b-instruct.1s.jsonl",
26 |
# "results_qwen2-5-14b-instruct.zs.jsonl",
27 |
# "results_qwen2-5-32b-instruct.1s.jsonl",
28 |
# "results_qwen2-5-32b-instruct.zs.jsonl",
29 |
# "results_qwen2-5-72b-instruct.1s.jsonl",
30 |
# "results_qwen2-5-72b-instruct.zs.jsonl",
31 |
32 |
# "results_deepseek-r1-distill-14b.1s.jsonl",
33 |
# "results_deepseek-r1-distill-14b.zs.jsonl",
34 |
# "results_deepseek-r1-distill-14b.rerun.1s.jsonl",
35 |
36 |
# "results_chatgpt-4o-mini.1s.jsonl",
37 |
# "results_chatgpt-4o-mini.zs.jsonl",
38 |
# "results_chatgpt-o3-mini.zs.jsonl",
39 |
40 |
# "results_qwen2-5-7b-instruct_sp.1s.jsonl",
41 |
# "results_qwen2-5-7b-instruct_sp.zs.jsonl",
42 |
43 |
# "results_deepseek-r1-distill-8b.1s.jsonl",
44 |
45 |
46 |
47 |
48 |
def revalidate_bracket(fp, reval_dir="revalidate_bracket_all",
49 |
source_dirs=("revalidate_bracket_rerun", "revalidate_anagram_3",)):
50 |
os.makedirs(MODEL_OUTPUT_DIR/reval_dir, exist_ok=True)
51 |
count_pos, count_neg = 0, 0
52 |
source_dir = "."
53 |
for source_dir in source_dirs:
54 |
if (MODEL_OUTPUT_DIR / source_dir / fp).exists():
55 |
56 |
with (open(MODEL_OUTPUT_DIR / source_dir / fp, "r", encoding="utf8") as i,
57 |
open(MODEL_OUTPUT_DIR / reval_dir / fp, "w", encoding="utf8") as o,
58 |
tqdm(total=3000, desc=fp) as pbar,
59 |
60 |
for line in i:
61 |
res = json.loads(line)
62 |
if (res['game'].startswith(f"{game_filename(GAME_NAME)}")):
63 |
sid_prompt_dict = sid_prompt_dicts[res['game'].rsplit("_", 1)[-1]]
64 |
if (res['turn'] == 1):
65 |
cur_sid = res["session"]
66 |
prompt = sid_prompt_dict[cur_sid]
67 |
cur_game = game_cls()
68 |
69 |
70 |
elif solved == True:
71 |
72 |
73 |
assert cur_sid == res["session"]
74 |
solved, _ = cur_game.validate(res["response"])
75 |
if solved and not res["solved"]:
76 |
count_pos += 1
77 |
elif not solved and res["solved"]:
78 |
count_neg += 1
79 |
res["solved"] = solved
80 |
81 |
82 |
return count_pos, count_neg
83 |
84 |
85 |
if __name__ == "__main__":
86 |
def load(k):
    """Read the problem-set JSON of level ``k`` for ``GAME_NAME``.

    Args:
        k: Level suffix of the file name; this script calls it with
            ``"1"``, ``"2"`` and ``"3"``.

    Returns:
        The object decoded from
        ``{PROBLEMSET_DIR}/{game_filename(GAME_NAME)}_{k}.json``.
    """
    problemset_path = f"{PROBLEMSET_DIR}/{game_filename(GAME_NAME)}_{k}.json"
    with open(problemset_path, "r", encoding="utf8") as handle:
        return json.load(handle)
90 |
sid_prompt_dicts = {k: load(k) for k in map(str, range(1, 4))}
91 |
game_cls = _game_class_from_name(GAME_NAME)
92 |
93 |
94 |
@@ -0,0 +1,46 @@
1 |
# @title ##### Combine Rerun of the Bracket - All
2 |
import os
3 |
import json
4 |
from tqdm import tqdm
5 |
from pathlib import Path
6 |
7 |
MODEL_OUTPUT_DIR = Path(os.getenv("TG_MODEL_OUTPUT_DIR", "model_outputs"))
8 |
fd_new = MODEL_OUTPUT_DIR / "__runs__" / "_redo_bracket"
9 |
fd_ori = MODEL_OUTPUT_DIR / "revalidate_anagram_3"
10 |
fd_out = MODEL_OUTPUT_DIR / "revalidate_bracket_rerun"
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
os.makedirs(fd_out, exist_ok=True)
36 |
for fp in tqdm(OUTPUT_FILENAMES):
37 |
with open(fd_out / fp, "w", encoding="utf8") as o:
38 |
with open(fd_ori / fp, "r", encoding="utf8") as i:
39 |
for line in i:
40 |
res = json.loads(line)
41 |
if res['game'].startswith("Bracket Game"):
42 |
43 |
44 |
with open((fd_new / fp).with_suffix(".6.jsonl"), "r", encoding="utf8") as i:
45 |
for line in i:
46 |
@@ -0,0 +1,94 @@
1 |
import os
2 |
import json
3 |
from tqdm import tqdm
4 |
from textgames import GAME_NAMES, game_filename, _game_class_from_name
5 |
from pathlib import Path
6 |
7 |
8 |
PROBLEMSET_DIR = Path(os.getenv("TG_PROBLEMSET_DIR", "problemsets"))
9 |
MODEL_OUTPUT_DIR = Path(os.getenv("TG_MODEL_OUTPUT_DIR", "model_outputs"))
10 |
11 |
# "results_gemma-2-9b-it.1s.jsonl",
12 |
# "results_gemma-2-9b-it.zs.jsonl",
13 |
# "results_gemma-2-27b-it.1s.jsonl",
14 |
# "results_gemma-2-27b-it.zs.jsonl",
15 |
16 |
# "results_llama-3.1-8b-instruct.1s.jsonl",
17 |
# "results_llama-3.1-8b-instruct.zs.jsonl",
18 |
# "results_llama-3.1-70b-instruct.1s.jsonl",
19 |
# "results_llama-3.1-70b-instruct.zs.jsonl",
20 |
# "results_llama-3.3-70b-instruct.1s.jsonl",
21 |
# "results_llama-3.3-70b-instruct.zs.jsonl",
22 |
23 |
# "results_qwen2-5-7b-instruct.1s.jsonl",
24 |
# "results_qwen2-5-7b-instruct.zs.jsonl",
25 |
# "results_qwen2-5-14b-instruct.1s.jsonl",
26 |
# "results_qwen2-5-14b-instruct.zs.jsonl",
27 |
# "results_qwen2-5-32b-instruct.1s.jsonl",
28 |
# "results_qwen2-5-32b-instruct.zs.jsonl",
29 |
# "results_qwen2-5-72b-instruct.1s.jsonl",
30 |
# "results_qwen2-5-72b-instruct.zs.jsonl",
31 |
32 |
# "results_deepseek-r1-distill-14b.1s.jsonl",
33 |
# "results_deepseek-r1-distill-14b.zs.jsonl",
34 |
# # "results_deepseek-r1-distill-14b.rerun.1s.jsonl",
35 |
36 |
# "results_chatgpt-4o-mini.1s.jsonl",
37 |
# "results_chatgpt-4o-mini.zs.jsonl",
38 |
# "results_chatgpt-o3-mini.zs.jsonl",
39 |
40 |
# # "results_qwen2-5-7b-instruct_sp.1s.jsonl",
41 |
# # "results_qwen2-5-7b-instruct_sp.zs.jsonl",
42 |
43 |
44 |
45 |
46 |
47 |
48 |
def revalidate_bracket(fp, reval_dir="revalidate_crosswords_all",
49 |
50 |
os.makedirs(MODEL_OUTPUT_DIR/reval_dir, exist_ok=True)
51 |
count_pos, count_neg = 0, 0
52 |
source_dir = "."
53 |
for source_dir in source_dirs:
54 |
if (MODEL_OUTPUT_DIR / source_dir / fp).exists():
55 |
56 |
with (open(MODEL_OUTPUT_DIR / source_dir / fp, "r", encoding="utf8") as i,
57 |
open(MODEL_OUTPUT_DIR / reval_dir / fp, "w", encoding="utf8") as o,
58 |
tqdm(total=3000, desc=fp) as pbar,
59 |
60 |
for line in i:
61 |
res = json.loads(line)
62 |
if (res['game'].startswith(f"{game_filename(GAME_NAME)}")):
63 |
sid_prompt_dict = sid_prompt_dicts[res['game'].rsplit("_", 1)[-1]]
64 |
if (res['turn'] == 1):
65 |
cur_sid = res["session"]
66 |
prompt = sid_prompt_dict[cur_sid]
67 |
cur_game = game_cls()
68 |
69 |
70 |
elif solved == True:
71 |
72 |
73 |
assert cur_sid == res["session"]
74 |
solved, _ = cur_game.validate(res["response"])
75 |
if solved and not res["solved"]:
76 |
count_pos += 1
77 |
elif not solved and res["solved"]:
78 |
count_neg += 1
79 |
res["solved"] = solved
80 |
81 |
82 |
return count_pos, count_neg
83 |
84 |
85 |
if __name__ == "__main__":
86 |
def load(k):
    """Read the problem-set JSON of level ``k`` for ``GAME_NAME``.

    Args:
        k: Level suffix of the file name; this script calls it with
            ``"1"``, ``"2"`` and ``"3"``.

    Returns:
        The object decoded from
        ``{PROBLEMSET_DIR}/{game_filename(GAME_NAME)}_{k}.json``.
    """
    problemset_path = f"{PROBLEMSET_DIR}/{game_filename(GAME_NAME)}_{k}.json"
    with open(problemset_path, "r", encoding="utf8") as handle:
        return json.load(handle)
90 |
sid_prompt_dicts = {k: load(k) for k in map(str, range(1, 4))}
91 |
game_cls = _game_class_from_name(GAME_NAME)
92 |
93 |
94 |
@@ -0,0 +1,94 @@
1 |
import os
2 |
import json
3 |
from tqdm import tqdm
4 |
from textgames import GAME_NAMES, game_filename, _game_class_from_name
5 |
from pathlib import Path
6 |
7 |
8 |
PROBLEMSET_DIR = Path(os.getenv("TG_PROBLEMSET_DIR", "problemsets"))
9 |
MODEL_OUTPUT_DIR = Path(os.getenv("TG_MODEL_OUTPUT_DIR", "model_outputs"))
10 |
11 |
# "results_gemma-2-9b-it.1s.jsonl",
12 |
# "results_gemma-2-9b-it.zs.jsonl",
13 |
# "results_gemma-2-27b-it.1s.jsonl",
14 |
# "results_gemma-2-27b-it.zs.jsonl",
15 |
16 |
# "results_llama-3.1-8b-instruct.1s.jsonl",
17 |
# "results_llama-3.1-8b-instruct.zs.jsonl",
18 |
# "results_llama-3.1-70b-instruct.1s.jsonl",
19 |
# "results_llama-3.1-70b-instruct.zs.jsonl",
20 |
# "results_llama-3.3-70b-instruct.1s.jsonl",
21 |
# "results_llama-3.3-70b-instruct.zs.jsonl",
22 |
23 |
# "results_qwen2-5-7b-instruct.1s.jsonl",
24 |
# "results_qwen2-5-7b-instruct.zs.jsonl",
25 |
# "results_qwen2-5-14b-instruct.1s.jsonl",
26 |
# "results_qwen2-5-14b-instruct.zs.jsonl",
27 |
# "results_qwen2-5-32b-instruct.1s.jsonl",
28 |
# "results_qwen2-5-32b-instruct.zs.jsonl",
29 |
# "results_qwen2-5-72b-instruct.1s.jsonl",
30 |
# "results_qwen2-5-72b-instruct.zs.jsonl",
31 |
32 |
# "results_deepseek-r1-distill-14b.1s.jsonl",
33 |
# "results_deepseek-r1-distill-14b.zs.jsonl",
34 |
# # "results_deepseek-r1-distill-14b.rerun.1s.jsonl",
35 |
36 |
# "results_chatgpt-4o-mini.1s.jsonl",
37 |
# "results_chatgpt-4o-mini.zs.jsonl",
38 |
# "results_chatgpt-o3-mini.zs.jsonl",
39 |
40 |
# # "results_qwen2-5-7b-instruct_sp.1s.jsonl",
41 |
# # "results_qwen2-5-7b-instruct_sp.zs.jsonl",
42 |
43 |
44 |
45 |
46 |
47 |
48 |
def revalidate_bracket(fp, reval_dir="revalidate_sudoku_all",
49 |
50 |
os.makedirs(MODEL_OUTPUT_DIR/reval_dir, exist_ok=True)
51 |
count_pos, count_neg = 0, 0
52 |
source_dir = "."
53 |
for source_dir in source_dirs:
54 |
if (MODEL_OUTPUT_DIR / source_dir / fp).exists():
55 |
56 |
with (open(MODEL_OUTPUT_DIR / source_dir / fp, "r", encoding="utf8") as i,
57 |
open(MODEL_OUTPUT_DIR / reval_dir / fp, "w", encoding="utf8") as o,
58 |
tqdm(total=3000, desc=fp) as pbar,
59 |
60 |
for line in i:
61 |
res = json.loads(line)
62 |
if (res['game'].startswith(f"{game_filename(GAME_NAME)}")):
63 |
sid_prompt_dict = sid_prompt_dicts[res['game'].rsplit("_", 1)[-1]]
64 |
if (res['turn'] == 1):
65 |
cur_sid = res["session"]
66 |
prompt = sid_prompt_dict[cur_sid]
67 |
cur_game = game_cls()
68 |
69 |
70 |
elif solved == True:
71 |
72 |
73 |
assert cur_sid == res["session"]
74 |
solved, _ = cur_game.validate(res["response"])
75 |
if solved and not res["solved"]:
76 |
count_pos += 1
77 |
elif not solved and res["solved"]:
78 |
count_neg += 1
79 |
res["solved"] = solved
80 |
81 |
82 |
return count_pos, count_neg
83 |
84 |
85 |
if __name__ == "__main__":
86 |
def load(k):
    """Read the problem-set JSON of level ``k`` for ``GAME_NAME``.

    Args:
        k: Level suffix of the file name; this script calls it with
            ``"1"``, ``"2"`` and ``"3"``.

    Returns:
        The object decoded from
        ``{PROBLEMSET_DIR}/{game_filename(GAME_NAME)}_{k}.json``.
    """
    problemset_path = f"{PROBLEMSET_DIR}/{game_filename(GAME_NAME)}_{k}.json"
    with open(problemset_path, "r", encoding="utf8") as handle:
        return json.load(handle)
90 |
sid_prompt_dicts = {k: load(k) for k in map(str, range(1, 4))}
91 |
game_cls = _game_class_from_name(GAME_NAME)
92 |
93 |
94 |
![]() |
![]() |
Git LFS Details
@@ -14,8 +14,10 @@ from pandas import read_csv
14 |
import json
15 |
16 |
17 |
# [
18 |
# "
19 |
20 |
k: v.get_game_name() for k, v in [
21 |
("1", CrosswordArrangerGame),
@@ -60,12 +62,13 @@ def _game_class_from_name(game_name):
60 |
return None
61 |
62 |
63 |
def preload_game(game_name, level_id, user):
64 |
game_cls = _game_class_from_name(game_name)
65 |
66 |
67 |
68 |
69 |
print(f"preload_game('{game_name}', '{level_id}', '{user['email']}') on {sid}")
70 |
71 |
with open(f"problemsets/{game_filename(game_name)}_{level_id}.json", "r", encoding="utf8") as f:
14 |
import json
15 |
16 |
17 |
# [
18 |
# "📰\tCrossword Arranger", "🧩\tText Sudoku", "🏝️\tIslands", "🔑\tPassword Game",
19 |
# "📈\tOrdering Text", "🔤\tAnagram Scribble", "🗳️\tBracket Game", "🔎\tString Search",
20 |
# ]
21 |
22 |
k: v.get_game_name() for k, v in [
23 |
("1", CrosswordArrangerGame),
62 |
return None
63 |
64 |
65 |
def preload_game(game_name, level_id, user, sid=None):
66 |
game_cls = _game_class_from_name(game_name)
67 |
if not sid:
68 |
email_sid_dict = read_csv(
69 |
f"{os.getenv('TEXTGAMES_OUTPUT_DIR')}/textgames_userauth.tsv", sep='\t'
70 |
71 |
sid = email_sid_dict.get(user["email"])
72 |
print(f"preload_game('{game_name}', '{level_id}', '{user['email']}') on {sid}")
73 |
74 |
with open(f"problemsets/{game_filename(game_name)}_{level_id}.json", "r", encoding="utf8") as f:
@@ -5,6 +5,7 @@ import json
5 |
import string
6 |
import re
7 |
8 |
class AnagramScribble(BaseGame):
9 |
10 |
def get_game_name() -> str:
@@ -43,6 +44,18 @@ class AnagramScribble(BaseGame):
43 |
if total_chars_extraction != "Error loading game state.":
44 |
characters = total_chars_extraction.split(",")
45 |
self.total_chars = [char.strip().strip("'") for char in characters]
46 |
47 |
def _generate_new_game(self, *args, **kwargs) -> None:
48 |
self.low_num_chars = kwargs['low_num_chars']
@@ -57,16 +70,16 @@ class AnagramScribble(BaseGame):
57 |
58 |
def _get_prompt(self) -> str:
59 |
if self.allow_repeat:
60 |
prompt = f"Construct a valid {self.num_chars}-character English word from the following letters:\n{self.total_chars}.\nEach character can be used multiple times. Please write None if there is no valid combination."
61 |
62 |
prompt = f"Construct a valid {self.num_chars}-character English word from the following letters:\n{self.total_chars}.\nEach character can only be used once. Please write None if there is no valid combination."
63 |
return prompt
64 |
65 |
def _validate(self, answer: str) -> (bool, str):
66 |
67 |
if self.possible_ans != "" and answer == "none":
68 |
val_msg = "There is a valid answer."
69 |
return False, val_msg
70 |
if len(answer) != self.num_chars:
71 |
val_msg = f"Your answer must be exactly {self.num_chars} characters long"
72 |
return False, val_msg
@@ -74,12 +87,31 @@ class AnagramScribble(BaseGame):
74 |
if char not in self.total_chars:
75 |
val_msg = "Your answer must only contain the characters provided"
76 |
return False, val_msg
77 |
if (not self.allow_repeat and (len(set(answer)) != len(answer))
78 |
79 |
80 |
81 |
if answer not in self.WORD_LIST_BIN[str(self.num_chars)]:
82 |
val_msg = "Your answer is not a valid English word"
83 |
return False, val_msg
84 |
85 |
return True, ""
5 |
import string
6 |
import re
7 |
8 |
9 |
class AnagramScribble(BaseGame):
10 |
11 |
def get_game_name() -> str:
44 |
if total_chars_extraction != "Error loading game state.":
45 |
characters = total_chars_extraction.split(",")
46 |
self.total_chars = [char.strip().strip("'") for char in characters]
47 |
self.possible_ans = ""
48 |
_chars = sorted(self.total_chars)
49 |
for w in self.WORD_LIST_BIN[str(self.num_chars)]:
50 |
_ans = sorted(w)
51 |
j, k = 0, 0
52 |
while j < len(_ans) and k < len(_chars):
53 |
if _ans[j] == _chars[k]:
54 |
j += 1
55 |
k += 1
56 |
if j >= len(_ans):
57 |
self.possible_ans = w
58 |
59 |
60 |
def _generate_new_game(self, *args, **kwargs) -> None:
61 |
self.low_num_chars = kwargs['low_num_chars']
70 |
71 |
def _get_prompt(self) -> str:
72 |
if self.allow_repeat:
73 |
prompt = f"Construct a valid {self.num_chars}-character English word from the following letters:\n{self.total_chars}.\nEach character can be used multiple times. Please write None if there is no valid combination. Print only the answer.\n"
74 |
75 |
prompt = f"Construct a valid {self.num_chars}-character English word from the following letters:\n{self.total_chars}.\nEach character can only be used once. Please write None if there is no valid combination. Print only the answer.\n"
76 |
return prompt
77 |
78 |
def _validate(self, answer: str) -> (bool, str):
79 |
if self.possible_ans != "" and answer == "None":
80 |
val_msg = "There is a valid answer."
81 |
return False, val_msg
82 |
answer = answer.lower()
83 |
if len(answer) != self.num_chars:
84 |
val_msg = f"Your answer must be exactly {self.num_chars} characters long"
85 |
return False, val_msg
87 |
if char not in self.total_chars:
88 |
val_msg = "Your answer must only contain the characters provided"
89 |
return False, val_msg
90 |
# if (not self.allow_repeat and (len(set(answer)) != len(answer))
91 |
# and (len(self.possible_ans) == len(set(self.possible_ans)))):
92 |
if not self.allow_repeat:
93 |
_ans = sorted(answer)
94 |
_chars = sorted(self.total_chars)
95 |
j, k = 0, 0
96 |
while j < len(_ans) and k < len(_chars):
97 |
if _ans[j] == _chars[k]:
98 |
j += 1
99 |
k += 1
100 |
if j < len(_ans):
101 |
val_msg = "Your answer must not contain repeated characters"
102 |
return False, val_msg
103 |
if answer not in self.WORD_LIST_BIN[str(self.num_chars)]:
104 |
val_msg = "Your answer is not a valid English word"
105 |
return False, val_msg
106 |
107 |
return True, ""
108 |
109 |
110 |
def example() -> (str, str):
    """Return one worked (prompt, answer) pair for the anagram game.

    The prompt offers six letters and allows reuse, so the sample answer
    "sleep" may use the letter 'e' twice.
    """
    prompt_lines = [
        "Construct a valid 5-character English word from the following letters:",
        "['e', 'l', 'o', 'b', 's', 'p'].",
        "Each character can be used multiple times. Please write None if there is no valid combination."
        " Print only the answer.",
        "",  # trailing empty entry yields the final newline of the prompt
    ]
    return "\n".join(prompt_lines), "sleep"
117 |
@@ -1,5 +1,6 @@
1 |
import random
2 |
import re
3 |
from pathlib import Path
4 |
from textgames.base_game import BaseGame
5 |
@@ -57,48 +58,94 @@ class BracketGame(BaseGame):
57 |
self.MULTI_WORD_LIST.append(self.WORD_LIST[num1] + self.WORD_LIST[num2])
58 |
59 |
def _validate(self, answer: str) -> (bool, str):
60 |
61 |
arr = answer.split(rule[0])
62 |
63 |
if rule[1][1] not in arr[0] or rule[1][2] not in arr[1]:
64 |
val_msg = f"{rule[0]} is not between the correct bracket, {rule[1][1]} not in {arr[0]} and {rule[1][2]} not in {arr[1]}"
65 |
return False, val_msg
66 |
67 |
filter_answer = answer
68 |
for i in range(0, 26):
69 |
cc = chr(ord("a") + i)
70 |
filter_answer = filter_answer.replace(cc,"")
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
val_msg = "There is a closing bracket without an open bracket"
94 |
return False, val_msg
95 |
96 |
97 |
98 |
99 |
val_msg = f"The depth of the bracket is {
100 |
return False, val_msg
101 |
102 |
def _generate_new_game(self, *args, **kwargs) -> None:
103 |
num_words = kwargs["num_words"]
104 |
num_rules = kwargs["num_rules"]
@@ -141,6 +188,7 @@ class BracketGame(BaseGame):
141 |
prompt = f"You are given a text {self.string} Your job is to put some valid parenthesis brackets in the text such that:\n"
142 |
for rule in self.rules:
143 |
prompt += f"- \"{rule[0]}\" is inside a {rule[1][0]} bracket\n"
144 |
prompt += f"The bracket depth must be {self.depth} and print only the answer\n"
145 |
return prompt
146 |
@@ -159,7 +207,7 @@ class BracketGame(BaseGame):
159 |
160 |
return 0
161 |
162 |
content = state_string.split("the text such that:")[1].split("\nThe
163 |
164 |
self.words = []
165 |
self.rules = []
@@ -188,3 +236,14 @@ class BracketGame(BaseGame):
188 |
189 |
190 |
1 |
import random
2 |
import re
3 |
from bisect import bisect_left
4 |
from pathlib import Path
5 |
from textgames.base_game import BaseGame
6 |
58 |
self.MULTI_WORD_LIST.append(self.WORD_LIST[num1] + self.WORD_LIST[num2])
59 |
60 |
def _validate(self, answer: str) -> (bool, str):
61 |
answer = "".join(answer.split()).lower()
62 |
63 |
if ("".join(filter(lambda a: a.isalpha(), answer)) !=
64 |
"".join(filter(lambda a: a.isalpha(), self.string.lower()))):
65 |
val_msg = f"You are not allowed to change the character sequence of base text '{self.string}'."
66 |
return False, val_msg
67 |
68 |
char2type_op = {b[1]: b[0] for b in self.BRACKETS}
69 |
char2type_ed = {b[2]: b[0] for b in self.BRACKETS}
70 |
71 |
depth_count = {b[0]: [(-1, 0)] for b in self.BRACKETS}
72 |
73 |
def push(dc, v):
74 |
cur_depth = dc[-1][-1]
75 |
if cur_depth < 0:
76 |
return False
77 |
dc.append((i, cur_depth + v))
78 |
return True
79 |
80 |
mak, cur_mak = 0, 0
81 |
for i, c in enumerate(answer):
82 |
if c in char2type_op:
83 |
push(depth_count[char2type_op[c]], 1)
84 |
cur_mak += 1
85 |
elif c in char2type_ed:
86 |
if not push(depth_count[char2type_ed[c]], -1):
87 |
val_msg = "There is a closing bracket without an open bracket"
88 |
return False, val_msg
89 |
cur_mak -= 1
90 |
mak = max(mak, cur_mak)
91 |
92 |
if mak != self.depth:
93 |
val_msg = f"The depth of the bracket is {mak}. The expected depth is {self.depth}"
94 |
return False, val_msg
95 |
96 |
for rule in self.rules:
97 |
i = answer.find(rule[0])
98 |
if i < 0:
99 |
val_msg = f"The text '{rule[0]}' is not found in your answer."
100 |
return False, val_msg
101 |
102 |
i_depth = bisect_left(depth_count[rule[1][0]], (i, -1)) - 1
103 |
if depth_count[rule[1][0]][i_depth][-1] < 1:
104 |
val_msg = f"The text '{rule[0]}' is not inside any {rule[1][0]} bracket {rule[1][1]} {rule[1][2]}"
105 |
return False, val_msg
106 |
107 |
# arr = answer.split(rule[0])
108 |
# if rule[1][1] not in arr[0] or rule[1][2] not in arr[1]:
109 |
# val_msg = f"The text '{rule[0]}' is not between the correct bracket, {rule[1][1]} not in {arr[0]} and {rule[1][2]} not in {arr[1]}"
110 |
# return False, val_msg
111 |
112 |
return True, ""
113 |
114 |
# filter_answer = answer
115 |
# for i in range(0, 26):
116 |
# cc = chr(ord("a") + i)
117 |
# filter_answer = filter_answer.replace(cc,"")
118 |
119 |
# cc = chr(ord("A") + i)
120 |
# filter_answer = filter_answer.replace(cc,"")
121 |
122 |
# open_bracket_list = ["[", "{", "(", "<"]
123 |
# close_bracket_map = {
124 |
# "[":"]", "{":"}", "(":")", "<":">"
125 |
# }
126 |
127 |
# # check max depth
128 |
# count = 0
129 |
# st = []
130 |
131 |
# for i in range(len(filter_answer)):
132 |
# if (filter_answer[i] in open_bracket_list):
133 |
# st.append(filter_answer[i]) # pushing the bracket in the stack
134 |
# else:
135 |
# if len(st) > 0 and (filter_answer[i] == close_bracket_map[st[-1]]):
136 |
# if (count < len(st)):
137 |
# count = len(st)
138 |
# st.pop()
139 |
# else:
140 |
# val_msg = "There is a closing bracket without an open bracket"
141 |
# return False, val_msg
142 |
143 |
# if count == self.depth:
144 |
# return True, ""
145 |
# else:
146 |
# val_msg = f"The depth of the bracket is {count}. The expected depth is {self.depth}"
147 |
# return False, val_msg
148 |
149 |
def _generate_new_game(self, *args, **kwargs) -> None:
150 |
num_words = kwargs["num_words"]
151 |
num_rules = kwargs["num_rules"]
188 |
prompt = f"You are given a text {self.string} Your job is to put some valid parenthesis brackets in the text such that:\n"
189 |
for rule in self.rules:
190 |
prompt += f"- \"{rule[0]}\" is inside a {rule[1][0]} bracket\n"
191 |
prompt += "The open and close parenthesis for block is [ ], curly is { }, round is ( ), and angle is < >\n"
192 |
prompt += f"The bracket depth must be {self.depth} and print only the answer\n"
193 |
return prompt
194 |
207 |
208 |
return 0
209 |
210 |
content = state_string.split("the text such that:")[1].split("\nThe open and close parenthesis ")[0].split("\n")
211 |
212 |
self.words = []
213 |
self.rules = []
236 |
237 |
238 |
239 |
240 |
241 |
def example() -> (str, str):
    """Return one worked (prompt, answer) pair for the bracket game.

    The sample asks for three words of "fabuloustextgames" to be wrapped in
    round, angle and block brackets with an overall bracket depth of 2.
    """
    placements = [("games", "round"), ("text", "angle"), ("fabulous", "block")]
    pieces = ["You are given a text fabuloustextgames Your job is to put some valid parenthesis brackets"
              " in the text such that:\n"]
    pieces.extend(f"- \"{word}\" is inside a {kind} bracket\n" for word, kind in placements)
    pieces.append("The open and close parenthesis for block is [ ], curly is { }, round is ( ), and angle is < >\n")
    pieces.append("The bracket depth must be 2 and print only the answer\n")
    return "".join(pieces), "[[fabulous]<text>(games)]"
@@ -125,19 +125,47 @@ class CrosswordArrangerGame(BaseGame):
125 |
return prompt
126 |
127 |
def _validate(self, answer: str) -> (bool, str):
128 |
129 |
val_msg = ""
130 |
if len(ans_hor) != self.board_size:
131 |
val_msg = f"Mismatch answer length found!! Expected size of {self.board_size}, got {len(ans_hor)}."
132 |
return False, val_msg
133 |
ans_ver = [''.join(ans_hor[r][c] for r in range(self.board_size)) for c in range(self.board_size)]
134 |
word_set = set(self.word_list)
135 |
for w in chain(ans_hor, ans_ver):
136 |
if w not in word_set:
137 |
return False, val_msg
138 |
139 |
return True, val_msg
140 |
141 |
142 |
143 |
125 |
return prompt
126 |
127 |
def _validate(self, answer: str) -> (bool, str):
128 |
answer = answer if answer else ""
129 |
# ans_hor = list(filter(None, answer.lower().replace(' ', '\n').split("\n")))
130 |
ans_hor = answer.lower().split()
131 |
val_msg = ""
132 |
if len(ans_hor) != self.board_size:
133 |
arr = answer.lower().split()
134 |
if all(len(l) == 1 for l in arr) and (len(arr) == self.board_size * self.board_size):
135 |
ans_hor = ["".join(arr[i:i+self.board_size]) for i in range(0, len(arr), self.board_size)]
136 |
if len(ans_hor) != self.board_size:
137 |
val_msg = f"Mismatch answer length found!! Expected size of {self.board_size}, got {len(ans_hor)}."
138 |
return False, val_msg
139 |
for w in ans_hor:
140 |
if len(w) != self.board_size:
141 |
val_msg = f"Mismatch answer length found!! Expected size of {self.board_size}, got {len(w)}."
142 |
return False, val_msg
143 |
ans_ver = [''.join(ans_hor[r][c] for r in range(self.board_size)) for c in range(self.board_size)]
144 |
word_set = set(self.word_list)
145 |
for i, w in enumerate(chain(ans_hor, ans_ver)):
146 |
if w not in word_set:
147 |
val_msg = (f"Mismatch answer word found!! {'Horizontal' if i < self.board_size else 'Vertical'} word"
148 |
f" '{w}' is not in the word set.")
149 |
return False, val_msg
150 |
151 |
return True, val_msg
152 |
153 |
154 |
def example() -> (str, str):
    """Return one worked (prompt, answer) pair for the crossword arranger.

    The sample is a 3x3 board with eight candidate words; the answer lists
    the three rows of the board separated by newlines.
    """
    candidates = ["app", "all", "and", "lee", "let", "pat", "pee", "pet"]
    header = ("Given a board size of 3x3, arrange a possible crossword puzzle answer from a list of words.\n"
              "Item in the list can only be used once.\n\n"
              "List of words:\n")
    listing = "".join(f"- {word}\n" for word in candidates)
    prompt = header + listing + "\nPrint only the answer."
    board_rows = ("app", "lee", "let")
    return prompt, "\n".join(board_rows)
169 |
170 |
171 |
@@ -99,8 +99,8 @@ class Islands(BaseGame):
99 |
answer = [a.replace(" ", "").lower().strip() for a in answer]
100 |
101 |
# check the size
102 |
if len(answer) != self.N or len(
103 |
val_msg = f"2D grid is not {self.N} x {self.N}. ({len(answer)} x {len(answer
104 |
return False, val_msg
105 |
106 |
# check the tiles, ensure they are valid
@@ -194,4 +194,16 @@ Your 2D grid must follow the following rules:
194 |
195 |
Print only the answer.
196 |
197 |
return prompt
99 |
answer = [a.replace(" ", "").lower().strip() for a in answer]
100 |
101 |
# check the size
102 |
if len(answer) != self.N or any((len(a) < self.N) for a in answer):
103 |
val_msg = f"2D grid is not {self.N} x {self.N}. ({len(answer)} x {set(len(a) for a in answer)})"
104 |
return False, val_msg
105 |
106 |
# check the tiles, ensure they are valid
194 |
195 |
Print only the answer.
196 |
197 |
return prompt
198 |
199 |
200 |
def example() -> (str, str):
    """Return one worked (prompt, answer) pair for the islands game.

    The sample asks for a 5 x 5 grid with exactly one island of 1 to 2 tiles;
    the answer puts two land tiles at the end of the first row.
    """
    # Paragraphs of the prompt; they are separated by one blank line each.
    paragraphs = [
        "You are asked to construct a 2D 5 x 5 grid, consisting of water tiles (denoted by \u2019.\u2019), \n"
        "land tiles (denoted by \u2019#\u2019). ",
        "A group of connected land tiles in 4 cardinal directions forms an island.",
        "Your 2D grid must follow the following rules:\n"
        "- There must be exactly 1 islands.\n"
        "- The size of each island must be from 1 to 2 tiles.",
        "Print only the answer.\n",
    ]
    grid_rows = ["...##"] + ["....."] * 4
    return "\n\n".join(paragraphs), "\n".join(grid_rows)
@@ -5,10 +5,10 @@ Rules Description
5 |
6 |
word length:
7 |
- example: word less than 5 characters gets 10 points
8 |
- possible operands: {
9 |
10 |
- possible combinations: {
11 |
- only 1
12 |
13 |
neighboring / consecutive chars
14 |
- example: every pair of consecutive consonant gets 5 points
@@ -66,6 +66,15 @@ from textgames.base_game import BaseGame
66 |
from textgames.assets.word_list import WORDS_LIST, WORDS_BY_LEN
67 |
68 |
69 |
70 |
class Scoring:
71 |
def __init__(self, point: int):
@@ -505,14 +514,15 @@ class OrderingTextGame(BaseGame):
505 |
return self.answer # sorted(self.words, key=lambda word: (self.get_point(word), word))
506 |
507 |
def _validate(self, answer: str) -> (bool, str):
508 |
answer = answer.lower().replace('
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
def _generate_new_game(self, *args, **kwargs) -> None:
518 |
if "preset_config" in kwargs:
@@ -588,6 +598,26 @@ class OrderingTextGame(BaseGame):
588 |
prompt += "\nPrint only the answer."
589 |
return prompt
590 |
591 |
592 |
593 |
5 |
6 |
word length:
7 |
- example: word less than 5 characters gets 10 points
8 |
- possible operands: {\\eq, \\lt, \\gt, \\ne}
9 |
- \\le and \\ge will be randomized for prompt generation
10 |
- possible combinations: {\\gt\\lt, \\gt\\lt\\ne}
11 |
- only 1 \\ne is considered
12 |
13 |
neighboring / consecutive chars
14 |
- example: every pair of consecutive consonant gets 5 points
66 |
from textgames.assets.word_list import WORDS_LIST, WORDS_BY_LEN
67 |
68 |
69 |
70 |
index_to_word = {
71 |
1: "first", 2: "second", 3: "third", 4: "fourth", 5: "fifth",
72 |
6: "sixth", 7: "seventh", 8: "eighth", 9: "ninth", 10: "tenth",
73 |
11: "eleventh", 12: "twelfth", 13: "thirteenth", 14: "fourteenth",
74 |
15: "fifteenth", 16: "sixteenth", 17: "seventeenth", 18: "eighteenth",
75 |
76 |
77 |
78 |
79 |
class Scoring:
80 |
def __init__(self, point: int):
514 |
return self.answer # sorted(self.words, key=lambda word: (self.get_point(word), word))
515 |
516 |
def _validate(self, answer: str) -> (bool, str):
517 |
answer = answer.lower().replace(',', ' ').split()
518 |
gold = self.get_answer()
519 |
if len(answer) < len(gold):
520 |
return False, f"Your answer is too short. There should be {len(gold)} items."
521 |
for i, (a, b) in enumerate(zip(answer, self.get_answer()), 1):
522 |
if a != b:
523 |
val_msg = f"'{a}' is not supposed to be the {index_to_word[i]} word in the order."
524 |
return False, val_msg
525 |
return True, ""
526 |
527 |
def _generate_new_game(self, *args, **kwargs) -> None:
528 |
if "preset_config" in kwargs:
598 |
prompt += "\nPrint only the answer."
599 |
return prompt
600 |
601 |
602 |
def example() -> (str, str):
603 |
prompt = ("Given a set of rules to calculate point, sort the set of words in decreasing order.\n"
604 |
"When there 2 or more words with same point, sort lexicographically.\n\n"
605 |
606 |
"- add 10 points if there exists 'u' in the word\n\n"
607 |
608 |
"- hudi\n"
609 |
"- genta\n"
610 |
"- aji\n"
611 |
"- ruochen\n\n"
612 |
"Print only the answer.")
613 |
answer = (
614 |
615 |
616 |
617 |
618 |
619 |
return prompt, answer
620 |
621 |
622 |
623 |
@@ -274,3 +274,13 @@ class PasswordGame(BaseGame):
274 |
self.rules = [rule for rule in new_rules]
275 |
276 |
274 |
self.rules = [rule for rule in new_rules]
275 |
276 |
277 |
278 |
279 |
def example() -> (str, str):
    """Return one worked (prompt, answer) pair for the password game.

    The sample lists two criteria (six characters, no uppercase) and answers
    with "hoodie".
    """
    criteria = ("the text has 6 english character", "the text has 0 uppercase characters")
    intro = ("Please write a text string without any space by following a set of given rules."
             " Please write only the answer and follow the following criteria:\n")
    prompt = intro + "".join(f"- {criterion}\n" for criterion in criteria)
    return prompt, "hoodie"
286 |
@@ -309,4 +309,16 @@ Find a substring of exactly {self.answer_len} characters long that:
309 |
310 |
Print only the answer.
311 |
312 |
return prompt
309 |
310 |
Print only the answer.
311 |
312 |
return prompt
313 |
314 |
315 |
def example() -> (str, str):
316 |
prompt = ("You are given the following string:\n"
317 |
318 |
"Find a substring of exactly 3 characters long that:\n"
319 |
" - Contains t\n"
320 |
" - Does not contain i and a\n\n"
321 |
"Print only the answer.\n")
322 |
answer = "ent"
323 |
return prompt, answer
324 |
@@ -9,6 +9,7 @@ Please solve the 9x9 sudoku with 1,2,3,4,5,6,7,8,9 as the values and fill _ with
9 |
Print only the answer.
10 |
11 |
12 |
13 |
class Sudoku(BaseGame):
14 |
@@ -28,34 +29,47 @@ class Sudoku(BaseGame):
28 |
for j in range(self.size):
29 |
num = mat[i][j]
30 |
if num == self.empty_character:
31 |
return False
32 |
33 |
subgrid_index = (i // self.srn) * self.srn + (j // self.srn)
34 |
35 |
if num in rows[i]
36 |
return False
37 |
38 |
39 |
40 |
41 |
42 |
return True
43 |
44 |
def _validate(self, input) -> (bool, str):
45 |
mat = [[self.empty_character for i in range(self.size)] for j in range(self.size)]
46 |
47 |
arr = input.split()
48 |
for i in range(len(arr)):
49 |
for j in range(len(arr[i])):
50 |
if arr[i][j] not in self.char_to_id:
51 |
val_msg = "
52 |
return False, val_msg
53 |
54 |
mat[i][j] = self.char_to_id[arr[i][j]]
55 |
if arr[i][j] != self.mat[i][j] and self.mat[i][j] != self.empty_character:
56 |
val_msg = "One or more characters are replaced"
57 |
return False, val_msg
58 |
59 |
60 |
def _generate_new_game(self, *args, **kwargs) -> None:
61 |
@@ -228,4 +242,14 @@ class Sudoku(BaseGame):
228 |
self.char_to_id = {}
229 |
for c_id in range(len(self.characters)):
230 |
self.char_to_id[self.characters[c_id]] = c_id
231 |
9 |
Print only the answer.
10 |
11 |
12 |
13 |
14 |
class Sudoku(BaseGame):
15 |
29 |
for j in range(self.size):
30 |
num = mat[i][j]
31 |
if num == self.empty_character:
32 |
return False, "There are unfilled cells"
33 |
34 |
subgrid_index = (i // self.srn) * self.srn + (j // self.srn)
35 |
36 |
if num in rows[i]:
37 |
return False, f"Duplicated row value ({num}) for cell in row {i+1} column {j+1}."
38 |
elif num in cols[j]:
39 |
return False, f"Duplicated column value ({num}) for cell in row {i+1} column {j+1}."
40 |
elif num in subgrids[subgrid_index]:
41 |
return False, f"Duplicated subgrid value ({num}) for cell in row {i+1} column {j+1}."
42 |
43 |
44 |
45 |
46 |
47 |
return True, ""
48 |
49 |
def _validate(self, input) -> (bool, str):
50 |
mat = [[self.empty_character for i in range(self.size)] for j in range(self.size)]
51 |
52 |
input = input if input else ""
53 |
arr = input.split()
54 |
if all(len(l) == 1 for l in arr) and (len(arr) == self.size * self.size):
55 |
arr = ["".join(arr[i:i+self.size]) for i in range(0, len(arr), self.size)]
56 |
if (len(arr) != self.size) or any(len(arr[i]) != self.size for i in range(len(arr))):
57 |
arr = input.split("\n")
58 |
val_msg = f"Your answer is wrong in shape, it should be {self.size}x{self.size} sudoku."
59 |
return False, val_msg
60 |
61 |
for i in range(len(arr)):
62 |
for j in range(len(arr[i])):
63 |
if arr[i][j] not in self.char_to_id:
64 |
val_msg = "There are unrecognized characters, or possibly unfilled cells."
65 |
return False, val_msg
66 |
67 |
mat[i][j] = self.char_to_id[arr[i][j]]
68 |
if arr[i][j] != self.mat[i][j] and self.mat[i][j] != self.empty_character:
69 |
val_msg = "One or more characters are replaced"
70 |
return False, val_msg
71 |
72 |
return self.is_valid_sudoku(mat)
73 |
74 |
def _generate_new_game(self, *args, **kwargs) -> None:
75 |
242 |
self.char_to_id = {}
243 |
for c_id in range(len(self.characters)):
244 |
self.char_to_id[self.characters[c_id]] = c_id
245 |
246 |
247 |
def example() -> (str, str):
248 |
prompt = ("Please solve the 4x4 sudoku with A,B,C,D as the values and fill _ with the possible value and"
249 |
" only print the answer. Follow the sudoku rule.\nA_CD CD_B _AD_ DCBA")
250 |
answer = ("ABCD\n"
251 |
252 |
253 |
254 |
return prompt, answer
255 |