# viz-gpt2-stockfish-debug / src/attention_interface.py
# (Hosting-page header residue: commit 2f281df "typo" by Xmaster6y, unverified.)
"""
Gradio interface for plotting attention.
"""
import chess
import gradio as gr
import torch
import uuid
import re
from . import constants, state, visualisation
def compute_cache(
    game_pgn,
    board_fen,
    attention_layer,
    attention_head,
    comp_index,
    state_cache,
    state_board_index,
):
    """Build the per-board model cache for a game (PGN) or a single position (FEN).

    A non-empty PGN takes precedence over the FEN. One cache entry
    ``(fen, state.model_cache(fen))`` is produced per position.

    Returns:
        The five ``make_plot`` outputs for the current board, followed by
        the freshly computed cache (to be stored in ``gr.State``).
    """
    if game_pgn == "" and board_fen != "":
        # Single position supplied as FEN.
        try:
            board = chess.Board(board_fen)
        except ValueError:
            # Invalid FEN: warn instead of crashing the callback, and fall
            # back to the "no cache" outputs (five Nones + empty cache).
            gr.Warning(f"Invalid FEN '{board_fen}'.")
            return (
                *make_plot(
                    attention_layer, attention_head, comp_index, None, 0
                ),
                None,
            )
        fen_list = [board.fen()]
    else:
        # Replay the PGN from the initial position, collecting one FEN per ply.
        board = chess.Board()
        fen_list = [board.fen()]
        for move in game_pgn.split():
            if move.endswith("."):
                # Skip move-number tokens such as "1.".
                continue
            try:
                board.push_san(move)
                fen_list.append(board.fen())
            except ValueError:
                gr.Warning(f"Invalid move {move}, stopping before it.")
                break
    state_cache = [(fen, state.model_cache(fen)) for fen in fen_list]
    # Clamp the board index: the user may have navigated deep into a previous
    # (longer) game, and a stale index would raise IndexError in make_plot.
    state_board_index = min(state_board_index, len(state_cache) - 1)
    return (
        *make_plot(
            attention_layer, attention_head, comp_index, state_cache, state_board_index
        ),
        state_cache,
    )
def make_plot(
    attention_layer,
    attention_head,
    comp_index,
    state_cache,
    state_board_index,
):
    """Render the attention heatmap over the board for one cached position.

    Args:
        attention_layer: 1-based layer index (UI sliders are 1-based).
        attention_head: 1-based head index.
        comp_index: 1-based index of the completion step whose attention
            is visualised.
        state_cache: list of ``(fen, (out, cache))`` pairs produced by
            ``compute_cache``, or ``None`` if not computed yet.
        state_board_index: index of the board within ``state_cache``.

    Returns:
        ``(board fen, info text, colorbar figure, path to rendered SVG board,
        list of (token, attention) pairs)`` — or five ``None``s when no
        cache is available.
    """
    if state_cache is None:
        gr.Warning("Cache not computed!")
        return None, None, None, None, None
    fen, (out, cache) = state_cache[state_board_index]
    # Pick the requested head from every attention tensor of the requested
    # layer (sliders are 1-based, tensors 0-based).
    attn_list = [a[0, attention_head - 1] for a in cache[attention_layer - 1]]
    # The first tensor attends over the whole prompt; keep only its last row
    # so each list entry is the attention of a single generated token.
    prompt_attn, *comp_attn = attn_list
    comp_attn.insert(0, prompt_attn[-1:])
    comp_attn = [a.squeeze(0) for a in comp_attn]
    if len(comp_attn) != 5:
        # NOTE(review): exactly 5 completion steps are assumed (presumably the
        # 4 UCI-move tokens plus one more) — confirm against the model.
        raise NotImplementedError("This is not implemented yet.")
    # Attention mass is split into three buckets for the info box.
    config_total = meta_total = dump_total = 0
    config_done = False
    heatmap = torch.zeros(64)  # one attention weight per board square
    h_index = 0  # next square index to fill in the heatmap
    for i, t_o in enumerate(out[0]):
        try:
            t_attn = comp_attn[comp_index - 1][i]
            if (i < 3) or (i > len(out[0]) - 10):
                # Leading/trailing prompt tokens are not board tokens.
                dump_total += t_attn
                continue
            t_str = state.model.tokenizer.decode(t_o)
            if t_str.startswith(" ") and h_index > 0:
                # A leading space marks the end of the board-configuration
                # part of the FEN; what follows is metadata (color, castling, ...).
                config_done = True
            if not config_done:
                if t_str == "/":
                    # Rank separators carry no square information.
                    dump_total += t_attn
                    continue
                # Expand FEN digits (run-lengths of empty squares) into that
                # many placeholder chars so each char maps to one square.
                t_str = re.sub(r"\d", lambda m: "0" * int(m.group(0)), t_str)
                config_total += t_attn
                t_str_len = len(t_str.strip())
                # Spread the token's attention uniformly over its squares.
                pre_t_attn = t_attn / t_str_len
                for j in range(t_str_len):
                    heatmap[h_index + j] = pre_t_attn
                h_index += t_str_len
            else:
                meta_total += t_attn
        except IndexError:
            # Attention vector shorter than the token sequence: stop here.
            break
    raw_attention = comp_attn[comp_index - 1]
    # Token/attention pairs for the HighlightedText widget.
    highlited_tokens = [
        (state.model.tokenizer.decode(out[0][i]), raw_attention[i])
        for i in range(len(raw_attention))
    ]
    # The last 5 tokens are the completion; tokens [-5:-1] spell the UCI move.
    uci_move = state.model.tokenizer.decode(out[0][-5:-1]).strip()
    board = chess.Board(fen)
    # FEN lists ranks from the 8th down; flip rows so the heatmap matches
    # the renderer's square ordering.
    heatmap = heatmap.view(8, 8).flip(0).view(64)
    move = chess.Move.from_uci(uci_move)
    svg_board, fig = visualisation.render_heatmap(
        board, heatmap, arrows=[(move.from_square, move.to_square)]
    )
    info = (
        f"[Completion] Complete: '{state.model.tokenizer.decode(out[0][-5:])}'"
        f" Chosen: '{state.model.tokenizer.decode(out[0][-5:][comp_index-1])}'"
        f"\n[Distribution] Config: {config_total:.2f} Meta: {meta_total:.2f} Dump: {dump_total:.2f}"
    )
    # Unique filename so concurrent sessions don't overwrite each other's SVG.
    id = str(uuid.uuid4())
    with open(f"{constants.FIGURE_DIRECTORY}/board_{id}.svg", "w") as f:
        f.write(svg_board)
    return (
        board.fen(),
        info,
        fig,
        f"{constants.FIGURE_DIRECTORY}/board_{id}.svg",
        highlited_tokens,
    )
def previous_board(
    attention_layer,
    attention_head,
    comp_index,
    state_cache,
    state_board_index,
):
    """Step to the previous board and re-plot; clamp at the first board."""
    new_index = state_board_index - 1
    if new_index < 0:
        gr.Warning("Already at first board.")
        new_index = 0
    plot_outputs = make_plot(
        attention_layer, attention_head, comp_index, state_cache, new_index
    )
    return (*plot_outputs, new_index)
def next_board(
    attention_layer,
    attention_head,
    comp_index,
    state_cache,
    state_board_index,
):
    """Step to the next board and re-plot; clamp at the last board."""
    last = len(state_cache) - 1
    new_index = state_board_index + 1
    if new_index > last:
        gr.Warning("Already at last board.")
        new_index = last
    plot_outputs = make_plot(
        attention_layer, attention_head, comp_index, state_cache, new_index
    )
    return (*plot_outputs, new_index)
# ---------------------------------------------------------------------------
# Gradio UI: inputs (PGN/FEN + sliders) on the left, rendered board and
# colorbar on the right. Event handlers above are wired to the widgets below.
# ---------------------------------------------------------------------------
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            # --- Position input -------------------------------------------
            with gr.Group():
                gr.Markdown(
                    "Specify the game PGN or FEN string that you want to analyse (PGN overrides FEN)."
                )
                game_pgn = gr.Textbox(
                    label="Game PGN",
                    lines=1,
                )
                board_fen = gr.Textbox(
                    label="Board FEN",
                    lines=1,
                    max_lines=1,
                )
                compute_cache_button = gr.Button("Compute cache")
            # --- Attention selection (1-based sliders) --------------------
            with gr.Group():
                with gr.Row():
                    attention_layer = gr.Slider(
                        label="Attention layer",
                        minimum=1,
                        maximum=12,
                        step=1,
                        value=1,
                    )
                    attention_head = gr.Slider(
                        label="Attention head",
                        minimum=1,
                        maximum=12,
                        step=1,
                        value=1,
                    )
                comp_index = gr.Slider(
                    label="Completion index",
                    minimum=1,
                    maximum=6,
                    step=1,
                    value=1,
                )
            # --- Navigation across the cached boards ----------------------
            with gr.Row():
                previous_board_button = gr.Button("Previous board")
                next_board_button = gr.Button("Next board")
            current_board_fen = gr.Textbox(
                label="Board FEN",
                lines=1,
                max_lines=1,
            )
            info = gr.Textbox(
                label="Info",
                lines=1,
                info=(
                    "'Config' refers to the board configuration tokens."
                    "\n'Meta' to the additional board tokens (like color or castling)."
                    "\n'Dump' to the rest of the tokens (including '/')."
                ),
            )
            gr.Markdown(
                "Note that only the 'Config' attention is plotted.\n\nSee below for the raw attention."
            )
            raw_attention_html = gr.HighlightedText(
                label="Raw attention",
            )
        with gr.Column():
            # --- Rendered output ------------------------------------------
            image_board = gr.Image(label="Board")
            colorbar = gr.Plot(label="Colorbar")
    # Inputs shared by every handler; must match the handler signatures.
    static_inputs = [
        attention_layer,
        attention_head,
        comp_index,
    ]
    # Outputs shared by every handler; order matches make_plot's return tuple.
    static_outputs = [
        current_board_fen,
        info,
        colorbar,
        image_board,
        raw_attention_html,
    ]
    # Per-session state: the computed cache and the currently shown board.
    state_cache = gr.State(value=None)
    state_board_index = gr.State(value=0)
    # compute_cache additionally writes the cache state.
    compute_cache_button.click(
        compute_cache,
        inputs=[game_pgn, board_fen, *static_inputs, state_cache, state_board_index],
        outputs=[*static_outputs, state_cache],
    )
    # Navigation handlers additionally write the board-index state.
    previous_board_button.click(
        previous_board,
        inputs=[*static_inputs, state_cache, state_board_index],
        outputs=[*static_outputs, state_board_index],
    )
    next_board_button.click(
        next_board,
        inputs=[*static_inputs, state_cache, state_board_index],
        outputs=[*static_outputs, state_board_index],
    )
    # Slider changes re-plot without touching any state.
    attention_layer.change(
        make_plot,
        inputs=[*static_inputs, state_cache, state_board_index],
        outputs=[*static_outputs],
    )
    attention_head.change(
        make_plot,
        inputs=[*static_inputs, state_cache, state_board_index],
        outputs=[*static_outputs],
    )
    comp_index.change(
        make_plot,
        inputs=[*static_inputs, state_cache, state_board_index],
        outputs=[*static_outputs],
    )