import streamlit as st |
import streamlit.components.v1 as components |
from transformers import AutoTokenizer, AutoModelForCausalLM |
# The browser-tab title and the on-page heading use the same text.
page_title = "First Conscious Quadrant with Detokenizer"
st.set_page_config(layout="wide", page_title=page_title)
st.title(page_title)
# Self-contained HTML page embedded in the app through components.html.
#
# The page draws a 1000x1000 canvas with a 16x16 guide grid and maps the mouse
# position onto an integer coordinate space of 0..50255 on both axes, with the
# origin in the bottom-left corner. Hovering updates a read-out; clicking
# encodes the point as a zero-padded 10-digit string (x * 100000 + y, i.e. two
# 5-digit fields) and posts it to the parent window as
# {type: 'clickedCoordinate', value: <string>}.
#
# Coordinates are clamped on both ends so a pointer position that falls a
# fraction of a pixel outside the canvas rectangle can never yield a negative
# value (which would produce a '-'-prefixed payload).
html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Interactive Base 50256 Grid</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
            margin: 0;
            background-color: #f0f0f0;
        }
        .container {
            text-align: center;
        }
        #grid {
            max-width: 80vmin;
            max-height: 80vmin;
            border: 1px solid #ccc;
        }
        .output {
            margin-top: 20px;
            font-size: 18px;
            font-weight: bold;
        }
    </style>
</head>
<body>
    <div class="container">
        <canvas id="grid" width="1000" height="1000"></canvas>
        <div id="clickedOutput" class="output">Click on the grid to select a coordinate</div>
        <div id="hoverOutput">Hover Coordinate: (X: 0, Y: 0)</div>
    </div>
    <script>
        const canvas = document.getElementById('grid');
        const ctx = canvas.getContext('2d');
        const clickedOutput = document.getElementById('clickedOutput');
        const hoverOutput = document.getElementById('hoverOutput');
        const gridSizeX = 50255;
        const gridSizeY = 50255;
        const cellSizeX = canvas.width / 16;
        const cellSizeY = canvas.height / 16;

        function drawGrid() {
            ctx.fillStyle = 'white';
            ctx.fillRect(0, 0, canvas.width, canvas.height);
            ctx.strokeStyle = '#ccc';
            ctx.lineWidth = 1;
            for (let i = cellSizeX; i < canvas.width; i += cellSizeX) {
                ctx.beginPath();
                ctx.moveTo(i, 0);
                ctx.lineTo(i, canvas.height);
                ctx.stroke();
            }
            for (let i = cellSizeY; i < canvas.height; i += cellSizeY) {
                ctx.beginPath();
                ctx.moveTo(0, i);
                ctx.lineTo(canvas.width, i);
                ctx.stroke();
            }
            ctx.fillStyle = 'black';
            ctx.font = '16px Arial';
            ctx.fillText('0,0', 5, canvas.height - 5);
            ctx.fillText(`${gridSizeX},0`, canvas.width - 60, canvas.height - 5);
            ctx.fillText(`0,${gridSizeY}`, 5, 20);
            ctx.fillText(`${gridSizeX},${gridSizeY}`, canvas.width - 100, 20);
        }

        // Keep a coordinate inside [0, upper].
        function clamp(value, upper) {
            return Math.max(0, Math.min(value, upper));
        }

        function getCoordinates(event) {
            const rect = canvas.getBoundingClientRect();
            const rawX = Math.floor((event.clientX - rect.left) / rect.width * gridSizeX);
            const rawY = gridSizeY - Math.floor((event.clientY - rect.top) / rect.height * gridSizeY);
            return { x: clamp(rawX, gridSizeX), y: clamp(rawY, gridSizeY) };
        }

        canvas.addEventListener('mousemove', (event) => {
            const { x, y } = getCoordinates(event);
            hoverOutput.textContent = `Hover Coordinate: (X: ${x}, Y: ${y})`;
        });

        canvas.addEventListener('click', (event) => {
            const { x, y } = getCoordinates(event);
            const encoded = (x * 100000 + y).toString().padStart(10, '0');
            clickedOutput.textContent = `Clicked Coordinate: ${encoded}`;
            window.parent.postMessage({type: 'clickedCoordinate', value: encoded}, '*');
        });

        canvas.addEventListener('mouseleave', () => {
            hoverOutput.textContent = 'Hover Coordinate: (X: 0, Y: 0)';
        });

        drawGrid();
    </script>
</body>
</html>
"""
components.html(html_content, height=700, scrolling=True)
@st.cache_resource
def _load_tokenizer():
    """Return the GPT-2 tokenizer, loading it only on the first call.

    Streamlit re-executes this script on every widget interaction; caching
    the tokenizer as a resource avoids calling ``from_pretrained`` on each
    rerun. ``st.cache_resource`` requires Streamlit 1.18 or newer.
    """
    return AutoTokenizer.from_pretrained('gpt2')


tokenizer = _load_tokenizer()
st.header("Detokenization")
# The label text is also used as a DOM selector by the click-bridge script at
# the bottom of this file, so keep the two in sync.
token_ids = st.text_input("Enter token IDs (concatenated without spaces):", "")
def split_token_ids(concatenated_ids, length=5):
    """Cut *concatenated_ids* into consecutive pieces of *length* characters.

    The last piece is shorter when the input length is not a multiple of
    *length*; an empty input produces an empty list.
    """
    pieces = []
    for start in range(0, len(concatenated_ids), length):
        pieces.append(concatenated_ids[start:start + length])
    return pieces
def remove_leading_zeros(grouped_ids):
    """Strip zero-padding from each ID string in *grouped_ids*.

    A group made up entirely of zeros (e.g. ``"00000"``) is the number zero
    and is returned as ``"0"`` rather than an empty string, so token ID 0 is
    not lost. An empty group stays empty.

    Returns a new list; the input is not modified.
    """
    # When stripping leaves nothing, the group was all zeros (or empty):
    # group[:1] is then "0" (or "" for an empty group).
    return [group.lstrip('0') or group[:1] for group in grouped_ids]
# Decode the entered digit string when the user presses "Detokenize".
if st.button("Detokenize"):
    # Surrounding whitespace is not part of any ID group.
    split_ids = split_token_ids(token_ids.strip())
    cleaned_ids = remove_leading_zeros(split_ids)
    cleaned_token_ids_str = ' '.join(cleaned_ids)

    # Parse the integers from the raw zero-padded groups: int() copes with
    # leading zeros, so an all-zero group correctly becomes token ID 0.
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    vocab_size = len(tokenizer)
    token_id_list = []
    rejected_groups = []
    for group in split_ids:
        if group.isdecimal() and int(group) < vocab_size:
            token_id_list.append(int(group))
        else:
            rejected_groups.append(group)

    # Tell the user what was skipped instead of dropping it silently.
    if rejected_groups:
        st.warning(
            "Ignored groups that are not valid token IDs: "
            + ' '.join(rejected_groups)
        )

    detokenized_sentence = tokenizer.decode(token_id_list)
    st.write("Grouped and cleaned token IDs:")
    st.write(cleaned_token_ids_str)
    st.write("Detokenized sentence:")
    st.write(detokenized_sentence)
@st.cache_resource
def _load_gpt2():
    """Return the GPT-2 causal language model, loading it only once.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model weights would be loaded again on each rerun.
    ``st.cache_resource`` requires Streamlit 1.18 or newer.
    """
    return AutoModelForCausalLM.from_pretrained('gpt2')


gpt2 = _load_gpt2()
if st.checkbox("Show GPT-2 Model Help"):
    st.write("Help GPT2")
    st.help(gpt2)
# Bridge between the grid iframe and the Streamlit widgets.
#
# components.html renders this snippet inside its own iframe, while the grid
# posts its 'clickedCoordinate' message to window.parent (the Streamlit page).
# The listener therefore has to be attached to the parent window and has to
# look the widgets up in the parent document; a listener on this iframe's own
# window never receives the message.
#
# NOTE(review): this is best-effort. It depends on Streamlit's rendered DOM
# (input aria-label, button text), which is not a public API, and on the
# component iframe being allowed to access its parent — confirm against the
# deployed Streamlit version.
components.html(
    """
    <script>
    (function () {
        const host = window.parent;

        function onCoordinate(event) {
            const data = event.data;
            if (!data || data.type !== 'clickedCoordinate') {
                return;
            }
            const doc = host.document;
            const input = doc.querySelector('input[aria-label="Enter token IDs (concatenated without spaces):"]');
            const button = Array.from(doc.querySelectorAll('button')).find(
                (candidate) => candidate.innerText.trim() === 'Detokenize'
            ) || doc.querySelector('button[kind="secondary"]');
            if (!input || !button) {
                return;
            }
            // The input is React-controlled: assigning .value directly is not
            // noticed, so use the native setter and emit an 'input' event.
            const setValue = Object.getOwnPropertyDescriptor(
                host.HTMLInputElement.prototype, 'value'
            ).set;
            input.focus();
            setValue.call(input, data.value);
            input.dispatchEvent(new host.Event('input', { bubbles: true }));
            // Leaving the field makes Streamlit commit the new text before
            // the button click is processed.
            input.blur();
            button.click();
        }

        // Replace any listener left by an earlier render of this snippet so
        // a click is handled exactly once.
        if (host.__coordinateBridge) {
            host.removeEventListener('message', host.__coordinateBridge, false);
        }
        host.__coordinateBridge = onCoordinate;
        host.addEventListener('message', onCoordinate, false);
    })();
    </script>
    """,
    # Script-only component: no visible area needed.
    height=0,
)