|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
window.initSent = async function(sent, sel){ |
|
var isHamlet = sent.class == 'hamlet' |
|
var isMobile = innerWidth < 900 |
|
|
|
var sel = d3.select('.' + sent.class) |
|
.st({opacity: .5, marginBottom: isHamlet ? '' : 40}) |
|
|
|
|
|
|
|
var str = sent.str |
|
while (str.includes('__')) str = str.replace('__', '_') |
|
str = str.replace('_', 'things') |
|
|
|
var tokens = tokenizer.tokenizeCLS(str) |
|
.filter(d => d < 30522) |
|
|
|
var topTokens = await post('embed_group_top', {tokens}) |
|
topTokens.forEach(sent => { |
|
sent.forEach(d => d.str = tokenizer.vocab[d.i]) |
|
}) |
|
|
|
var displayTokens = tokens |
|
.slice(1) |
|
.map((vocabIndex, i) => { |
|
return {i, str: bertLargeVocab[vocabIndex].replace('##', '')} |
|
}) |
|
displayTokens.pop() |
|
|
|
|
|
sel.html('').st({opacity: 1}) |
|
if (!sel.node()) return |
|
|
|
var divSel = sel.append('div') |
|
.st({position: 'relative'}) |
|
var svgSel = divSel.append('svg') |
|
.st({position: 'absolute', top: 0, zIndex: -10}) |
|
|
|
var tokenSel = divSel |
|
.append('div.token-container') |
|
.st({padding: 20, paddingLeft: 0, paddingRight: 0, fontSize: 20}) |
|
.appendMany('button.token', displayTokens) |
|
.text(d => d.str) |
|
.on('click', drawToken) |
|
|
|
var connectionPath = svgSel.append('path').at({fill: 'none', stroke: '#000', strokeWidth: 1}) |
|
|
|
var padding = 5 |
|
var width = divSel.node().offsetWidth |
|
var botWidth = isMobile ? width - padding*2 : 580 |
|
|
|
var botTextSel = divSel.append('div.top-sents') |
|
.translate([width/2 - botWidth/2 - padding + .5, 15]) |
|
.st({ |
|
width: botWidth, |
|
height: 170, |
|
outline: '1px solid #000', |
|
padding: padding, |
|
|
|
background: '#fff', |
|
overflowY: 'scroll', |
|
fontSize: isMobile ? 10 : '', |
|
}) |
|
|
|
if (isHamlet){ |
|
divSel.append('div.caption') |
|
.text(`BERT's predictions for what should fill in the hidden word`) |
|
.st({fontWeight: '', lineHeight: '1.1em', fontSize: 14, textAlign: 'center', width: '100%', marginTop: 20}) |
|
} |
|
|
|
var curIndex = -1 |
|
function drawToken(token){ |
|
var node = tokenSel.filter(d => d == token).node() |
|
var x = node.offsetLeft + node.offsetWidth/2 |
|
var y = node.offsetTop + node.offsetHeight |
|
|
|
var y1 = botTextSel.node().offsetTop |
|
|
|
connectionPath.at({d: ['M', x, y, 'L', width/2, y1 + 15].join(' ')}) |
|
|
|
var completionSel = botTextSel.html('').appendMany('span', topTokens[token.i + 1]) |
|
.st({display: 'inline-block', fontFamily: 'monospace', width: isMobile ? '47%' : '31%', borderBottom: '1px solid #ccc', margin: 4, fontSize: innerWidth < 350 ? 12 : isMobile ? 13 : 14 }) |
|
|
|
completionSel.append('span') |
|
.st({color: '#ccc'}) |
|
.html(d => { |
|
var str = d3.format('.3f')(d.p*100) + '% ' |
|
if (str.length < 8) str = ' ' + str |
|
return str |
|
}) |
|
|
|
completionSel.append('span') |
|
.text(d => d.str.replace('▁', '')) |
|
|
|
|
|
tokenSel |
|
.text(d => d.str) |
|
.classed('active', false) |
|
.filter(d => d == token) |
|
.classed('active', true) |
|
.text(d => d.str.split('').map(d => '_').join('')) |
|
} |
|
|
|
var i = displayTokens.length - (isHamlet ? 2 : 2) |
|
if (tokens.includes(2477)) i = tokens.indexOf(2477) - 1 |
|
drawToken(displayTokens[i]) |
|
|
|
var topTokensSel = sel.append('div.top-tokens') |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (window.init) init() |
|
|