|
<!doctype html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="utf-8" /> |
|
<meta name="viewport" content="width=device-width,initial-scale=1" /> |
|
<title>Next Token Predictor</title> |
|
<style> |
|
:root{ |
|
--bg:#0b0f14; --text:#fff; --muted:#9aa4b2; --accent:#38bdf8; --border:#1f2a3a; |
|
--chip:#111827; --chip-border:#263246; --chip-hover:#1a2434; |
|
--mono: ui-monospace,Menlo,Consolas,monospace; --sans: system-ui, -apple-system,"Segoe UI", Roboto, Arial; |
|
} |
|
*{box-sizing:border-box} |
|
body{margin:0;background:radial-gradient(1000px 600px at 50% -80px,#0c162a 15%,#081019 40%,var(--bg) 68%);color:var(--text);font-family:var(--sans)} |
|
.wrap{max-width:1100px;margin:0 auto;padding:16px} |
|
h1{margin:.2rem 0 .25rem;font-size:2.1rem;color:var(--accent)} |
|
.sub{color:var(--muted);margin:0 0 .8rem} |
|
|
|
|
|
.grid{display:grid;gap:12px;grid-template-columns:0.35fr 0.65fr} |
|
@media (max-width:900px){.grid{grid-template-columns:1fr}.row{flex-wrap:wrap}} |
|
|
|
.row{display:flex;gap:.6rem;align-items:center} |
|
.card{background:linear-gradient(180deg,#0c1624,#0a1220);border:1px solid var(--border);border-radius:14px;padding:12px} |
|
/* Form controls. The browser outline is removed for pointer focus, but an
   explicit :focus-visible outline is restored so keyboard users still get a
   clearly visible focus indicator (a 1px border-colour change alone is easy to miss). */
select,input{border-radius:10px;border:1px solid var(--border);background:#0a1220;color:var(--text);padding:.6rem .8rem;outline:none}
select:focus,input:focus{border-color:var(--accent)}
select:focus-visible,input:focus-visible{outline:2px solid var(--accent);outline-offset:2px}
|
#status{color:var(--muted);font-size:.9rem} |
|
|
|
|
|
.tokens{display:flex;gap:.4rem;flex-wrap:wrap} |
|
.chip{border:1px solid var(--chip-border);background:var(--chip);padding:.35rem .5rem;border-radius:10px;font-family:var(--mono);color:var(--text);} |
|
|
|
|
|
#topk{display:flex;flex-direction:column;gap:.4rem;padding-right:4px} |
|
.k{ |
|
padding:.45rem .6rem;border-radius:10px;background:#102133;border:1px solid #1c2b44; |
|
font-family:var(--mono);cursor:pointer;color:var(--text); |
|
display:flex;align-items:center;justify-content:space-between;width:100%;text-align:left; |
|
} |
|
.k:hover{border-color:var(--accent)} |
|
.note{color:var(--muted);font-size:.82rem} |
|
|
|
|
|
#emb .panel{ |
|
display:grid; |
|
grid-template-columns:minmax(0,1fr) 260px; |
|
gap:12px; |
|
align-items:start; |
|
} |
|
|
|
#scatter{ |
|
width:100%; |
|
height:auto; |
|
aspect-ratio:4/3; |
|
border-radius:10px; |
|
background:#09121d;border:1px solid var(--border) |
|
} |
|
#nbrs{align-content:flex-start} |
|
|
|
|
|
@media (max-width:700px){ |
|
#emb .panel{ |
|
grid-template-columns:1fr; |
|
grid-template-rows:auto auto; |
|
} |
|
#scatter{ aspect-ratio:1/1; } |
|
#nbrs{ |
|
display:grid; |
|
grid-template-columns:1fr; |
|
gap:.5rem; |
|
} |
|
.chip{ width:100%; } |
|
} |
|
|
|
.legend{display:flex;gap:10px;align-items:center;margin:.25rem 0 .5rem} |
|
.dot{width:10px;height:10px;border-radius:50%} |
|
.all{background:#1a2a3a} |
|
.target{background:#22d3ee} |
|
.nb{background:#93c5fd} |
|
.warn{color:#ffd79a} |
|
.footer{margin-top:18px;text-align:center;color:var(--muted);font-size:.9rem} |
|
.footer a{color:#8fd6ff;text-decoration:none} |
|
.err{margin-top:8px;background:#1f2937;border:1px solid #374151;color:#ffb4b4;padding:8px 10px;border-radius:10px;display:none} |
|
</style> |
|
</head> |
|
<body> |
|
<main class="wrap"> |
|
<h1>Next Token Predictor</h1> |
|
<div class="sub">Type a sentence to see the AI’s next-token guesses. Click to add a token, or hover to find similar ones.</div> |
|
|
|
<!-- Controls: model picker, prompt input, and status/error output. -->
<section class="card" aria-label="Prompt and model">
  <div class="row" style="gap:12px">
    <div class="row">
      <!-- `for` ties the visible label to the select so it has an accessible name. -->
      <label for="model" style="margin-right:.5rem">Model</label>
      <select id="model">
        <option value="distilgpt2">distilgpt2</option>
        <option value="qwen3" selected>Qwen3-0.6B</option>
      </select>
    </div>
    <!-- A placeholder is not a label; aria-label gives the input a stable accessible name. -->
    <input id="text" type="text" aria-label="Prompt text" placeholder="Enter your text here..." style="flex:1;min-width:240px">
    <!-- Live regions are present in the DOM up front so script-written updates are announced. -->
    <div id="status" role="status">Loading…</div>
  </div>
  <div id="error" class="err" role="alert"></div>
</section>
|
|
|
<section class="grid"> |
|
<article class="card"> |
|
<h3 style="margin:.2rem 0 .6rem">Top-10 next tokens</h3> |
|
<div id="topk"></div> |
|
</article> |
|
|
|
<article id="emb" class="card"> |
|
<h3 style="margin:.2rem 0 .6rem">Semantic neighborhood</h3> |
|
<div class="legend"> |
|
<div class="dot all"></div><div class="note">All tokens</div> |
|
<div class="dot nb"></div><div class="note">Similar tokens</div> |
|
<div class="dot target"></div><div class="note">Your token</div> |
|
</div> |
|
<div class="panel">
  <!-- The canvas is drawn entirely by script; role="img" plus a label gives it an
       accessible name. The neighbor list next to it carries the same data as text. -->
  <canvas id="scatter" width="600" height="520" role="img" aria-label="Scatter plot of token embeddings with the selected token and its nearest neighbors highlighted"></canvas>
  <div>
    <div class="note" id="embInfo">Hover a suggestion to explore.</div>
    <div id="nbrs" class="tokens" style="margin-top:.5rem"></div>
  </div>
</div>
|
<div class="note" style="margin-top:.6rem"> |
|
Each dot is a token. Nearby dots have similar meanings. The bright dot is your chosen token. |
|
Percentages show how closely each neighbor relates — higher means more similar. |
|
</div> |
|
</article> |
|
</section> |
|
|
|
<div class="footer"> |
|
Built by Peter Adams • Powered in your browser by <a href="https://xenova.github.io/transformers.js/" target="_blank" rel="noreferrer">Transformers.js</a>. |
|
</div> |
|
</main> |
|
|
|
<script type="module"> |
|
|
|
/**
 * Dynamically imports @huggingface/transformers from the first CDN URL that
 * yields a module exposing `env`, `AutoTokenizer` and `AutoModelForCausalLM`.
 *
 * URLs are tried in order; a failure on one URL falls through to the next.
 * Each failure is logged (instead of being silently swallowed) so that a
 * total failure can be diagnosed from the console.
 *
 * @returns {Promise<object>} the imported module namespace.
 * @throws {Error} when none of the URLs produced a usable module.
 */
async function loadTransformers() {
  const urls = [
    'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2/+esm',
    'https://esm.run/@huggingface/transformers@3.7.2',
    'https://esm.sh/@huggingface/transformers@3.7.2',
  ];

  for (const url of urls) {
    try {
      const mod = await import(url);
      if (mod?.env && mod.AutoTokenizer && mod.AutoModelForCausalLM) return mod;
      console.warn(`Transformers.js import from ${url} is missing expected exports; trying next source.`);
    } catch (err) {
      // Best-effort fallback: keep going, but leave a trace of what went wrong.
      console.warn(`Transformers.js import from ${url} failed:`, err);
    }
  }

  throw new Error('Failed to load @huggingface/transformers (ESM).');
}
|
|
|
// ---- DOM references ----
// Looked up first so that a failure while loading the library can be reported in the page.
const $ = s => document.querySelector(s);
const textIn = $('#text');
const statusEl = $('#status');
const topkEl = $('#topk');
const nbrsEl = $('#nbrs');
const embInfo = $('#embInfo');
const canvas = $('#scatter');
const ctx = canvas.getContext('2d');
const errBox = $('#error');
const modelSel = $('#model');

// ---- Library + runtime configuration ----
// If every CDN fails, surface the error in the UI instead of leaving "Loading…" forever,
// then rethrow so the module still aborts as before.
const tf = await loadTransformers().catch(err => {
  statusEl.textContent = 'Error';
  errBox.style.display = 'block';
  errBox.textContent = err?.message || String(err);
  throw err;
});

/**
 * Chooses the inference backend. `navigator.gpu` being present only means the
 * WebGPU API exists; `requestAdapter()` resolves to null when no usable
 * adapter is available, in which case we fall back to WASM.
 */
async function pickDevice() {
  if (!navigator.gpu) return 'wasm';
  try {
    return (await navigator.gpu.requestAdapter()) ? 'webgpu' : 'wasm';
  } catch {
    return 'wasm';
  }
}

const DEVICE = await pickDevice();
tf.env.useBrowserCache = true;
tf.env.allowRemoteModels = true;
tf.env.allowLocalModels = false;
|
|
|
|
|
/**
 * Registry of selectable models, keyed by the <option> values of #model.
 *   id    – model repository id handed to `from_pretrained`.
 *   emb   – JSON files with 2-D coordinates and neighbor lists for the scatter plot.
 *   label – human-readable name used in status messages.
 */
const MODELS = {
  distilgpt2: {
    id: 'Xenova/distilgpt2',
    emb: {
      coords: 'assets/embeddings/pca_top5k_coords.json',
      neigh: 'assets/embeddings/neighbors_top5k_k40.json',
    },
    label: 'distilgpt2',
  },
  qwen3: {
    id: 'onnx-community/Qwen3-0.6B-ONNX',
    emb: {
      coords: 'assets/embeddings/qwen_pca_top5k_coords.json',
      neigh: 'assets/embeddings/qwen_neighbors_top5k_k40.json',
    },
    label: 'Qwen3-0.6B',
  },
};
|
|
|
|
|
// ---- Mutable module state ----
let tokenizer = null;          // active tokenizer, set by loadModel()
let model = null;              // active causal LM, set by loadModel()
let currentModel = 'qwen3';    // key into MODELS / EmbCache
let busy = false;              // true while predict() is running
let flight = 0;                // monotonically increasing prediction counter
let warmed = false;            // becomes true after the first successful prediction
const EmbCache = {};           // per-model embedding state, see getEmbState()
let lastHoveredId = null;      // token id most recently previewed
let stickyId = null;           // token id pinned by a click, if any

/** Writes a short message into the status line. */
function setStatus(m) {
  statusEl.textContent = m;
}

/** Reveals the error box and shows the error's message (or its string form). */
function showError(e) {
  errBox.style.display = 'block';
  const message = e?.message || String(e);
  errBox.textContent = message;
}

/** Hides the error box and empties it. */
function clearError() {
  errBox.style.display = 'none';
  errBox.textContent = '';
}
|
|
|
|
|
/**
 * Loads the tokenizer and model registered under `modelKey` and makes them the
 * active pair.
 *
 * The new tokenizer/model are loaded into locals and committed together, so a
 * prediction that runs while loading is in progress never sees a new
 * tokenizer paired with the previous model. If a newer loadModel() call starts
 * before this one finishes, this call returns without committing anything.
 *
 * On commit the cached encoding and the hover/neighbor UI are reset, because
 * both refer to token ids of the previous tokenizer.
 *
 * @param {string} modelKey key of an entry in MODELS.
 * @returns {Promise<void>}
 * @throws {Error} for an unknown key, or whatever `from_pretrained` rejects
 *   with (the error is also shown in the UI before being rethrown).
 */
async function loadModel(modelKey){
  if (!Object.prototype.hasOwnProperty.call(MODELS, modelKey)) {
    throw new Error(`Unknown model key "${modelKey}"`);
  }
  const conf = MODELS[modelKey];

  // Ticket identifies the most recent load request; older requests must not commit.
  const ticket = (loadModel.ticket = (loadModel.ticket || 0) + 1);
  const superseded = () => ticket !== loadModel.ticket;

  clearError();
  try {
    setStatus(`Loading ${conf.label} tokenizer…`);
    const nextTokenizer = await tf.AutoTokenizer.from_pretrained(conf.id);
    if (superseded()) return;

    setStatus(`Loading ${conf.label} model…`);
    const nextModel = await tf.AutoModelForCausalLM.from_pretrained(conf.id, { device: DEVICE });
    if (superseded()) return;

    // Commit: swap tokenizer and model together.
    tokenizer = nextTokenizer;
    model = nextModel;
    currentModel = modelKey;
    tokenize.lastEnc = undefined; // encoding produced by the previous tokenizer is no longer valid

    // Reset hover/neighbor UI, which referred to the previous model's token ids.
    lastHoveredId = null;
    stickyId = null;
    embInfo.textContent = 'Hover a suggestion to explore.';
    nbrsEl.innerHTML = '';
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    if (EmbCache[currentModel]) {
      EmbCache[currentModel].baseDrawn = false;
    }

    warmed = false;
    setStatus('Ready.');
  } catch (err) {
    // Only the latest request may touch the status line; always propagate to the caller.
    if (!superseded()) { showError(err); setStatus('Error'); }
    throw err;
  }
}
|
|
|
|
|
|
|
/**
 * Numerically stable softmax.
 *
 * The maximum is found with a plain loop rather than `Math.max(...arr)`:
 * spreading a vocabulary-sized array (hundreds of thousands of logits) as
 * call arguments can exceed the engine's argument limit and throw RangeError.
 *
 * @param {ArrayLike<number>} arr logits.
 * @returns {number[]} probabilities in the same order; [] for empty input.
 */
const softmax = arr => {
  const n = arr.length;
  let max = -Infinity;
  for (let i = 0; i < n; i++) {
    if (arr[i] > max) max = arr[i];
  }

  const out = new Array(n);
  let sum = 0;
  for (let i = 0; i < n; i++) {
    const e = Math.exp(arr[i] - max); // subtract max so exp() cannot overflow
    out[i] = e;
    sum += e;
  }
  for (let i = 0; i < n; i++) out[i] /= sum;
  return out;
};
|
/**
 * Returns the `k` largest entries of `probs` as `[value, index]` pairs,
 * ordered from largest to smallest; ties keep ascending index order.
 *
 * Implemented as a single pass that maintains a sorted buffer of at most `k`
 * entries, instead of materialising and sorting one pair per vocabulary entry.
 * NaN values are skipped because they cannot be ordered.
 *
 * @param {ArrayLike<number>} probs values to rank.
 * @param {number} k maximum number of pairs to return.
 * @returns {Array<[number, number]>}
 */
const topK = (probs, k) => {
  const limit = Math.min(Math.max(0, Math.floor(k) || 0), probs.length);
  const best = [];
  if (limit === 0) return best;

  for (let i = 0; i < probs.length; i++) {
    const p = probs[i];
    if (p !== p) continue; // NaN
    // Buffer full and p does not beat the current smallest: nothing to do.
    if (best.length === limit && !(p > best[limit - 1][0])) continue;

    // Insert after any equal values so earlier indices win ties.
    let pos = best.length;
    while (pos > 0 && best[pos - 1][0] < p) pos--;
    best.splice(pos, 0, [p, i]);
    if (best.length > limit) best.pop();
  }
  return best;
};
|
/**
 * Coerces a loosely-typed value into a plain string.
 *   null / undefined  -> ''
 *   string            -> unchanged
 *   array             -> elements stringified (null/undefined as '') and concatenated
 *   object with .text -> normalizeText(value.text)
 *   anything else     -> String(value)
 */
function normalizeText(x) {
  if (x === null || x === undefined) return '';
  if (typeof x === 'string') return x;

  if (Array.isArray(x)) {
    let joined = '';
    for (const part of x) joined += String(part ?? '');
    return joined;
  }

  const carriesText = typeof x === 'object' && 'text' in x;
  return carriesText ? normalizeText(x.text) : String(x);
}
|
/**
 * Encodes the given text (or, when it is empty, the current input value)
 * with the active tokenizer, without special tokens.
 *
 * Blank input is replaced by a single space before encoding. The result is
 * also stored on `tokenize.lastEnc` so predict() can reuse it.
 *
 * @param {*} text text to encode; falsy values fall back to the input field.
 * @returns {Promise<object>} the tokenizer's encoding.
 */
async function tokenize(text) {
  const raw = text || textIn?.value || '';
  let prompt = normalizeText(raw);
  if (prompt.trim() === '') prompt = ' ';

  const encoding = await tokenizer(prompt, { add_special_tokens: false });
  tokenize.lastEnc = encoding;
  return encoding;
}
|
/**
 * Decodes a single token id to text with the active tokenizer, keeping
 * special tokens and original spacing. Returns '' if decoding throws.
 */
function decodeId(id) {
  const options = { skip_special_tokens: false, clean_up_tokenization_spaces: false };
  try {
    return tokenizer.decode([id], options);
  } catch {
    return '';
  }
}
|
|
|
|
|
/**
 * Builds one suggestion row: token text on the left, probability on the right.
 *
 * Text is assigned through `textContent`, never `innerHTML`, because decoded
 * tokens can contain `<`, `&` and similar characters. Hovering or focusing the
 * row previews the token's neighborhood; clicking pins it and appends the
 * decoded token text to the prompt.
 *
 * @param {number} prob probability of the token (0..1).
 * @param {number} id token id.
 * @returns {HTMLButtonElement}
 */
function _suggestionButton(prob, id) {
  // `piece` is what gets appended to the prompt; `label` is only for display,
  // so a placeholder such as '(special/space)' is never typed into the input.
  const piece = decodeId(id);
  let label = piece;
  if (label === '') {
    label = tokenizer.id_to_token ? (tokenizer.id_to_token(id) ?? '(special/space)') : '(special/space)';
  }

  const btn = document.createElement('button');
  btn.type = 'button';
  btn.className = 'k';
  const tokSpan = document.createElement('span');
  tokSpan.textContent = label;
  const pctSpan = document.createElement('span');
  pctSpan.textContent = `${(prob * 100).toFixed(1)}%`;
  btn.append(tokSpan, pctSpan);

  // drawNeighborhood is async (it may fetch embedding files); report failures.
  const show = () => drawNeighborhood(id).catch(err => { console.error(err); showError(err); });
  const preview = () => { lastHoveredId = id; stickyId = null; show(); };
  btn.addEventListener('pointerenter', preview);
  btn.addEventListener('focus', preview); // keyboard equivalent of hovering

  btn.addEventListener('click', async () => {
    lastHoveredId = id;
    stickyId = id;
    show();
    try {
      textIn.value = normalizeText(textIn.value) + piece;
      await tokenize(textIn.value);
      await predict();
    } catch (err) {
      console.error(err); showError(err); setStatus('Error');
    }
  });

  return btn;
}

/**
 * Runs the model once on the most recent encoding and re-renders the top-10 list.
 * Results are discarded if the active model changed while inference was running.
 */
async function _predictOnce() {
  const myFlight = ++flight;
  const activeModel = model;

  const enc = tokenize.lastEnc ?? await tokenize();
  const out = await activeModel(enc);
  if (myFlight !== flight || activeModel !== model) return;

  // logits dims are read as [_, T, V]; take the V logits of the last position.
  const [ , T, V ] = out.logits.dims;
  const start = (T - 1) * V;
  const last = Array.from(out.logits.data.slice(start, start + V));
  const ranked = topK(softmax(last), 10);

  const rows = document.createDocumentFragment();
  for (const [p, i] of ranked) rows.appendChild(_suggestionButton(p, i));
  topkEl.innerHTML = '';
  topkEl.appendChild(rows);

  // Keep a clicked token's neighborhood on screen after the list is rebuilt.
  if (stickyId != null) {
    drawNeighborhood(stickyId).catch(err => { console.error(err); showError(err); });
  }
}

/**
 * Predicts the next token for the current input and renders the suggestions.
 *
 * Only one inference runs at a time. A call that arrives while one is running
 * is not dropped: it marks a rerun as pending, and the running call loops once
 * more after it finishes, so the list always ends up reflecting the latest input.
 * Errors are logged, shown in the error box and reflected in the status line.
 *
 * @returns {Promise<void>}
 */
async function predict(){
  if (!tokenizer || !model) return;
  if (busy) { predict.pending = true; return; }
  busy = true;

  try {
    do {
      predict.pending = false;
      clearError();
      try {
        await _predictOnce();
        // Restore the status after the first success or after a previous failure.
        if (!warmed || predict.failed) { warmed = true; predict.failed = false; setStatus('Ready.'); }
      } catch (e) {
        predict.failed = true;
        console.error(e); showError(e); setStatus('Error');
      }
    } while (predict.pending);
  } finally {
    busy = false;
  }
}
|
|
|
|
|
/**
 * Returns the embedding state object for the current model, creating an
 * empty one in EmbCache on first use.
 */
function getEmbState() {
  let state = EmbCache[currentModel];
  if (!state) {
    state = { coords: null, neigh: null, keySet: null, keyMode: null, normIndex: null, baseDrawn: false };
    EmbCache[currentModel] = state;
  }
  return state;
}
|
|
|
/**
 * Canonical form of a token piece for loose matching: the '▁' and 'Ġ' space
 * markers become spaces, whitespace runs collapse to one space, and the
 * result is trimmed and lower-cased. Falsy input yields ''.
 */
function normalizePiece(s) {
  const raw = s || '';
  const withSpaces = raw.replaceAll('▁', ' ').replaceAll('Ġ', ' ');
  const collapsed = withSpaces.replace(/\s+/g, ' ');
  return collapsed.trim().toLowerCase();
}
|
/**
 * Decides how the coordinate map is keyed: 'id' when there is at least one
 * key and every key round-trips through number conversion unchanged,
 * otherwise 'token'.
 */
function detectKeyMode(coords) {
  const names = Object.keys(coords);
  if (names.length === 0) return 'token';
  for (const name of names) {
    if (String(+name) !== name) return 'token';
  }
  return 'id';
}
|
|
|
/**
 * Makes sure the coordinate and neighbor files of the current model are
 * loaded and indexed, then returns the model's embedding state.
 *
 * - Both files are fetched in parallel and non-2xx responses raise an error
 *   instead of surfacing later as a JSON parse failure.
 * - While a load is in progress, further callers share the same promise, so
 *   rapid hovering does not start duplicate downloads.
 * - State is only written once both files have arrived, so a failed load
 *   leaves nothing half-initialised and is retried on the next call.
 *
 * @returns {Promise<object>} the embedding state (see getEmbState()).
 * @throws {Error} when a file cannot be fetched or parsed.
 */
async function ensureEmbeddings(){
  const emb = getEmbState();
  if (emb.coords && emb.neigh && emb.keySet) return emb;
  if (emb.loading) return emb.loading;

  const files = MODELS[currentModel].emb;
  const fetchJson = async url => {
    const res = await fetch(url);
    if (!res.ok) throw new Error(`Failed to load ${url} (HTTP ${res.status})`);
    return res.json();
  };

  emb.loading = (async () => {
    const [coords, neigh] = await Promise.all([fetchJson(files.coords), fetchJson(files.neigh)]);

    const keyMode = detectKeyMode(coords);
    const keySet = new Set(Object.keys(coords));

    // For token-keyed maps, index keys by normalized form; the first key wins on collisions.
    const normIndex = new Map();
    if (keyMode === 'token') {
      for (const k of keySet) {
        const nk = normalizePiece(k);
        if (!normIndex.has(nk)) normIndex.set(nk, k);
      }
    }

    Object.assign(emb, { coords, neigh, keyMode, keySet, normIndex, baseDrawn: false });
    return emb;
  })();

  try {
    await emb.loading;
  } finally {
    emb.loading = null; // allow a retry after failure; success is covered by the early return
  }

  resizeCanvas(true);
  return emb;
}
|
|
|
/**
 * Lists, in priority order and without duplicates, the strings under which
 * token `id` might appear as a key in the embedding files: the id itself,
 * the raw vocabulary piece and the decoded text, each with space-marker,
 * leading-space and lower-case variants. Tokenizer errors are ignored.
 *
 * @param {number} id token id.
 * @returns {string[]}
 */
function idToCandidates(id) {
  // A Set keeps first-insertion order, which is the lookup priority.
  const seen = new Set([String(id)]);

  try {
    if (tokenizer.id_to_token) {
      const piece = tokenizer.id_to_token(id);
      if (piece) {
        seen.add(piece);
        const spaced = piece.replace(/^▁/, ' ').replace(/^Ġ/, ' ');
        seen.add(spaced);
        if (!piece.startsWith(' ')) seen.add(' ' + piece);
        if (!spaced.startsWith(' ')) seen.add(' ' + spaced);
        const lowerPiece = piece.toLowerCase();
        const lowerSpaced = spaced.toLowerCase();
        seen.add(lowerPiece);
        seen.add(lowerSpaced);
      }
    }
  } catch {}

  try {
    const decoded = decodeId(id);
    if (decoded) {
      seen.add(decoded);
      if (!decoded.startsWith(' ')) seen.add(' ' + decoded);
      seen.add(decoded.toLowerCase());
      seen.add('▁' + decoded.replace(/^\s/, ''));
      seen.add('Ġ' + decoded.replace(/^\s/, ''));
    }
  } catch {}

  return [...seen];
}
|
|
|
/**
 * Finds the key of `emb.coords` that corresponds to token `id`.
 *
 * Lookup order:
 *   1. any candidate spelling from idToCandidates() that is an exact key;
 *   2. (token-keyed maps only) the normalized form via `emb.normIndex`;
 *   3. (token-keyed maps only) a substring match against normalized keys,
 *      preferring the shortest matching key.
 *
 * Empty normalized keys are ignored in step 3: every string contains '',
 * so an empty key (e.g. from a whitespace-only token) would otherwise match
 * everything and always win as the shortest candidate.
 *
 * @param {object} emb embedding state with keySet, keyMode and normIndex.
 * @param {number} id token id.
 * @returns {string|null} the matching key, or null when none is found.
 */
function resolveCoordKey(emb, id){
  for (const k of idToCandidates(id)) {
    if (emb.keySet.has(k)) return k;
  }
  if (emb.keyMode === 'id') return null;

  const base = (tokenizer.id_to_token?.(id)) || decodeId(id) || '';
  const norm = normalizePiece(base);
  if (!norm || !emb.normIndex) return null;

  if (emb.normIndex.has(norm)) return emb.normIndex.get(norm);

  let candidate = null, candLen = Infinity;
  for (const [nk, original] of emb.normIndex.entries()) {
    if (nk === '') continue;
    if ((nk.includes(norm) || norm.includes(nk)) && nk.length < candLen) {
      candidate = original;
      candLen = nk.length;
    }
  }
  return candidate || null;
}
|
|
|
/**
 * Looks up the neighbor list for a token in `emb.neigh.neighbors`, trying the
 * resolved coordinate key first, then the id, then every candidate spelling
 * of the id. Returns [] when nothing is found or the entry is not an array.
 */
function getNeighborList(emb, coordKey, id) {
  const table = emb.neigh?.neighbors || {};

  let found = table[coordKey] || table[String(id)] || table[id];
  if (!found) {
    const altKey = idToCandidates(id).find(k => table[k]);
    if (altKey !== undefined) found = table[altKey];
  }

  return Array.isArray(found) ? found : [];
}
|
|
|
/**
 * Translates one `[neighbor, similarity]` entry into `[coordKey, similarity]`.
 *
 * The neighbor is accepted as-is when it is already a coordinate key;
 * otherwise, if it converts to a number, it is resolved as a token id; and
 * finally, if it is a string, it is looked up by normalized form.
 * Returns null when the neighbor cannot be placed on the map.
 */
function mapNeighborEntry(emb, entry) {
  const [nid, sim] = entry;
  const isText = typeof nid === 'string';

  if (isText && emb.keySet.has(nid)) return [nid, sim];

  const asNumber = typeof nid === 'number' ? nid : +nid;
  if (!Number.isNaN(asNumber)) {
    const resolved = resolveCoordKey(emb, asNumber);
    if (resolved) return [resolved, sim];
  }

  if (!isText) return null;
  const hit = emb.normIndex?.get(normalizePiece(nid));
  return hit ? [hit, sim] : null;
}
|
|
|
/**
 * Computes the axis-aligned bounding box of all `[x, y]` points in `coords`.
 * For an empty map the minima stay at Infinity and the maxima at -Infinity.
 */
function getBounds(coords) {
  const box = { minX: Infinity, minY: Infinity, maxX: -Infinity, maxY: -Infinity };
  return Object.values(coords).reduce((acc, [x, y]) => {
    if (x < acc.minX) acc.minX = x;
    if (y < acc.minY) acc.minY = y;
    if (x > acc.maxX) acc.maxX = x;
    if (y > acc.maxY) acc.maxY = y;
    return acc;
  }, box);
}
|
/**
 * Builds a function mapping an embedding point `[x, y]` to canvas pixels,
 * fitting the bounding box of `coords` inside the canvas with an 18px margin.
 * The y axis is flipped so larger y values are drawn higher up.
 *
 * Degenerate inputs are handled explicitly: when all points share the same x
 * (or y), or there are no points, that axis is centred instead of dividing by
 * zero, and the drawable size never goes negative on a very small canvas.
 *
 * @param {Object<string, [number, number]>} coords point map.
 * @returns {(pt: [number, number]) => [number, number]}
 */
function makeToXY(coords){
  const { minX, minY, maxX, maxY } = getBounds(coords);
  const pad = 18;
  const w = Math.max(0, canvas.width - pad * 2);
  const h = Math.max(0, canvas.height - pad * 2);
  const spanX = maxX - minX;
  const spanY = maxY - minY;

  // Position of v within [lo, lo + span] as 0..1; 0.5 when the span is unusable.
  const unit = (v, lo, span) => (span > 0 && Number.isFinite(span)) ? (v - lo) / span : 0.5;

  return ([x, y]) => [
    pad + unit(x, minX, spanX) * w,
    pad + (1 - unit(y, minY, spanY)) * h,
  ];
}
|
/**
 * Clears the canvas and paints every embedding point as a small dim dot,
 * then records on `emb` that the base layer has been drawn.
 */
function drawBase(emb, toXY) {
  const fullTurn = Math.PI * 2;
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  ctx.fillStyle = '#1a2a3a';

  Object.values(emb.coords).forEach(point => {
    const [px, py] = toXY(point);
    ctx.beginPath();
    ctx.arc(px, py, 2, 0, fullTurn);
    ctx.fill();
  });

  emb.baseDrawn = true;
}
|
|
|
/**
 * Highlights a token and its nearest neighbors on the scatter plot and lists
 * up to 18 neighbors with their similarity as chips.
 *
 * The base layer is always repainted first. This also covers tokens that are
 * not on the map: the highlights of the previously shown token are wiped, so
 * the plot never disagrees with the "unavailable" message.
 *
 * @param {number} tokenId id of the token to show.
 * @returns {Promise<void>} rejects if the embedding files cannot be loaded.
 */
async function drawNeighborhood(tokenId){
  const emb = await ensureEmbeddings();

  const toXY = makeToXY(emb.coords);
  drawBase(emb, toXY);

  const key = resolveCoordKey(emb, tokenId);
  if (!key) {
    embInfo.innerHTML = '<span class="warn">Neighborhood unavailable for this token (not in the current map).</span>';
    nbrsEl.innerHTML = '';
    return;
  }

  const targetXY = toXY(emb.coords[key]);
  const rawList = getNeighborList(emb, key, tokenId);
  const list = rawList.map(e => mapNeighborEntry(emb, e)).filter(Boolean);

  // Neighbors first, target last so the target dot is never covered.
  ctx.fillStyle = '#93c5fd';
  for (const [nk] of list){
    const pt = emb.coords[nk];
    if (!pt) continue;
    const [x, y] = toXY(pt);
    ctx.beginPath(); ctx.arc(x, y, 3.4, 0, Math.PI * 2); ctx.fill();
  }

  ctx.fillStyle = '#22d3ee';
  ctx.beginPath(); ctx.arc(targetXY[0], targetXY[1], 4.8, 0, Math.PI * 2); ctx.fill();

  nbrsEl.innerHTML = '';
  embInfo.textContent = 'Nearest neighbors:';
  for (const [nk, sim] of list.slice(0, 18)){
    // Numeric keys are token ids and need decoding; token keys only need their space marker replaced.
    const label = (String(+nk) === nk)
      ? (decodeId(+nk) || (tokenizer.id_to_token ? tokenizer.id_to_token(+nk) : String(nk)))
      : nk.replace(/^▁/, ' ').replace(/^Ġ/, ' ');

    const chip = document.createElement('div');
    chip.className = 'chip';
    // Omit the percentage rather than printing "NaN%" when the similarity is missing.
    chip.textContent = Number.isFinite(sim) ? `${label} ${(sim * 100).toFixed(1)}%` : `${label}`;
    nbrsEl.appendChild(chip);
  }
}
|
|
|
|
|
/**
 * Matches the canvas backing store to its on-screen size (device pixel ratio
 * capped at 2) and repaints. Nothing happens when the size is unchanged
 * unless `force` is true.
 *
 * @param {boolean} [force=false] resize and repaint even if the size is unchanged.
 */
function resizeCanvas(force = false) {
  const scale = Math.min(2, window.devicePixelRatio || 1);
  const box = canvas.getBoundingClientRect();
  const pxW = Math.max(1, Math.round(box.width * scale));
  const pxH = Math.max(1, Math.round(box.height * scale));

  const sameSize = canvas.width === pxW && canvas.height === pxH;
  if (!force && sameSize) return;

  // Assigning width/height clears the canvas, so everything must be redrawn.
  canvas.width = pxW;
  canvas.height = pxH;

  const emb = getEmbState();
  emb.baseDrawn = false;
  if (!emb.coords) return;

  if (lastHoveredId != null) {
    drawNeighborhood(lastHoveredId);
  } else {
    drawBase(emb, makeToXY(emb.coords));
  }
}

window.addEventListener('resize', () => resizeCanvas(false));
|
|
|
|
|
|
|
// ---- Event wiring ----
// Async handlers catch their own errors: a rejected promise inside an event
// listener would otherwise be an unhandled rejection with no feedback in the UI.
let debounceId;

/** Re-tokenizes the current input and refreshes the suggestions. */
const _refreshPredictions = async () => {
  try {
    await tokenize(textIn.value);
    await predict();
  } catch (err) {
    console.error(err); showError(err); setStatus('Error');
  }
};

// Debounce typing so inference is not started on every keystroke.
for (const ev of ['input', 'change']) {
  textIn.addEventListener(ev, () => {
    clearTimeout(debounceId);
    debounceId = setTimeout(_refreshPredictions, 150);
  });
}

modelSel.addEventListener('change', async e => {
  const key = e.target.value;
  try {
    setStatus(`Switching to ${MODELS[key].label}…`);
    await loadModel(key);
    await tokenize(textIn.value ?? '');
    await predict();
  } catch (err) {
    console.error(err); showError(err); setStatus('Error');
  }
});
|
|
|
|
|
// ---- Startup ----
// Load the default model, sync the picker, and render the first prediction.
// A failure here is shown in the page instead of leaving the status stuck on "Loading…".
try {
  await loadModel('qwen3');
  modelSel.value = 'qwen3';
  await tokenize(textIn.value ?? '');
  resizeCanvas(true);
  await predict();
} catch (err) {
  console.error(err);
  showError(err);
  setStatus('Error');
}
|
</script> |
|
</body> |
|
</html> |
|
|