<!--
Next-Token-Predictor / index.html
PeterPinetree's picture
Update index.html
37c797e verified
-->
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<title>Next Token Predictor</title>
<style>
:root{
--bg:#0b0f14; --text:#fff; --muted:#9aa4b2; --accent:#38bdf8; --border:#1f2a3a;
--chip:#111827; --chip-border:#263246; --chip-hover:#1a2434;
--mono: ui-monospace,Menlo,Consolas,monospace; --sans: system-ui, -apple-system,"Segoe UI", Roboto, Arial;
}
*{box-sizing:border-box}
body{margin:0;background:radial-gradient(1000px 600px at 50% -80px,#0c162a 15%,#081019 40%,var(--bg) 68%);color:var(--text);font-family:var(--sans)}
.wrap{max-width:1100px;margin:0 auto;padding:16px}
h1{margin:.2rem 0 .25rem;font-size:2.1rem;color:var(--accent)}
.sub{color:var(--muted);margin:0 0 .8rem}
/* Main two-column (Left: Top-10, Right: Map) */
.grid{display:grid;gap:12px;grid-template-columns:0.35fr 0.65fr}
@media (max-width:900px){.grid{grid-template-columns:1fr}.row{flex-wrap:wrap}}
.row{display:flex;gap:.6rem;align-items:center}
.card{background:linear-gradient(180deg,#0c1624,#0a1220);border:1px solid var(--border);border-radius:14px;padding:12px}
/* Form controls. The default outline is removed, so keyboard focus gets an
   explicit :focus-visible ring in addition to the border-colour change. */
select,input{border-radius:10px;border:1px solid var(--border);background:#0a1220;color:var(--text);padding:.6rem .8rem;outline:none}
select:focus,input:focus{border-color:var(--accent)}
select:focus-visible,input:focus-visible{outline:2px solid var(--accent);outline-offset:1px}
#status{color:var(--muted);font-size:.9rem}
/* Token chips (neighbors) */
.tokens{display:flex;gap:.4rem;flex-wrap:wrap}
.chip{border:1px solid var(--chip-border);background:var(--chip);padding:.35rem .5rem;border-radius:10px;font-family:var(--mono);color:var(--text);}
/* Top-10 list */
#topk{display:flex;flex-direction:column;gap:.4rem;padding-right:4px}
.k{
padding:.45rem .6rem;border-radius:10px;background:#102133;border:1px solid #1c2b44;
font-family:var(--mono);cursor:pointer;color:var(--text);
display:flex;align-items:center;justify-content:space-between;width:100%;text-align:left;
}
.k:hover{border-color:var(--accent)}
.note{color:var(--muted);font-size:.82rem}
/* Neighborhood viewer */
#emb .panel{
display:grid;
grid-template-columns:minmax(0,1fr) 260px; /* map grows, sidebar fixed */
gap:12px;
align-items:start;
}
/* Responsive canvas – take full width of its grid cell, keep aspect */
#scatter{
width:100%;
height:auto;
aspect-ratio:4/3; /* desktop aspect */
border-radius:10px;
background:#09121d;border:1px solid var(--border)
}
#nbrs{align-content:flex-start}
/* Mobile layout: stack map on top; neighbors in one column */
@media (max-width:700px){
#emb .panel{
grid-template-columns:1fr; /* stack */
grid-template-rows:auto auto;
}
#scatter{ aspect-ratio:1/1; } /* square-ish map on phones */
#nbrs{
display:grid; /* single-column neighbors */
grid-template-columns:1fr;
gap:.5rem;
}
.chip{ width:100%; } /* chips fill width neatly */
}
.legend{display:flex;gap:10px;align-items:center;margin:.25rem 0 .5rem}
.dot{width:10px;height:10px;border-radius:50%}
.all{background:#1a2a3a}
.target{background:#22d3ee}
.nb{background:#93c5fd}
.warn{color:#ffd79a}
.footer{margin-top:18px;text-align:center;color:var(--muted);font-size:.9rem}
.footer a{color:#8fd6ff;text-decoration:none}
.err{margin-top:8px;background:#1f2937;border:1px solid #374151;color:#ffb4b4;padding:8px 10px;border-radius:10px;display:none}
</style>
</head>
<body>
<main class="wrap">
<h1>Next Token Predictor</h1>
<div class="sub">Type a sentence to see the AI’s next-token guesses. Click to add a token, or hover to find similar ones.</div>
<!-- Controls: model picker, prompt input, live status and error box -->
<section class="card">
  <div class="row" style="gap:12px">
    <div class="row">
      <!-- for= binds the visible label to the select for assistive technology -->
      <label for="model" style="margin-right:.5rem">Model</label>
      <select id="model">
        <option value="distilgpt2">distilgpt2</option>
        <option value="qwen3" selected>Qwen3-0.6B</option>
      </select>
    </div>
    <!-- The placeholder is not an accessible name, so one is supplied explicitly -->
    <input id="text" aria-label="Text to continue" placeholder="Enter your text here..." style="flex:1;min-width:240px" />
    <!-- role=status: progress text is announced politely when the script updates it -->
    <div id="status" role="status">Loading…</div>
  </div>
  <!-- role=alert: error text is announced as soon as the script reveals it -->
  <div id="error" class="err" role="alert"></div>
</section>
<!-- Results: top-10 list on the left, embedding map with neighbor chips on the right -->
<section class="grid">
  <article class="card">
    <!-- h2 keeps the heading outline contiguous after the page h1; font-size keeps the former h3 look -->
    <h2 style="margin:.2rem 0 .6rem;font-size:1.17em">Top-10 next tokens</h2>
    <div id="topk"></div>
  </article>
  <article id="emb" class="card">
    <h2 style="margin:.2rem 0 .6rem;font-size:1.17em">Semantic neighborhood</h2>
    <div class="legend">
      <!-- Colour swatches are decorative; the adjacent text carries the meaning -->
      <div class="dot all" aria-hidden="true"></div><div class="note">All tokens</div>
      <div class="dot nb" aria-hidden="true"></div><div class="note">Similar tokens</div>
      <div class="dot target" aria-hidden="true"></div><div class="note">Your token</div>
    </div>
    <div class="panel">
      <canvas id="scatter" width="600" height="520" role="img" aria-label="Scatter plot of token embeddings with the chosen token and its similar tokens highlighted">Your browser does not support the canvas element.</canvas>
      <div>
        <div class="note" id="embInfo">Hover a suggestion to explore.</div>
        <div id="nbrs" class="tokens" style="margin-top:.5rem"></div>
      </div>
    </div>
    <div class="note" style="margin-top:.6rem">
      Each dot is a token. Nearby dots have similar meanings. The bright dot is your chosen token.
      Percentages show how closely each neighbor relates — higher means more similar.
    </div>
  </article>
</section>
<div class="footer">
Built by Peter Adams • Powered in your browser by <a href="https://xenova.github.io/transformers.js/" target="_blank" rel="noreferrer">Transformers.js</a>.
</div>
</main>
<script type="module">
// ---- Robust ESM loader ----
/**
 * Imports @huggingface/transformers from the first CDN that serves a usable
 * ES module.
 *
 * Each URL is tried in order. A module is accepted only if it exposes `env`,
 * `AutoTokenizer` and `AutoModelForCausalLM`. Every failed attempt is logged
 * with its URL so the cause can be diagnosed, rather than being discarded.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} When every URL fails; `cause` holds the last failure.
 */
async function loadTransformers() {
  const urls = [
    'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2/+esm',
    'https://esm.run/@huggingface/transformers@3.7.2',
    'https://esm.sh/@huggingface/transformers@3.7.2',
  ];
  let lastFailure;
  for (const u of urls) {
    try {
      const m = await import(u);
      if (m?.env && m.AutoTokenizer && m.AutoModelForCausalLM) return m;
      lastFailure = new Error(`Module from ${u} is missing expected exports`);
    } catch (err) {
      lastFailure = err;
    }
    // Best-effort fallback continues, but the reason is kept visible.
    console.warn(`[loadTransformers] ${u} failed:`, lastFailure);
  }
  throw new Error('Failed to load @huggingface/transformers (ESM).', { cause: lastFailure });
}
// ---- Runtime bootstrap ----
// Load the library. If every CDN fails, surface the problem in the page
// (otherwise the status would read "Loading…" forever) and then rethrow so
// the rest of the module does not run without `tf`.
let tf;
try {
  tf = await loadTransformers();
} catch (e) {
  const statusNode = document.querySelector('#status');
  const errorNode = document.querySelector('#error');
  if (statusNode) statusNode.textContent = 'Error';
  if (errorNode) {
    errorNode.style.display = 'block';
    errorNode.textContent = e?.message || String(e);
  }
  throw e;
}
// `navigator.gpu` can exist while no adapter is available (requestAdapter()
// then resolves to null), so probe for an adapter before choosing WebGPU.
const DEVICE = await (async () => {
  if (!navigator.gpu) return 'wasm';
  try {
    return (await navigator.gpu.requestAdapter()) ? 'webgpu' : 'wasm';
  } catch {
    return 'wasm';
  }
})();
tf.env.useBrowserCache = true;
tf.env.allowRemoteModels = true;
tf.env.allowLocalModels = false;
// UI refs: DOM handles looked up once and shared by the rest of the script
const $ = (selector) => document.querySelector(selector);
const [textIn, statusEl, topkEl, nbrsEl, embInfo, canvas, errBox, modelSel] =
  ['#text', '#status', '#topk', '#nbrs', '#embInfo', '#scatter', '#error', '#model'].map((sel) => $(sel));
// 2-D drawing context of the scatter canvas
const ctx = canvas.getContext('2d');
// Model registry
// Keys match the <option> values of the model <select>. Each entry holds:
//   id    - repository id handed to from_pretrained() for tokenizer and model
//   emb   - relative URLs of the JSON files fetched by ensureEmbeddings():
//           `coords` (per-key [x, y] points) and `neigh` (neighbor lists)
//   label - display name used in status messages
const MODELS = {
distilgpt2: {
id: 'Xenova/distilgpt2',
emb: {
coords: 'assets/embeddings/pca_top5k_coords.json',
neigh : 'assets/embeddings/neighbors_top5k_k40.json'
},
label: 'distilgpt2'
},
qwen3: {
id: 'onnx-community/Qwen3-0.6B-ONNX',
emb: {
coords: 'assets/embeddings/qwen_pca_top5k_coords.json',
neigh : 'assets/embeddings/qwen_neighbors_top5k_k40.json'
},
label: 'Qwen3-0.6B'
}
};
// State
// Tokenizer and causal-LM instances; null until loadModel() assigns them.
let tokenizer = null, model = null;
// Key into MODELS, assigned by loadModel().
let currentModel = 'qwen3';
// busy:   true while a predict() run is executing (guards re-entry)
// flight: counter incremented by every predict() run; used to detect stale results
// warmed: reset by loadModel(), set after the first completed predict()
let busy = false, flight = 0, warmed = false;
// Embedding-viewer state per model key; entries are created by getEmbState().
const EmbCache = {};
let lastHoveredId = null; // remember last hovered token for redraws on resize
let stickyId = null; // when you click/tap, persist this after predict()
/** Replaces the text of the status line. */
function setStatus(m){
  statusEl.textContent = m;
}

/** Reveals the error box and fills it with the error's message (or its string form). */
function showError(e){
  errBox.style.display = 'block';
  const message = e?.message || String(e);
  errBox.textContent = message;
}

/** Hides the error box and empties it. */
function clearError(){
  errBox.style.display = 'none';
  errBox.textContent = '';
}
// Load model
/**
 * Loads the tokenizer and model registered under `modelKey` and makes them
 * the active pair.
 *
 * The new tokenizer and model are loaded into locals first. Only when both
 * are ready are `tokenizer`, `model` and `currentModel` replaced together,
 * so predictions and hover previews issued during a switch keep using one
 * consistent model. A failed load leaves the previous model fully usable.
 * If a newer loadModel() call starts while this one is in flight, this one
 * returns without touching any state.
 *
 * @param {string} modelKey Key into MODELS.
 * @returns {Promise<void>}
 * @throws {Error} For an unknown key, or whatever from_pretrained() rejects with.
 */
async function loadModel(modelKey){
  if (!MODELS[modelKey]) {
    throw new Error(`Unknown model key "${modelKey}"`);
  }
  const conf = MODELS[modelKey];
  // Remember the most recent request so superseded loads can stand down.
  loadModel.latest = modelKey;

  clearError();
  setStatus(`Loading ${conf.label} tokenizer…`);
  const nextTokenizer = await tf.AutoTokenizer.from_pretrained(conf.id);
  if (loadModel.latest !== modelKey) return; // superseded: skip the model load
  setStatus(`Loading ${conf.label} model…`);
  const nextModel = await tf.AutoModelForCausalLM.from_pretrained(conf.id, { device: DEVICE });
  if (loadModel.latest !== modelKey) return; // superseded while loading

  // ---- Swap everything in one synchronous step ----
  tokenizer = nextTokenizer;
  model = nextModel;
  currentModel = modelKey;
  // An encoding produced by the previous tokenizer must not be fed to the new model.
  tokenize.lastEnc = null;

  // Reset click/hover + UI now that the model has changed
  lastHoveredId = null; // preview state
  stickyId = null;      // clicked selection state
  embInfo.textContent = 'Hover a suggestion to explore.';
  nbrsEl.innerHTML = '';
  try { ctx.clearRect(0, 0, canvas.width, canvas.height); } catch {}
  // The canvas was just cleared, so the cached map for this model needs a redraw.
  if (EmbCache[currentModel]) {
    EmbCache[currentModel].baseDrawn = false;
  }

  setStatus('Ready.');
  warmed = false;
}
// Helpers
/**
 * Numerically stable softmax.
 *
 * The maximum is found with a loop rather than `Math.max(...arr)`: spreading
 * a vocabulary-sized array (over 100k logits) into a call can exceed the
 * engine's argument limit and throw a RangeError.
 *
 * @param {number[]} arr Logits.
 * @returns {number[]} Probabilities in the same order; empty input yields an empty result.
 */
const softmax = arr => {
  let m = -Infinity;
  for (let i = 0; i < arr.length; i++) {
    if (arr[i] > m) m = arr[i];
  }
  // Subtracting the maximum keeps Math.exp() from overflowing.
  const exps = arr.map(x => Math.exp(x - m));
  let s = 0;
  for (let i = 0; i < exps.length; i++) s += exps[i];
  return exps.map(x => x / s);
};
/**
 * Returns the k largest entries of `probs` as [probability, index] pairs,
 * highest probability first.
 */
const topK = (probs, k) => {
  const ranked = probs.map((value, index) => [value, index]);
  ranked.sort((left, right) => right[0] - left[0]);
  return ranked.slice(0, k);
};
/**
 * Coerces arbitrary input to a plain string: null/undefined become '',
 * strings pass through, arrays are joined element-wise (null-ish elements
 * become ''), objects with a `text` property are unwrapped recursively, and
 * anything else goes through String().
 */
function normalizeText(x){
  if (x == null) return '';
  switch (typeof x) {
    case 'string':
      return x;
    case 'object':
      if (Array.isArray(x)) {
        return x.map(item => String(item ?? '')).join('');
      }
      if ('text' in x) {
        return normalizeText(x.text);
      }
      break;
  }
  return String(x);
}
/**
 * Encodes `text` (falling back to the input box's value, then to '') without
 * special tokens. Blank input is replaced by a single space before encoding.
 * The result is also stored on `tokenize.lastEnc` for predict() to pick up.
 */
async function tokenize(text){
  let input = normalizeText(text || textIn?.value || '');
  if (input.trim() === '') {
    input = ' ';
  }
  const encoding = await tokenizer(input, { add_special_tokens: false });
  tokenize.lastEnc = encoding;
  return encoding;
}
/**
 * Decodes a single token id to text, keeping special tokens and the original
 * spacing. Returns '' if the tokenizer throws.
 */
function decodeId(id){
  const options = { skip_special_tokens: false, clean_up_tokenization_spaces: false };
  try {
    return tokenizer.decode([id], options);
  } catch {
    return '';
  }
}
// Predict
/**
 * Runs the model on the most recent encoding and renders the ten most
 * probable next tokens as buttons inside #topk.
 *
 * - Does nothing until a tokenizer and model are loaded.
 * - If a run is already in progress, the call is remembered and one more run
 *   starts as soon as the current one finishes, so the newest text is never
 *   left without a prediction.
 * - Hovering or focusing a button previews its neighborhood; clicking appends
 *   the token to the input, pins its neighborhood, and predicts again.
 * - Errors are logged, shown in the error box, and reflected in the status.
 *
 * @returns {Promise<void>}
 */
async function predict(){
  if (!tokenizer || !model) return;
  if (busy) {
    // Queue exactly one follow-up run instead of dropping the request.
    predict.rerun = true;
    return;
  }
  busy = true; clearError();
  const myFlight = ++flight;

  // drawNeighborhood() is fire-and-forget here; report failures instead of
  // leaving an unhandled promise rejection.
  const show = id => drawNeighborhood(id).catch(err => { console.error(err); showError(err); });

  try {
    const enc = tokenize.lastEnc ?? await tokenize();
    const out = await model(enc);
    if (myFlight !== flight) return;

    // logits.dims is destructured as [_, T, V]; the slice below reads the
    // last row of length V from the flat data buffer.
    const [ , T, V ] = out.logits.dims;
    const start = (T - 1) * V;
    const last = Array.from(out.logits.data.slice(start, start + V));
    const probs = softmax(last);
    const k = topK(probs, 10);

    topkEl.innerHTML = '';
    for (const [p, i] of k) {
      let tok = decodeId(i);
      if (tok === '') {
        tok = tokenizer.id_to_token ? (tokenizer.id_to_token(i) ?? '(special/space)') : '(special/space)';
      }

      const btn = document.createElement('button');
      btn.type = 'button';
      btn.className = 'k';
      // Token text comes from the model vocabulary and may contain '<' or '&';
      // textContent inserts it literally instead of parsing it as markup.
      const tokSpan = document.createElement('span');
      tokSpan.textContent = tok;
      const pctSpan = document.createElement('span');
      pctSpan.textContent = `${(p * 100).toFixed(1)}%`;
      btn.append(tokSpan, pctSpan);

      // Hover/pen/keyboard focus = preview. Pointer events already cover the
      // mouse, so a separate mouseenter handler would only draw twice.
      const preview = () => { lastHoveredId = i; stickyId = null; show(i); };
      btn.onpointerenter = preview;
      btn.onfocus = preview;

      // Click/tap = append + lock neighborhood
      btn.onclick = async () => {
        // show neighborhood immediately for mobile
        lastHoveredId = i;
        stickyId = i;
        show(i);
        // append token and re-run prediction
        const cur = normalizeText(textIn.value);
        textIn.value = cur + tok;
        await tokenize(textIn.value);
        await predict();
      };

      topkEl.appendChild(btn);
    }

    // If a token was clicked, keep its neighborhood visible after re-render
    if (stickyId != null) {
      show(stickyId);
    }
    if (!warmed) { warmed = true; setStatus('Ready.'); }
  } catch (e) {
    console.error(e); showError(e); setStatus('Error');
  } finally {
    busy = false;
    if (predict.rerun) {
      predict.rerun = false;
      predict(); // handles its own errors
    }
  }
}
// ===== Embedding Viewer (robust key resolution) =====
/**
 * Returns the embedding-viewer state object for the current model, creating
 * an empty one in EmbCache on first access.
 */
function getEmbState(){
  const existing = EmbCache[currentModel];
  if (existing) return existing;
  const fresh = {
    coords: null,
    neigh: null,
    keySet: null,
    keyMode: null,
    normIndex: null,
    baseDrawn: false
  };
  EmbCache[currentModel] = fresh;
  return fresh;
}
/**
 * Canonical form of a token piece for loose matching: the word-boundary
 * markers '▁' and 'Ġ' become spaces, whitespace runs collapse to one space,
 * the result is trimmed and lower-cased. Falsy input yields ''.
 */
function normalizePiece(s){
  const raw = s || '';
  const withSpaces = raw.replace(/[▁Ġ]/g, ' ');
  const collapsed = withSpaces.replace(/\s+/g, ' ');
  return collapsed.trim().toLowerCase();
}
/**
 * Classifies the key scheme of a coordinate map: 'id' when there is at least
 * one key and every key is a canonical numeric string, otherwise 'token'.
 */
function detectKeyMode(coords){
  const names = Object.keys(coords);
  if (names.length === 0) return 'token';
  for (const name of names) {
    // Round-trip through Number rejects forms such as '01' or 'abc'.
    if (String(+name) !== name) return 'token';
  }
  return 'id';
}
/**
 * Makes sure the coordinate and neighbor files for the current model are
 * loaded and indexed, then returns that model's embedding state.
 *
 * Both JSON files are fetched in parallel and each response is checked for an
 * HTTP error, so a missing file produces a clear message rather than a JSON
 * parse failure. The state object is only filled in once everything has
 * loaded, so a failed attempt never leaves it half-populated.
 *
 * @returns {Promise<object>} State with coords, neigh, keyMode, keySet, normIndex.
 * @throws {Error} When a file cannot be fetched or parsed.
 */
async function ensureEmbeddings(){
  const emb = getEmbState();
  if (emb.coords && emb.neigh && emb.keySet) return emb;

  const files = MODELS[currentModel].emb;
  const fetchJson = async (url) => {
    const res = await fetch(url);
    if (!res.ok) throw new Error(`Failed to load ${url} (HTTP ${res.status})`);
    return res.json();
  };
  const [coords, neigh] = await Promise.all([
    fetchJson(files.coords),
    fetchJson(files.neigh)
  ]);

  const keyMode = detectKeyMode(coords);
  const keySet = new Set(Object.keys(coords));
  // For token-keyed maps, index keys by normalized form (first key wins).
  const normIndex = new Map();
  if (keyMode === 'token') {
    for (const k of keySet) {
      const nk = normalizePiece(k);
      if (!normIndex.has(nk)) normIndex.set(nk, k);
    }
  }

  Object.assign(emb, { coords, neigh, keyMode, keySet, normIndex, baseDrawn: false });

  // Ensure canvas backing store matches CSS size
  resizeCanvas(true);
  return emb;
}
/**
 * Lists, in priority order and without duplicates, the strings that could be
 * the map key for a token id: the id itself, the vocabulary piece and its
 * space/lower-case variants, then the decoded text and its marker-prefixed
 * variants. Tokenizer failures are ignored; the id string is always present.
 */
function idToCandidates(id){
  const out = [String(id)];

  // Variants derived from the raw vocabulary piece
  try {
    const piece = tokenizer.id_to_token ? tokenizer.id_to_token(id) : undefined;
    if (piece) {
      out.push(piece);
      const spaced = piece.replace(/^▁/, ' ').replace(/^Ġ/, ' ');
      out.push(spaced);
      for (const form of [piece, spaced]) {
        if (!form.startsWith(' ')) out.push(' ' + form);
      }
      out.push(piece.toLowerCase(), spaced.toLowerCase());
    }
  } catch {}

  // Variants derived from the decoded text
  try {
    const dec = decodeId(id);
    if (dec) {
      out.push(dec);
      if (!dec.startsWith(' ')) out.push(' ' + dec);
      out.push(dec.toLowerCase());
      const bare = dec.replace(/^\s/, '');
      out.push('▁' + bare, 'Ġ' + bare);
    }
  } catch {}

  return [...new Set(out)];
}
/**
 * Finds the coordinate-map key for a token id.
 *
 * Resolution order:
 *   1. any exact candidate from idToCandidates();
 *   2. (token-keyed maps only) the key whose normalized form equals the
 *      token's normalized form;
 *   3. (token-keyed maps only) the shortest normalized key that contains, or
 *      is contained in, the token's normalized form.
 *
 * Keys that normalize to the empty string are skipped in step 3. Every
 * string includes '', so such a key would match every token and, having
 * length 0, would always be chosen.
 *
 * @param {object} emb Embedding state from ensureEmbeddings().
 * @param {number} id  Token id.
 * @returns {string|null} Matching key, or null when the token is not in the map.
 */
function resolveCoordKey(emb, id){
  for (const k of idToCandidates(id)) {
    if (emb.keySet.has(k)) return k;
  }
  if (emb.keyMode === 'id') return null;

  const base = (tokenizer.id_to_token?.(id)) || decodeId(id) || '';
  const norm = normalizePiece(base);
  if (!norm || !emb.normIndex) return null;

  if (emb.normIndex.has(norm)) return emb.normIndex.get(norm);

  let candidate = null, candLen = Infinity;
  for (const [nk, original] of emb.normIndex.entries()) {
    if (!nk) continue; // '' is a substring of everything; never a meaningful match
    if (nk.includes(norm) || norm.includes(nk)) {
      if (nk.length < candLen) { candidate = original; candLen = nk.length; }
    }
  }
  return candidate;
}
/**
 * Looks up the neighbor list for a token in `emb.neigh.neighbors`, trying the
 * resolved coordinate key, then the id as string and as given, then every
 * candidate from idToCandidates(). Returns [] when nothing array-valued is found.
 */
function getNeighborList(emb, coordKey, id){
  const table = emb.neigh?.neighbors || {};
  let found = table[coordKey] || table[String(id)] || table[id];
  if (!found) {
    const altKey = idToCandidates(id).find(k => table[k]);
    if (altKey !== undefined) found = table[altKey];
  }
  return Array.isArray(found) ? found : [];
}
/**
 * Translates one [neighbor, similarity] entry into [coordKey, similarity].
 * Tries, in order: the neighbor as an exact string key, the neighbor as a
 * numeric token id via resolveCoordKey(), and the neighbor's normalized form
 * via the normalized index. Returns null when none of these resolve.
 */
function mapNeighborEntry(emb, entry){
  const [nid, sim] = entry;
  const isText = typeof nid === 'string';

  if (isText && emb.keySet.has(nid)) return [nid, sim];

  const asNumber = typeof nid === 'number' ? nid : +nid;
  if (!Number.isNaN(asNumber)) {
    const resolved = resolveCoordKey(emb, asNumber);
    if (resolved) return [resolved, sim];
  }

  if (isText) {
    const viaNorm = emb.normIndex?.get(normalizePiece(nid));
    if (viaNorm) return [viaNorm, sim];
  }

  return null;
}
/**
 * Computes the axis-aligned bounding box of every [x, y] point in `coords`.
 * An empty map yields +Infinity minima and -Infinity maxima.
 */
function getBounds(coords){
  const box = { minX: Infinity, minY: Infinity, maxX: -Infinity, maxY: -Infinity };
  Object.values(coords).forEach(([x, y]) => {
    if (x < box.minX) box.minX = x;
    if (y < box.minY) box.minY = y;
    if (x > box.maxX) box.maxX = x;
    if (y > box.maxY) box.maxY = y;
  });
  return box;
}
/**
 * Builds a function that maps a data-space [x, y] point to canvas pixels,
 * fitting the bounding box of `coords` inside the canvas with an 18px margin
 * and flipping the y axis so that larger y values are drawn higher.
 *
 * When all points share the same x (or y) value the range is zero; such an
 * axis is centred instead of dividing by zero and producing NaN positions.
 *
 * @param {Object<string, number[]>} coords Map of key to [x, y].
 * @returns {(pt: number[]) => number[]} Projector returning [px, py].
 */
function makeToXY(coords){
  const { minX, minY, maxX, maxY } = getBounds(coords);
  const pad = 18, w = canvas.width - pad * 2, h = canvas.height - pad * 2;
  const spanX = maxX - minX;
  const spanY = maxY - minY;
  return ([x, y]) => {
    const nx = spanX ? (x - minX) / spanX : 0.5;
    const ny = spanY ? (y - minY) / spanY : 0.5;
    return [pad + nx * w, pad + (1 - ny) * h];
  };
}
/**
 * Clears the canvas and paints every point of the map as a small dim dot,
 * then marks the base layer as drawn.
 */
function drawBase(emb,toXY){
  const FULL_CIRCLE = Math.PI * 2;
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  ctx.fillStyle = '#1a2a3a';
  for (const key in emb.coords) {
    const [px, py] = toXY(emb.coords[key]);
    ctx.beginPath();
    ctx.arc(px, py, 2, 0, FULL_CIRCLE);
    ctx.fill();
  }
  emb.baseDrawn = true;
}
/**
 * Draws the map with `tokenId` and its neighbors highlighted, and lists up to
 * 18 neighbors as chips with their similarity percentage.
 *
 * The base layer is repainted on every call, including when the token is not
 * in the map, so highlights from a previously shown token never remain on
 * screen next to the "unavailable" message.
 *
 * @param {number} tokenId Token id to highlight.
 * @returns {Promise<void>}
 */
async function drawNeighborhood(tokenId){
  const emb = await ensureEmbeddings();
  const toXY = makeToXY(emb.coords);
  drawBase(emb, toXY);

  const key = resolveCoordKey(emb, tokenId);
  if (!key) {
    embInfo.innerHTML = '<span class="warn">Neighborhood unavailable for this token (not in the current map).</span>';
    nbrsEl.innerHTML = '';
    return;
  }

  const targetXY = toXY(emb.coords[key]);
  const rawList = getNeighborList(emb, key, tokenId);
  const list = rawList.map(e => mapNeighborEntry(emb, e)).filter(Boolean);

  // neighbors
  ctx.fillStyle = '#93c5fd';
  for (const [nk] of list){
    const pt = emb.coords[nk];
    if (!pt) continue;
    const [x,y] = toXY(pt);
    ctx.beginPath(); ctx.arc(x, y, 3.4, 0, Math.PI*2); ctx.fill();
  }

  // target (drawn last so it sits on top of its neighbors)
  ctx.fillStyle = '#22d3ee';
  ctx.beginPath(); ctx.arc(targetXY[0], targetXY[1], 4.8, 0, Math.PI*2); ctx.fill();

  // chips
  nbrsEl.innerHTML = '';
  embInfo.textContent = 'Nearest neighbors:';
  for (const [nk, sim] of list.slice(0,18)){
    // Numeric keys are token ids and need decoding; token keys only need
    // their leading word-boundary marker turned back into a space.
    const label = (String(+nk) === nk)
      ? (decodeId(+nk) || (tokenizer.id_to_token ? tokenizer.id_to_token(+nk) : String(nk)))
      : nk.replace(/^▁/,' ').replace(/^Ġ/,' ');
    const pct = Number(sim) * 100;
    const b = document.createElement('div');
    b.className = 'chip';
    // Omit the percentage rather than printing "NaN%" when no similarity is given.
    b.textContent = Number.isFinite(pct) ? `${label} ${pct.toFixed(1)}%` : `${label}`;
    nbrsEl.appendChild(b);
  }
}
// ===== Responsive canvas (keeps drawing buffer in sync with CSS size) =====
/**
 * Sizes the canvas backing store to its on-screen size times the device pixel
 * ratio (capped at 2) and redraws. Nothing happens when the size is unchanged
 * unless `force` is true. After resizing, the last hovered token's
 * neighborhood is redrawn if there is one; otherwise only the base map is
 * drawn, and only if coordinates are loaded.
 */
function resizeCanvas(force=false){
  const scale = Math.min(2, window.devicePixelRatio || 1);
  const box = canvas.getBoundingClientRect();
  const targetW = Math.max(1, Math.round(box.width * scale));
  const targetH = Math.max(1, Math.round(box.height * scale));

  const sameSize = canvas.width === targetW && canvas.height === targetH;
  if (!force && sameSize) return;

  // Assigning width/height resets the canvas, so the base layer is gone.
  canvas.width = targetW;
  canvas.height = targetH;
  const emb = getEmbState();
  emb.baseDrawn = false;
  if (!emb.coords) return;

  if (lastHoveredId != null) {
    drawNeighborhood(lastHoveredId);
  } else {
    drawBase(emb, makeToXY(emb.coords));
  }
}
window.addEventListener('resize', () => { resizeCanvas(false); });
// ==========================================================================
// Events
// Both handlers are async; failures are caught and shown in the page instead
// of becoming unhandled promise rejections with a stale status line.
let debounceId;
['input','change'].forEach(ev => {
  textIn.addEventListener(ev, () => {
    // Debounce: only the last edit within 150 ms triggers a prediction.
    clearTimeout(debounceId);
    debounceId = setTimeout(async () => {
      try {
        await tokenize(textIn.value);
        await predict();
      } catch (err) {
        console.error(err); showError(err); setStatus('Error');
      }
    }, 150);
  });
});
modelSel.addEventListener('change', async e => {
  const key = e.target.value;
  try {
    setStatus(`Switching to ${MODELS[key].label}…`);
    await loadModel(key);
    await tokenize(textIn.value ?? '');
    await predict();
  } catch (err) {
    console.error(err); showError(err); setStatus('Error');
  }
});
// Kickoff (Qwen default)
// A failure here (model download, tokenization, first prediction) is shown in
// the error box; without the catch the page would stay on "Loading…".
try {
  await loadModel('qwen3');
  modelSel.value = 'qwen3';
  await tokenize(textIn.value ?? '');
  resizeCanvas(true); // ensure correct backing store before first draw
  await predict();
} catch (e) {
  console.error(e); showError(e); setStatus('Error');
}
</script>
</body>
</html>