|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<head> |
|
<!-- Character encoding must be declared first so the title (emoji) is decoded correctly. -->
<meta charset="UTF-8">
<!-- Without a viewport declaration mobile browsers lay the page out at desktop width. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>🎤 Kashmiri Real-Time ASR</title>
|
<style>
  /* ---------- Page layout ---------- */

  /* The body is a centered flex column; each card below is a <section>. */
  body {
    font-family: "Segoe UI", sans-serif;
    background-color: #f5f6fa;
    margin: 0;
    padding: 2rem;
    color: #2f3640;
    display: flex;
    flex-direction: column;
    align-items: center;
  }

  h1 {
    font-size: 2rem;
    margin-bottom: 1rem;
  }

  /* ---------- Cards ---------- */

  section {
    width: 100%;
    max-width: 900px;
    margin-bottom: 1.5rem;
    background: white;
    border-radius: 8px;
    padding: 1rem;
    box-shadow: 0 0 8px rgba(0, 0, 0, 0.1);
  }

  section h2 {
    margin-top: 0;
    font-size: 1.2rem;
    border-bottom: 1px solid #dcdde1;
    padding-bottom: 0.5rem;
    color: #2f3640;
  }

  /* ---------- Microphone details and level meter ---------- */

  .mic-info {
    font-size: 0.9rem;
    color: #353b48;
    margin-top: 1rem;
  }

  .mic-info .label {
    font-weight: bold;
  }

  /* Native progress styling is switched off so the vendor rules below apply. */
  #vol {
    width: 100%;
    max-width: 500px;
    height: 20px;
    margin-top: 0.5rem;
    appearance: none;
  }

  /* WebKit/Blink: track of the meter. */
  #vol::-webkit-progress-bar {
    background-color: #dcdde1;
    border-radius: 8px;
  }

  /*
   * The WebKit and Gecko fill rules are intentionally kept as two separate
   * rules: a selector list containing a pseudo-element the browser does not
   * recognise would invalidate the whole rule.
   */
  #vol::-webkit-progress-value {
    background-color: #44bd32;
    border-radius: 8px;
    transition: width 0.2s;
  }

  #vol::-moz-progress-bar {
    background-color: #44bd32;
    border-radius: 8px;
    transition: width 0.2s;
  }

  /* ---------- Transcript ---------- */

  /* Scrollable box; pre-wrap keeps whitespace that is part of the transcript. */
  .transcript-container {
    margin-top: 0.5rem;
    padding: 0.5rem;
    background: #fff;
    border: 1px solid #dcdde1;
    border-radius: 8px;
    max-height: 300px;
    overflow-y: auto;
    white-space: pre-wrap;
    font-size: 1.1rem;
    color: #353b48;
  }

  /* Finalized utterances. */
  .transcript-container .final {
    color: green;
    display: inline;
    margin-right: 0.5em;
  }

  /* The in-progress (partial) hypothesis. */
  .transcript-container .interim {
    color: red;
    display: inline;
  }

  /* ---------- Translate button ---------- */

  #translateBtn {
    background: linear-gradient(135deg, #1d241b, #2d422a);
    color: white;
    font-size: 1rem;
    padding: 0.75rem 1.5rem;
    border: none;
    border-radius: 8px;
    cursor: pointer;
  }

  #translateBtn:hover {
    background: linear-gradient(135deg, #6e8167, #7e9178);
    transform: translateY(-2px);
  }

  #translateBtn:active {
    transform: scale(0.98);
  }
</style>
|
</head> |
|
<body> |
|
<!-- Page heading; the Kashmiri word is tagged with its own language code. -->
<h1>Kashmiri <span lang="ks">کٲشُر</span> Streaming Speech Recognition</h1>
<h2>🎤 Speak into Your Microphone</h2>

<!-- Microphone card: #micName, #sampleRate and #vol are filled in by the script. -->
<section class="section--mic">
  <h2>Microphone</h2>
  <div class="mic-info">
    <span class="label">Device:</span> <span id="micName">Detecting…</span><br>
    <span class="label">Sample Rate:</span> <span id="sampleRate">-</span> Hz
  </div>
  <progress id="vol" max="1" value="0" aria-label="Microphone input level"></progress>
</section>

<!-- Transcript card: the script replaces the placeholder with the transcript text.
     The placeholder stays on one line because the container uses white-space: pre-wrap. -->
<section class="section--transcript">
  <h2>Transcript</h2>
  <div class="transcript-container" id="transcript">…</div>
</section>
|
|
|
<script> |
|
// Both are assigned once microphone access has been granted:
// `orig_sample_rate` from the AudioContext, `ws` when the socket is created.
let orig_sample_rate, ws;

// Page elements the script updates: the level meter, the transcript box,
// and the two microphone detail fields.
const [vol, transcript, micNameElem, sampleRateElem] =
  ["vol", "transcript", "micName", "sampleRate"].map(id => document.getElementById(id));
|
|
|
|
|
// Ask for microphone access, then open the WebSocket and stream raw audio to it.
// Incoming JSON messages may carry `volume`, `final` and/or `partial` fields.
navigator.mediaDevices.getUserMedia({ audio: true })
  .then(stream => {
    const context = new AudioContext();
    orig_sample_rate = context.sampleRate;

    // Show which device is being captured and at what rate.
    const track = stream.getAudioTracks()[0];
    micNameElem.textContent = track.label || 'Unknown';
    sampleRateElem.textContent = orig_sample_rate;

    // Match the socket scheme to the page so the demo also works over plain
    // http (e.g. http://localhost), where a wss:// connection would fail.
    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
    ws = new WebSocket(`${scheme}//${location.host}/ws`);
    ws.onopen = () => sendConfig();
    ws.onerror = err => console.error("WebSocket error:", err);
    ws.onclose = () => console.log("WebSocket closed");

    // Build one <span> with the given class whose content is plain text.
    const makeSpan = (className, text) => {
      const span = document.createElement("span");
      span.className = className;
      span.textContent = text;
      return span;
    };

    // Redraw the transcript box from finalUtterances + currentInterim.
    // Text is inserted via textContent (never parsed as HTML), so markup
    // arriving from the server cannot inject elements or scripts.
    const renderTranscript = () => {
      const fragment = document.createDocumentFragment();
      for (const utterance of finalUtterances) {
        fragment.appendChild(makeSpan("final", utterance));
      }
      if (currentInterim) {
        // Separating space before the in-progress hypothesis.
        fragment.appendChild(document.createTextNode(" "));
        fragment.appendChild(makeSpan("interim", currentInterim));
      }
      transcript.textContent = "";
      transcript.appendChild(fragment);
      // Keep the newest text in view.
      transcript.scrollTop = transcript.scrollHeight;
    };

    ws.onmessage = e => {
      let msg;
      try {
        msg = JSON.parse(e.data);
      } catch (err) {
        console.warn("Ignoring WebSocket message that is not valid JSON:", err);
        return;
      }
      if (msg === null || typeof msg !== "object") {
        return;
      }

      if (msg.volume !== undefined) {
        // The meter's max is 1, so clamp louder readings.
        vol.value = Math.min(msg.volume, 1.0);
      }

      // A final result closes the current utterance; a partial replaces the
      // in-progress hypothesis.
      let transcriptChanged = false;
      if (msg.final !== undefined) {
        finalUtterances.push(msg.final.trim());
        currentInterim = "";
        transcriptChanged = true;
      } else if (msg.partial !== undefined) {
        currentInterim = msg.partial;
        transcriptChanged = true;
      }

      // Volume-only messages do not touch the transcript, so skip the redraw
      // (and the forced scroll-to-bottom) for them.
      if (transcriptChanged) {
        renderTranscript();
      }
    };

    // NOTE(review): ScriptProcessorNode is deprecated in favour of
    // AudioWorklet; kept here to preserve the existing audio pipeline.
    const source = context.createMediaStreamSource(stream);
    const processor = context.createScriptProcessor(4096, 1, 1);
    source.connect(processor);
    processor.connect(context.destination);
    processor.onaudioprocess = e => {
      // send() throws while the socket is still connecting and is pointless
      // once it has closed, so drop audio unless the socket is open.
      if (ws.readyState !== WebSocket.OPEN) {
        return;
      }
      const input = e.inputBuffer.getChannelData(0);
      // Copy the samples so exactly this buffer's data is what gets sent.
      ws.send(new Float32Array(input).buffer);
    };
  })
  .catch(err => {
    // Permission denied, no input device, or a failure while setting up the stream.
    console.error("Could not start microphone streaming:", err);
    micNameElem.textContent = "Unavailable";
  });
|
|
|
|
|
// Latest partial hypothesis; cleared whenever a final result arrives.
let currentInterim = "";

// Finalized utterances, in the order they were received.
const finalUtterances = [];
|
|
|
|
|
/**
 * Send the stream configuration to the server as a JSON text message.
 *
 * The message has `type: "config"` and carries the captured sample rate
 * (`orig_sample_rate`). Nothing is sent unless the socket exists and is open.
 */
function sendConfig() {
  const socketReady = Boolean(ws) && ws.readyState === WebSocket.OPEN;
  if (!socketReady) {
    return;
  }

  const configMessage = {
    type: "config",
    sampleRate: orig_sample_rate,
  };
  ws.send(JSON.stringify(configMessage));
}
|
|
|
|
|
|
|
// No message handler is attached at the top level of the script: `ws` is only
// created inside the getUserMedia callback, so assigning `ws.onmessage` here
// would run while `ws` is still undefined and throw a TypeError on page load.
// The handler installed right after the socket is created in that callback is
// the single place where incoming messages are processed.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</script> |
|
</body> |
|
</html> |
|
|