<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>🎤 Kashmiri Real-Time ASR</title>
<style>
/* Page layout */
body {
font-family: "Segoe UI", sans-serif;
background-color: #f5f6fa;
margin: 0;
padding: 2rem;
color: #2f3640;
display: flex;
flex-direction: column;
align-items: center;
}
h1 {
font-size: 2rem;
margin-bottom: 1rem;
}
section {
width: 100%;
max-width: 900px;
margin-bottom: 1.5rem;
background: white;
border-radius: 8px;
padding: 1rem;
box-shadow: 0 0 8px rgba(0,0,0,0.1);
}
section h2 {
margin-top: 0;
font-size: 1.2rem;
border-bottom: 1px solid #dcdde1;
padding-bottom: 0.5rem;
color: #2f3640;
}
/* Mic info and volume */
.mic-info {
font-size: 0.9rem;
color: #353b48;
margin-top: 1rem;
}
.mic-info .label {
font-weight: bold;
}
#vol {
width: 100%;
max-width: 500px;
height: 20px;
margin-top: 0.5rem;
appearance: none;
}
#vol::-webkit-progress-bar {
background-color: #dcdde1;
border-radius: 8px;
}
#vol::-webkit-progress-value {
background-color: #44bd32;
border-radius: 8px;
transition: width 0.2s;
}
#vol::-moz-progress-bar {
background-color: #44bd32;
border-radius: 8px;
transition: width 0.2s;
}
/* Transcript */
.transcript-container {
margin-top: 0.5rem;
padding: 0.5rem;
background: #fff;
border: 1px solid #dcdde1;
border-radius: 8px;
max-height: 300px;
overflow-y: auto;
white-space: pre-wrap;
font-size: 1.1rem;
color: #353b48;
}
.transcript-container .final {
color: green;
display: inline;
margin-right: 0.5em;
}
.transcript-container .interim {
color: red;
display: inline;
}
#translateBtn {
background: linear-gradient(135deg, #1d241b, #2d422a);
color: white;
font-size: 1rem;
padding: 0.75rem 1.5rem;
border: none;
border-radius: 8px;
cursor: pointer;
}
#translateBtn:hover {
background: linear-gradient(135deg, #6e8167, #7e9178);
transform: translateY(-2px);
}
#translateBtn:active {
transform: scale(0.98);
}
</style>
</head>
<body>
<h1>Kashmiri کٲشُر Streaming Speech Recognition</h1>
<h2>🎤 Speak into Your Microphone</h2>
<section class="section--mic">
<h2>Microphone</h2>
<div class="mic-info">
<span class="label">Device:</span> <span id="micName">Detecting…</span><br>
<span class="label">Sample Rate:</span> <span id="sampleRate">-</span> Hz
</div>
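<!-- Level meter; driven by "volume" messages received over the WebSocket -->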
<progress id="vol" max="1" value="0"></progress>
</section>
<section class="section--transcript">
<h2>Transcript</h2>
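<!-- Finalized utterances render green (.final); the rolling interim hypothesis renders red (.interim) -->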
<div id="transcript" class="transcript-container"></div>
</section>
<!-- <section class="section--translate"> -->
<!-- <h2>Translation</h2> -->
<!-- <button id="translateBtn" onclick="sendForTranslation()">🌐 Translate to English</button> -->
<!-- <div id="translatedText" class="transcript-container" style="margin-top: 1rem;">…</div> -->
<!-- </section> -->
<script>
let orig_sample_rate;
let ws;
// let lastTranslatedText = "";
// let lastTranslation = "";
const vol = document.getElementById("vol");
const transcript = document.getElementById("transcript");
const micNameElem = document.getElementById("micName");
const sampleRateElem = document.getElementById("sampleRate");
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
const context = new AudioContext();
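// Note: some browsers create an AudioContext in the "suspended" state until a user gesture; if no audio frames arrive, call context.resume() from a click handler.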
orig_sample_rate = context.sampleRate;
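// The capture rate is hardware/OS dependent (commonly 44100 or 48000 Hz); it is reported to the server via sendConfig() below.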
// Update mic info in UI
const track = stream.getAudioTracks()[0];
micNameElem.textContent = track.label || 'Unknown';
sampleRateElem.textContent = orig_sample_rate;
// Now that we know the sample rate, open the WS
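// Assumes the page is served over TLS and the same origin exposes the streaming endpoint at /ws.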
ws = new WebSocket(`wss://${location.host}/ws`);
ws.onopen = () => sendConfig();
ws.onerror = err => console.error("WebSocket error:", err);
ws.onclose = () => console.log("WebSocket closed");
// Unified handler for partial + final messages
ws.onmessage = e => {
const msg = JSON.parse(e.data);
console.log(msg);
// 1) update volume bar
if (msg.volume !== undefined) {
vol.value = Math.min(msg.volume, 1.0);
}
// 2) distinguish “final” vs “partial”
if (msg.final !== undefined) {
finalUtterances.push(msg.final.trim());
currentInterim = "";
} else if (msg.partial !== undefined) {
currentInterim = msg.partial;
}
// 3) rebuild the full, colored transcript
transcript.innerHTML =
finalUtterances
.map(u => `<span class="final">${u}</span>`)
.join("") /* margin in CSS handles spacing */
+ (currentInterim
? ` <span class="interim">${currentInterim}</span>`
: "");
// 4) auto-scroll to newest text
transcript.scrollTop = transcript.scrollHeight;
};
const source = context.createMediaStreamSource(stream);
const processor = context.createScriptProcessor(4096, 1, 1);
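// ScriptProcessorNode is deprecated in the Web Audio spec (AudioWorkletNode is the modern replacement), but it is kept here for simplicity and broad support.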
source.connect(processor);
processor.connect(context.destination);
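// Each callback delivers one mono block of 4096 samples as 32-bit floats at the device's native rate.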
processor.onaudioprocess = e => {
// Drop frames until the WebSocket is open; sending while still connecting would throw an InvalidStateError
if (!ws || ws.readyState !== WebSocket.OPEN) return;
const input = e.inputBuffer.getChannelData(0);
ws.send(new Float32Array(input).buffer);
};
}).catch(err => {
console.error("Microphone access failed:", err);
micNameElem.textContent = "Microphone unavailable";
});
// State for transcript rendering: confirmed (final) utterances and the current interim hypothesis
const finalUtterances = [];
let currentInterim = "";
// Report the capture sample rate to the server once the socket is open
function sendConfig() {
if (ws && ws.readyState === WebSocket.OPEN) {
ws.send(JSON.stringify({
type: "config",
sampleRate: orig_sample_rate,
}));
}
}
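// Messages the server is expected to send back (as handled in ws.onmessage above):
//   { "volume": 0.42 }       -> updates the level meter
//   { "partial": "..." }     -> rolling interim hypothesis (shown in red)
//   { "final": "..." }       -> endpointed utterance (locked in, shown in green)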
// This part is for translation
// async function sendForTranslation() {
// const fullText = finalUtterances.join(" ").trim();
// if (!fullText) {
// // alert("No transcript to translate.");
// console.log('[TRANSLATION] Nothing to translate')
// return;
// }
// if (fullText === lastTranslatedText) {
// console.log("Using cached translation");
// document.getElementById("translatedText").textContent = lastTranslation;
// return;
// }
// const response = await fetch("/translate", {
// method: "POST",
// headers: {
// "Content-Type": "application/json"
// },
// body: JSON.stringify({ text: fullText })
// });
// const data = await response.json();
// lastTranslatedText = fullText;
// lastTranslation = data.translation;
// // Display result
// document.getElementById("translatedText").textContent = data.translation;
// }
</script>
</body>
</html>