// Provenance: uploaded to a Hugging Face Space by Xenova (HF Staff),
// commit e9129d0 ("Upload 13 files"), file size ~12.1 kB.
import { useEffect, useState, useRef } from "react";
import { Mic, PhoneOff, ChevronDown } from "lucide-react";
import { INPUT_SAMPLE_RATE } from "./constants";
import WORKLET from "./play-worklet.js";
export default function App() {
const [callStartTime, setCallStartTime] = useState(null);
const [callStarted, setCallStarted] = useState(false);
const [playing, setPlaying] = useState(false);
const [voice, setVoice] = useState("af_heart");
const [voices, setVoices] = useState([]);
const [isListening, setIsListening] = useState(false);
const [isSpeaking, setIsSpeaking] = useState(false);
const [listeningScale, setListeningScale] = useState(1);
const [speakingScale, setSpeakingScale] = useState(1);
const [ripples, setRipples] = useState([]);
const [ready, setReady] = useState(false);
const [error, setError] = useState(null);
const [elapsedTime, setElapsedTime] = useState("00:00");
const worker = useRef(null);
const node = useRef(null);
useEffect(() => {
worker.current?.postMessage({
type: "set_voice",
voice,
});
}, [voice]);
useEffect(() => {
if (!callStarted) {
// Reset worker state after call ends
worker.current?.postMessage({
type: "end_call",
});
}
}, [callStarted]);
useEffect(() => {
if (callStarted && callStartTime) {
const interval = setInterval(() => {
const diff = Math.floor((Date.now() - callStartTime) / 1000);
const minutes = String(Math.floor(diff / 60)).padStart(2, "0");
const seconds = String(diff % 60).padStart(2, "0");
setElapsedTime(`${minutes}:${seconds}`);
}, 1000);
return () => clearInterval(interval);
} else {
setElapsedTime("00:00");
}
}, [callStarted, callStartTime]);
useEffect(() => {
worker.current ??= new Worker(new URL("./worker.js", import.meta.url), {
type: "module",
});
const onMessage = ({ data }) => {
console.log("Worker message:", data);
if (data.error) {
return onError(data.error);
}
switch (data.type) {
case "status":
if (data.status === "recording_start") {
setIsListening(true);
setIsSpeaking(false);
} else if (data.status === "recording_end") {
setIsListening(false);
} else if (data.status === "ready") {
setVoices(data.voices);
setReady(true);
}
break;
case "output":
if (!playing) {
node.current?.port.postMessage(data.result.audio);
setPlaying(true);
setIsSpeaking(true);
setIsListening(false);
}
break;
}
};
const onError = (err) => setError(err.message);
worker.current.addEventListener("message", onMessage);
worker.current.addEventListener("error", onError);
return () => {
worker.current.removeEventListener("message", onMessage);
worker.current.removeEventListener("error", onError);
};
}, []);
useEffect(() => {
if (!callStarted) return;
let worklet;
let inputAudioContext;
let source;
let ignore = false;
let outputAudioContext;
const audioStreamPromise = navigator.mediaDevices.getUserMedia({
audio: {
channelCount: 1,
echoCancellation: true,
autoGainControl: true,
noiseSuppression: true,
sampleRate: INPUT_SAMPLE_RATE,
},
});
audioStreamPromise
.then(async (stream) => {
if (ignore) return;
inputAudioContext = new (window.AudioContext ||
window.webkitAudioContext)({
sampleRate: INPUT_SAMPLE_RATE,
});
const analyser = inputAudioContext.createAnalyser();
analyser.fftSize = 256;
source = inputAudioContext.createMediaStreamSource(stream);
source.connect(analyser);
const inputDataArray = new Uint8Array(analyser.frequencyBinCount);
function calculateRMS(array) {
let sum = 0;
for (let i = 0; i < array.length; ++i) {
const normalized = array[i] / 128 - 1;
sum += normalized * normalized;
}
const rms = Math.sqrt(sum / array.length);
return rms;
}
await inputAudioContext.audioWorklet.addModule(
new URL("./vad-processor.js", import.meta.url),
);
worklet = new AudioWorkletNode(inputAudioContext, "vad-processor", {
numberOfInputs: 1,
numberOfOutputs: 0,
channelCount: 1,
channelCountMode: "explicit",
channelInterpretation: "discrete",
});
source.connect(worklet);
worklet.port.onmessage = (event) => {
const { buffer } = event.data;
worker.current?.postMessage({ type: "audio", buffer });
};
outputAudioContext = new AudioContext({
sampleRate: 24000,
});
outputAudioContext.resume();
const blob = new Blob([`(${WORKLET.toString()})()`], {
type: "application/javascript",
});
const url = URL.createObjectURL(blob);
await outputAudioContext.audioWorklet.addModule(url);
URL.revokeObjectURL(url);
node.current = new AudioWorkletNode(
outputAudioContext,
"buffered-audio-worklet-processor",
);
node.current.port.onmessage = (event) => {
if (event.data.type === "playback_ended") {
setPlaying(false);
setIsSpeaking(false);
worker.current?.postMessage({ type: "playback_ended" });
}
};
const outputAnalyser = outputAudioContext.createAnalyser();
outputAnalyser.fftSize = 256;
node.current.connect(outputAnalyser);
outputAnalyser.connect(outputAudioContext.destination);
const outputDataArray = new Uint8Array(
outputAnalyser.frequencyBinCount,
);
function updateVisualizers() {
analyser.getByteTimeDomainData(inputDataArray);
const rms = calculateRMS(inputDataArray);
const targetScale = 1 + Math.min(1.25 * rms, 0.25);
setListeningScale((prev) => prev + (targetScale - prev) * 0.25);
outputAnalyser.getByteTimeDomainData(outputDataArray);
const outputRMS = calculateRMS(outputDataArray);
const targetOutputScale = 1 + Math.min(1.25 * outputRMS, 0.25);
setSpeakingScale((prev) => prev + (targetOutputScale - prev) * 0.25);
requestAnimationFrame(updateVisualizers);
}
updateVisualizers();
})
.catch((err) => {
setError(err.message);
console.error(err);
});
return () => {
ignore = true;
audioStreamPromise.then((stream) =>
stream.getTracks().forEach((track) => track.stop()),
);
source?.disconnect();
worklet?.disconnect();
inputAudioContext?.close();
outputAudioContext?.close();
};
}, [callStarted]);
useEffect(() => {
if (!callStarted) return;
const interval = setInterval(() => {
const id = Date.now();
setRipples((prev) => [...prev, id]);
setTimeout(() => {
setRipples((prev) => prev.filter((r) => r !== id));
}, 1500);
}, 1000);
return () => clearInterval(interval);
}, [callStarted]);
return (
<div className="h-screen min-h-[240px] flex items-center justify-center bg-gray-50 p-4 relative">
<div className="h-full max-h-[320px] w-[640px] bg-white rounded-xl shadow-lg p-8 flex items-center justify-between space-x-16">
<div className="text-green-700 w-[140px]">
<div className="text-xl font-bold flex justify-between">
{voices?.[voice]?.name}
<span className="font-normal text-gray-500">{elapsedTime}</span>
</div>
<div className="text-base relative">
<button
type="button"
disabled={!ready}
className={`w-full flex items-center justify-between border border-gray-300 rounded-md transition-colors ${
ready
? "bg-transparent hover:border-gray-400"
: "bg-gray-100 opacity-50 cursor-not-allowed"
}`}
>
<span className="px-2 py-1">Select voice</span>
<ChevronDown className="absolute right-2" />
</button>
<select
value={voice}
onChange={(e) => setVoice(e.target.value)}
className="absolute inset-0 opacity-0 cursor-pointer"
disabled={!ready}
>
{Object.entries(voices).map(([key, v]) => (
<option key={key} value={key}>
{`${v.name} (${
v.language === "en-us" ? "American" : v.language
} ${v.gender})`}
</option>
))}
</select>
</div>
</div>
<div className="relative flex items-center justify-center w-32 h-32 flex-shrink-0 aspect-square">
{callStarted &&
ripples.map((id) => (
<div
key={id}
className="absolute inset-0 rounded-full border-2 border-green-200 pointer-events-none"
style={{ animation: "ripple 1.5s ease-out forwards" }}
/>
))}
<div className="absolute z-10 text-lg text-gray-700">
{!ready ? "Loading..." : ""}
{isListening && "Listening..."}
{isSpeaking && "Speaking..."}
</div>
{/* Pulsing loader while initializing */}
<div
className={`absolute w-32 h-32 rounded-full bg-green-200 ${
!ready ? "animate-ping opacity-75" : ""
}`}
style={{ animationDuration: "1.5s" }}
/>
{/* Main rings */}
<div
className={`absolute w-32 h-32 rounded-full shadow-inner transition-transform duration-300 ease-out bg-green-300 ${
!ready ? "opacity-0" : ""
}`}
style={{ transform: `scale(${speakingScale})` }}
/>
<div
className={`absolute w-32 h-32 rounded-full shadow-inner transition-transform duration-300 ease-out bg-green-200 ${
!ready ? "opacity-0" : ""
}`}
style={{ transform: `scale(${listeningScale})` }}
/>
</div>
<div className="space-y-4 w-[140px]">
{callStarted ? (
<button
className="flex items-center space-x-2 px-4 py-2 bg-red-100 text-red-700 rounded-md hover:bg-red-200"
onClick={() => {
setCallStarted(false);
setCallStartTime(null);
setPlaying(false);
setIsListening(false);
setIsSpeaking(false);
}}
>
<PhoneOff className="w-5 h-5" />
<span>End call</span>
</button>
) : (
<button
className={`flex items-center space-x-2 px-4 py-2 rounded-md ${
ready
? "bg-blue-100 text-blue-700 hover:bg-blue-200"
: "bg-blue-100 text-blue-700 opacity-50 cursor-not-allowed"
}`}
onClick={() => {
setCallStartTime(Date.now());
setCallStarted(true);
worker.current?.postMessage({ type: "start_call" });
}}
disabled={!ready}
>
<span>Start call</span>
</button>
)}
</div>
</div>
<div className="absolute bottom-4 text-sm">
Built with{" "}
<a
href="https://github.com/huggingface/transformers.js"
rel="noopener noreferrer"
target="_blank"
className="text-blue-600 hover:underline"
>
🤗 Transformers.js
</a>
</div>
</div>
);
}