// Virtual-Kimi / kimi-js / kimi-voices.js
// ===== KIMI VOICE MANAGEMENT MODULE =====
class KimiVoiceManager {
constructor(database, memory) {
this.db = database;
this.memory = memory;
this.isInitialized = false;
// Voice properties
this.speechSynthesis = window.speechSynthesis;
this.currentVoice = null;
this.availableVoices = [];
// Speech recognition
this.SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
this.recognition = null;
this.isListening = false;
this.isStoppingVolontaire = false;
// DOM elements
this.micButton = null;
// Real-time transcript overlay elements (shows live speech transcription and AI responses)
this.transcriptContainer = null; // Container for transcript overlay
this.transcriptText = null; // Text element displaying current transcript
// Callback for voice message analysis
this.onSpeechAnalysis = null;
// Reference to mic handler function for removal
this.handleMicClick = null;
this.transcriptHideTimeout = null;
this.listeningTimeout = null;
// Selected character for responses (will be updated from database)
this.selectedCharacter = window.KIMI_CONFIG?.DEFAULTS?.SELECTED_CHARACTER || "Kimi";
// Speaking flag
this.isSpeaking = false;
// Auto-stop listening duration (in milliseconds)
this.autoStopDuration = 15000; // 15 seconds
// Silence timeout after final transcript (in milliseconds)
this.silenceTimeout = 2200; // 2.2 seconds
// Track if microphone permission has been granted
this.micPermissionGranted = false;
// Debounced microphone toggle (centralized utility)
this._debouncedToggleMicrophone = window.KimiPerformanceUtils
? window.KimiPerformanceUtils.debounce(() => this._toggleMicrophoneCore(), 300, false, this)
: null;
// Browser detection
this.browser = this._detectBrowser();
}
// ===== INITIALIZATION =====
async init() {
// Avoid double initialization
if (this.isInitialized) {
console.log("VoiceManager already initialized, ignored");
return true;
}
try {
// Initialize DOM elements with verification
this.micButton = document.getElementById("mic-button");
this.transcriptContainer = document.querySelector(".transcript-container");
this.transcriptText = document.getElementById("transcript");
if (!this.micButton) {
if (window.KIMI_CONFIG?.DEBUG?.VOICE) {
console.warn("Microphone button not found in DOM!");
}
return false;
}
// Check transcript elements (non-critical, just warn)
if (!this.transcriptContainer) {
if (window.KIMI_CONFIG?.DEBUG?.VOICE) {
console.warn("Transcript container not found in DOM - transcript feature will be disabled");
}
}
if (!this.transcriptText) {
if (window.KIMI_CONFIG?.DEBUG?.VOICE) {
console.warn("Transcript text element not found in DOM - transcript feature will be disabled");
}
}
// Initialize voice synthesis
await this.initVoices();
// Only setup listener once during initialization
if (!this._voicesListenerSetup) {
this.setupVoicesChangedListener();
this._voicesListenerSetup = true;
}
this.setupLanguageSelector();
// Initialize speech recognition
this.setupSpeechRecognition();
this.setupMicrophoneButton();
// Check current microphone permission status
await this.checkMicrophonePermission();
// Initialize selected character with proper display name
if (this.db && typeof this.db.getSelectedCharacter === "function") {
const charKey = await this.db.getSelectedCharacter();
if (charKey && window.KIMI_CHARACTERS && window.KIMI_CHARACTERS[charKey]) {
// Use the display name, not the key
this.selectedCharacter = window.KIMI_CHARACTERS[charKey].name;
} else if (charKey) {
// Fallback to key if KIMI_CHARACTERS not available
this.selectedCharacter = charKey;
}
}
this.isInitialized = true;
return true;
} catch (error) {
console.error("Error during VoiceManager initialization:", error);
return false;
}
}
_detectBrowser() {
const ua = navigator.userAgent || "";
const isOpera = (!!window.opr && !!opr.addons) || ua.includes(" OPR/");
const isFirefox = typeof InstallTrigger !== "undefined" || ua.toLowerCase().includes("firefox");
const isSafari = /Safari\//.test(ua) && !/Chrom(e|ium)\//.test(ua) && !/Edg\//.test(ua);
const isEdge = /Edg\//.test(ua);
// Detect Brave explicitly: navigator.brave exists in many Brave builds, UA may also include 'Brave'
const isBrave =
(!!navigator.brave && typeof navigator.brave.isBrave === "function") || ua.toLowerCase().includes("brave");
const isChrome = /Chrome\//.test(ua) && !isEdge && !isOpera && !isBrave;
if (isFirefox) return "firefox";
if (isOpera) return "opera";
if (isBrave) return "brave";
if (isSafari) return "safari";
if (isEdge) return "edge";
if (isChrome) return "chrome";
return "unknown";
}
_getUnsupportedSRMessage() {
// Build an i18n key by browser, then fallback to English if translation system isn't ready
let key = "sr_not_supported_generic";
if (this.browser === "firefox") key = "sr_not_supported_firefox";
else if (this.browser === "opera") key = "sr_not_supported_opera";
else if (this.browser === "safari") key = "sr_not_supported_safari";
const translated = typeof window.kimiI18nManager?.t === "function" ? window.kimiI18nManager.t(key) : undefined;
// Many i18n libs return the key itself if missing; detect that and fall back to English
if (!translated || translated === key) {
if (key === "sr_not_supported_firefox") {
return "Speech recognition is not supported on Firefox. Please use Chrome, Edge, or Brave.";
}
if (key === "sr_not_supported_opera") {
return "Speech recognition may not work on Opera. Please try Chrome, Edge, or Brave.";
}
if (key === "sr_not_supported_safari") {
return "Speech recognition support varies on Safari. Prefer Chrome or Edge for best results.";
}
return "Speech recognition is not available in this browser.";
}
return translated;
}
// ===== MICROPHONE PERMISSION MANAGEMENT =====
async checkMicrophonePermission() {
try {
// Check if running on file:// protocol
if (window.location.protocol === "file:") {
if (window.KIMI_CONFIG?.DEBUG?.VOICE) {
console.log("🎀 Running on file:// protocol - microphone permissions will be requested each time");
}
this.micPermissionGranted = false;
return;
}
if (!navigator.permissions) {
if (window.KIMI_CONFIG?.DEBUG?.VOICE) {
console.log("🎀 Permissions API not available");
}
this.micPermissionGranted = false; // Set default state
return;
}
const permissionStatus = await navigator.permissions.query({ name: "microphone" });
this.micPermissionGranted = permissionStatus.state === "granted";
// Listen for permission changes
permissionStatus.addEventListener("change", () => {
this.micPermissionGranted = permissionStatus.state === "granted";
});
} catch (error) {
console.log("🎀 Could not check microphone permission:", error);
this.micPermissionGranted = false;
}
}
// ===== VOICE SYNTHESIS =====
async initVoices() {
// Prevent multiple simultaneous calls
if (this._initializingVoices) {
return;
}
this._initializingVoices = true;
this.availableVoices = this.speechSynthesis.getVoices();
// Resolve selectedLanguage before any early return so SR uses correct language
if (!this.selectedLanguage) {
try {
const selectedLanguage = await this.db?.getPreference("selectedLanguage", "en");
this.selectedLanguage = window.KimiLanguageUtils.normalizeLanguageCode(selectedLanguage || "en") || "en";
} catch (_) {
this.selectedLanguage = "en";
}
}
if (this.availableVoices.length === 0) {
this._initializingVoices = false;
return; // onvoiceschanged will retry later
}
const effectiveLang = await this.getEffectiveLanguage(this.selectedLanguage);
const savedVoice = await this.db?.getPreference("selectedVoice", "auto");
const filteredVoices = this.getVoicesForLanguage(effectiveLang);
if (savedVoice && savedVoice !== "auto") {
const foundVoice = filteredVoices.find(voice => voice.name === savedVoice);
if (foundVoice) {
this.currentVoice = foundVoice;
console.log(
`🎀 Voice restored from cache: "${foundVoice.name}" (${foundVoice.lang}) for language "${effectiveLang}"`
);
this.updateVoiceSelector();
this._initializingVoices = false;
return;
} else {
console.log(
`🎀 Saved voice "${savedVoice}" not compatible with language "${effectiveLang}", using auto-selection`
);
await this.db?.setPreference("selectedVoice", "auto");
}
}
// Prefer female voices if available in the language-compatible voices
// Use real voice names since voice.gender is rarely provided by browsers
const femaleVoice = filteredVoices.find(voice => {
const name = voice.name.toLowerCase();
// Common female voice names across different platforms
const femaleNames = [
// Microsoft voices
"aria",
"emma",
"jenny",
"michelle",
"karen",
"heather",
"susan",
"joanna",
"salli",
"kimberly",
"kendra",
"ivy",
"rebecca",
"zira",
"eva",
"linda",
"denise",
"elsa",
"nathalie",
"julie",
"hortense",
"marie",
"pauline",
"claudia",
"lucia",
"paola",
"bianca",
"cosima",
"katja",
"hedda",
"helena",
"naayf",
"sabina",
"naja",
"sara",
"amelie",
"lea",
"manon",
// Google voices
"wavenet-a",
"wavenet-c",
"wavenet-e",
"wavenet-f",
"wavenet-g",
"standard-a",
"standard-c",
"standard-e",
// Apple voices
"allison",
"ava",
"samantha",
"susan",
"vicki",
"victoria",
"audrey",
"aurelie",
"marie",
"thomas",
"amelie",
// General keywords
"female",
"woman",
"girl",
"lady"
];
// Check if voice name contains any female name
return (
femaleNames.some(femaleName => name.includes(femaleName)) ||
(voice.gender && voice.gender.toLowerCase() === "female")
);
});
// Debug: Voice analysis (debug mode only)
if (window.KIMI_DEBUG_VOICE) {
if (femaleVoice) {
console.log(`🎀 Female voice found: "${femaleVoice.name}" (${femaleVoice.lang})`);
} else {
console.log(
`🎀 No female voice found, using first available: "${filteredVoices[0]?.name}" (${filteredVoices[0]?.lang})`
);
// Debug: Show what voices are available and why they don't match
if (filteredVoices.length > 0 && filteredVoices.length <= 5) {
console.log(
`🎀 Available voices for ${effectiveLang}:`,
filteredVoices.map(v => ({
name: v.name,
lang: v.lang,
gender: v.gender || "undefined"
}))
);
}
}
}
// Use female voice if found, otherwise first compatible voice, with proper fallback
// KEEP legacy auto-selection behavior only for Chrome/Edge where it was reliable.
// For other browsers (Firefox/Brave/Opera), avoid auto-selecting to prevent wrong default (e.g., Hortense).
const browser = this.browser || this._detectBrowser();
if (browser === "chrome" || browser === "edge") {
this.currentVoice = femaleVoice || filteredVoices[0] || null;
} else {
// Do not auto-select on less predictable browsers
this.currentVoice = femaleVoice && filteredVoices.length > 1 ? femaleVoice : null;
}
if (!this.currentVoice) {
console.warn("🎀 No voices available for speech synthesis - this may resolve automatically when voices load");
this._initializingVoices = false;
// Don't return here - let the system continue, voices may load later via onvoiceschanged
// The updateVoiceSelector will handle the empty state gracefully
} else {
// Log successful voice selection with language info
console.log(
`🎀 Voice loaded: "${this.currentVoice.name}" (${this.currentVoice.lang}) for language "${effectiveLang}"`
);
}
// Do not overwrite "auto" preference here; only update if user selects a specific voice
this.updateVoiceSelector();
this._initializingVoices = false;
}
updateVoiceSelector() {
const voiceSelect = document.getElementById("voice-selection");
if (!voiceSelect) return;
// Clear existing options
while (voiceSelect.firstChild) {
voiceSelect.removeChild(voiceSelect.firstChild);
}
// Add auto option
const autoOption = document.createElement("option");
autoOption.value = "auto";
autoOption.textContent = "Automatic (Best voice for selected language)";
voiceSelect.appendChild(autoOption);
const filteredVoices = this.getVoicesForLanguage(this.selectedLanguage);
// If browser is not Chrome or Edge, do NOT expose voice options even when voices exist.
// This avoids misleading users on Brave/Firefox/Opera/Safari who might think TTS is supported when it's not.
const browser = this.browser || this._detectBrowser();
if ((browser !== "chrome" && browser !== "edge") || filteredVoices.length === 0) {
const noVoicesOption = document.createElement("option");
noVoicesOption.value = "none";
noVoicesOption.textContent = "No voices available for this browser";
noVoicesOption.disabled = true;
voiceSelect.appendChild(noVoicesOption);
} else {
filteredVoices.forEach(voice => {
const option = document.createElement("option");
option.value = voice.name;
option.textContent = `${voice.name} (${voice.lang})`;
if (this.currentVoice && voice.name === this.currentVoice.name) {
option.selected = true;
}
voiceSelect.appendChild(option);
});
}
// Remove existing handler before adding new one
if (this.voiceChangeHandler) {
voiceSelect.removeEventListener("change", this.voiceChangeHandler);
}
// Create and store the handler
this.voiceChangeHandler = this.handleVoiceChange.bind(this);
voiceSelect.addEventListener("change", this.voiceChangeHandler);
}
async handleVoiceChange(e) {
if (e.target.value === "auto") {
console.log(`🎀 Voice set to automatic selection for language "${this.selectedLanguage}"`);
await this.db?.setPreference("selectedVoice", "auto");
this.currentVoice = null; // clear immediate in-memory voice
// Re-initialize voices synchronously so currentVoice is set before other code reacts
try {
await this.initVoices();
} catch (err) {
// If init fails, leave currentVoice null but don't throw
console.warn("🎀 initVoices failed after setting auto:", err);
}
} else {
this.currentVoice = this.availableVoices.find(voice => voice.name === e.target.value);
console.log(`🎀 Voice manually selected: "${this.currentVoice?.name}" (${this.currentVoice?.lang})`);
await this.db?.setPreference("selectedVoice", e.target.value);
}
}
setupVoicesChangedListener() {
if (this.speechSynthesis.onvoiceschanged !== undefined) {
// Prevent multiple event listeners
this.speechSynthesis.onvoiceschanged = null;
this.speechSynthesis.onvoiceschanged = async () => {
// Only reinitialize if voices are actually available now
if (this.speechSynthesis.getVoices().length > 0) {
await this.initVoices();
}
};
}
// Fallback: Only use timeout if onvoiceschanged is not supported
if (this.availableVoices.length === 0 && this.speechSynthesis.onvoiceschanged === undefined) {
setTimeout(async () => {
await this.initVoices();
}, 1000);
}
}
// ===== LANGUAGE UTILITIES =====
getLanguageCode(langShort) {
const languageMap = {
en: "en-US",
fr: "fr-FR",
es: "es-ES",
de: "de-DE",
it: "it-IT",
ja: "ja-JP",
zh: "zh-CN"
};
return languageMap[langShort] || langShort;
}
// language normalization handled by window.KimiLanguageUtils.normalizeLanguageCode
getVoicesForLanguage(language) {
const norm = window.KimiLanguageUtils.normalizeLanguageCode(language || "");
// First pass: voices whose lang primary subtag starts with normalized code
let filteredVoices = this.availableVoices.filter(voice => {
try {
const vlang = String(voice.lang || "").toLowerCase();
return vlang.startsWith(norm);
} catch (e) {
return false;
}
});
// Second pass: voices that contain the code anywhere
if (filteredVoices.length === 0 && norm) {
filteredVoices = this.availableVoices.filter(voice =>
String(voice.lang || "")
.toLowerCase()
.includes(norm)
);
}
// Do not fall back to all voices: if none match, return empty array so UI shows "no voices available"
if (filteredVoices.length === 0) return [];
return filteredVoices;
}
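// Example: with installed voices tagged "en-US", "en-GB" and "fr-FR", getVoicesForLanguage("en") keeps the two
// English voices on the first pass; a language with no installed match (e.g. "pt") returns [] so the selector
// can show its "no voices available" state.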
// ===== VOICE PREFERENCE UTILITIES =====
getVoicePreference(paramType, options = {}) {
// Hierarchy: options > memory.preferences > kimiMemory.preferences > DOM element > default
const defaults = {
rate: window.KIMI_CONFIG?.DEFAULTS?.VOICE_RATE || 1.1,
pitch: window.KIMI_CONFIG?.DEFAULTS?.VOICE_PITCH || 1.1,
volume: window.KIMI_CONFIG?.DEFAULTS?.VOICE_VOLUME || 0.8
};
const elementIds = {
rate: "voice-rate",
pitch: "voice-pitch",
volume: "voice-volume"
};
const memoryKeys = {
rate: "voiceRate",
pitch: "voicePitch",
volume: "voiceVolume"
};
// 1. Check options parameter
if (options[paramType] !== undefined) {
return parseFloat(options[paramType]);
}
// 2. Check local memory preferences
if (this.memory?.preferences?.[memoryKeys[paramType]] !== undefined) {
return parseFloat(this.memory.preferences[memoryKeys[paramType]]);
}
// 3. Check global memory preferences
if (window.kimiMemory?.preferences?.[memoryKeys[paramType]] !== undefined) {
return parseFloat(window.kimiMemory.preferences[memoryKeys[paramType]]);
}
// 4. Check DOM element
const element = document.getElementById(elementIds[paramType]);
if (element) {
return parseFloat(element.value);
}
// 5. Return default value
return defaults[paramType];
}
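// Example: getVoicePreference("rate", { rate: 1.2 }) returns 1.2 regardless of stored preferences, while
// getVoicePreference("rate") falls back through memory.preferences.voiceRate, window.kimiMemory.preferences,
// the #voice-rate input value, and finally the 1.1 default.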
// ===== CHAT MESSAGE UTILITIES =====
handleChatMessage(userMessage, kimiResponse) {
// Always save to chat history, regardless of chat visibility
// Use the global chat helper if available; a same-named const cannot be probed in its own initializer (TDZ error)
const addMessageFn = typeof window.addMessageToChat === "function" ? window.addMessageToChat : null;
if (addMessageFn) {
// Save messages to history
addMessageFn("user", userMessage);
addMessageFn(this.selectedCharacter.toLowerCase(), kimiResponse);
} else {
// Fallback: only add to visible chat if available
const chatContainer = document.getElementById("chat-container");
const chatMessages = document.getElementById("chat-messages");
if (chatContainer && chatContainer.classList.contains("visible") && chatMessages) {
this.createChatMessage(chatMessages, "user", userMessage);
this.createChatMessage(chatMessages, this.selectedCharacter.toLowerCase(), kimiResponse);
chatMessages.scrollTop = chatMessages.scrollHeight;
}
}
}
createChatMessage(container, sender, text) {
const messageDiv = document.createElement("div");
messageDiv.className = `message ${sender}`;
const textDiv = document.createElement("div");
textDiv.textContent = text;
const timeDiv = document.createElement("div");
timeDiv.className = "message-time";
timeDiv.textContent = new Date().toLocaleTimeString("en-US", {
hour: "2-digit",
minute: "2-digit"
});
messageDiv.appendChild(textDiv);
messageDiv.appendChild(timeDiv);
container.appendChild(messageDiv);
}
async speak(text, options = {}) {
// If no text or voice not ready, attempt short retries for voice initialization
if (!text) {
console.warn("Unable to speak: empty text");
return;
}
const maxRetries = 3;
let attempt = 0;
while (!this.currentVoice && attempt < maxRetries) {
// Small jittered backoff
const wait = 100 + Math.floor(Math.random() * 200); // 100-300ms
await new Promise(r => setTimeout(r, wait));
// If voices available, try to init
if (this.availableVoices.length > 0) {
// attempt to pick a voice for the current language
try {
await this.initVoices();
} catch (e) {
// ignore and retry
}
}
attempt++;
}
if (!this.currentVoice) {
console.warn("Unable to speak: voice not initialized after retries");
return;
}
this.clearTranscriptTimeout();
if (this.speechSynthesis.speaking) {
this.speechSynthesis.cancel();
}
// Clean text for better speech synthesis
let processedText = this._normalizeForSpeech(text);
// Get voice settings using centralized utility
let customRate = this.getVoicePreference("rate", options);
let customPitch = this.getVoicePreference("pitch", options);
// Check for emotional indicators in original text (before processing)
const lowerText = text.toLowerCase();
if (
lowerText.includes("❤️") ||
lowerText.includes("💕") ||
lowerText.includes("😘") ||
lowerText.includes("amour") ||
lowerText.includes("love") ||
lowerText.includes("bisou")
) {
// Tender loving content - slower and higher pitch
customRate = Math.max(0.7, customRate - 0.2);
customPitch = Math.min(1.3, customPitch + 0.1);
}
const utterance = new SpeechSynthesisUtterance(processedText);
utterance.voice = this.currentVoice;
utterance.rate = customRate;
utterance.pitch = customPitch;
// Get volume using centralized utility
utterance.volume = this.getVoicePreference("volume", options);
// Use centralized emotion system for consistency
const emotionFromText = window.kimiEmotionSystem?.analyzeEmotionValidated(text) || "neutral";
// PRE-PREPARE speaking animation before TTS starts
if (window.kimiVideo) {
// Always prepare a speaking context based on detected emotion
requestAnimationFrame(async () => {
try {
const traits = await this.db?.getAllPersonalityTraits(
window.kimiMemory?.selectedCharacter || (await this.db.getSelectedCharacter())
);
const affection = traits ? traits.affection : 50;
// Choose the appropriate speaking context
if (emotionFromText === "negative") {
window.kimiVideo.switchToContext("speakingNegative", "negative", null, traits || {}, affection);
} else if (emotionFromText === "neutral") {
// Even neutral text should use speaking context during TTS
window.kimiVideo.switchToContext("speakingPositive", "neutral", null, traits || {}, affection);
} else {
// For positive and specific emotions
const videoCategory =
window.kimiEmotionSystem?.getVideoCategory(emotionFromText, traits) || "speakingPositive";
window.kimiVideo.switchToContext(videoCategory, emotionFromText, null, traits || {}, affection);
}
} catch (e) {
console.warn("Failed to prepare speaking animation:", e);
}
});
}
if (typeof window.updatePersonalityTraitsFromEmotion === "function") {
window.updatePersonalityTraitsFromEmotion(emotionFromText, text);
}
this.showResponseWithPerfectTiming(text);
utterance.onstart = async () => {
this.isSpeaking = true;
// IMMEDIATELY switch to appropriate speaking animation when TTS starts
try {
if (window.kimiVideo) {
const traits = await this.db?.getAllPersonalityTraits(
window.kimiMemory?.selectedCharacter || (await this.db.getSelectedCharacter())
);
const affection = traits ? traits.affection : 50;
// Choose speaking context based on the detected emotion using centralized logic
if (emotionFromText === "negative") {
window.kimiVideo.switchToContext("speakingNegative", "negative", null, traits || {}, affection);
} else if (emotionFromText === "neutral") {
// Even for neutral speech, use speaking context during TTS
window.kimiVideo.switchToContext("speakingPositive", "neutral", null, traits || {}, affection);
} else {
// For positive and specific emotions, use appropriate speaking context
const videoCategory =
window.kimiEmotionSystem?.getVideoCategory(emotionFromText, traits) || "speakingPositive";
window.kimiVideo.switchToContext(videoCategory, emotionFromText, null, traits || {}, affection);
}
}
} catch (e) {
console.warn("Failed to switch to speaking context:", e);
}
};
utterance.onend = () => {
this.isSpeaking = false;
// Hide transcript overlay when AI finishes speaking
this.updateTranscriptVisibility(false);
// Clear any pending hide timeout
this.clearTranscriptTimeout();
// IMMEDIATELY return to neutral when TTS ends
if (window.kimiVideo) {
try {
const info = window.kimiVideo.getCurrentVideoInfo ? window.kimiVideo.getCurrentVideoInfo() : null;
// Only return to neutral if currently in a speaking context
if (info && (info.context === "speakingPositive" || info.context === "speakingNegative")) {
// Use async pattern to get traits for neutral transition
(async () => {
try {
const traits = await this.db?.getAllPersonalityTraits(
window.kimiMemory?.selectedCharacter || (await this.db.getSelectedCharacter())
);
window.kimiVideo.switchToContext(
"neutral",
"neutral",
null,
traits || {},
traits?.affection || 50
);
} catch (e) {
// Fallback without traits
window.kimiVideo.switchToContext("neutral", "neutral", null, {}, 50);
}
})();
}
} catch (e) {
console.warn("Failed to return to neutral after TTS:", e);
}
}
};
utterance.onerror = e => {
// Surface synthesis errors instead of failing silently
console.warn("Speech synthesis error:", e?.error || e);
this.isSpeaking = false;
this.updateTranscriptVisibility(false);
this.clearTranscriptTimeout();
};
this.speechSynthesis.speak(utterance);
}
/**
* Normalize raw model text into something natural for browser speech synthesis.
* Goals:
* - Remove emojis / pictographs (engines try to read them literally)
* - Collapse excessive punctuation while preserving rhythm
* - Convert ellipses to a Unicode ellipsis (…)
* - Remove markdown / formatting artifacts (* _ ~ ` # [] <> etc.)
* - Remove stray markup like **bold**, inline code, and leftover URL/parentheses clutter
* - Keep meaningful punctuation (. , ! ? ; :)
* - Avoid inserting artificial words (e.g., "pause")
*/
_normalizeForSpeech(raw) {
if (!raw) return "";
let txt = raw;
// Remove URLs completely (they sound awkward) – keep none.
txt = txt.replace(/https?:\/\/\S+/gi, " ");
// Remove markdown code blocks and inline code markers
txt = txt.replace(/`{3}[\s\S]*?`{3}/g, " "); // fenced blocks
txt = txt.replace(/`([^`]+)`/g, "$1"); // inline code unwrap
// Remove emphasis markers (*, _, ~) while keeping inner text
txt = txt.replace(/\*{1,3}([^*]+)\*{1,3}/g, "$1");
txt = txt.replace(/_{1,3}([^_]+)_{1,3}/g, "$1");
txt = txt.replace(/~{1,2}([^~]+)~{1,2}/g, "$1");
// Strip remaining markdown heading symbols at line starts
txt = txt.replace(/^\s{0,3}#{1,6}\s+/gm, "");
// Remove HTML/XML tags
txt = txt.replace(/<[^>]+>/g, " ");
// Remove brackets content if it is link style [text](url)
txt = txt.replace(/\[([^\]]+)\]\([^)]*\)/g, "$1");
// Remove leftover standalone brackets
txt = txt.replace(/[\[\]<>]/g, " ");
// Remove emojis / pictographic chars (\p{Emoji} alone also matches digits, "#" and "*", so match pictographs plus joiner/variation marks)
txt = txt.replace(/[\p{Extended_Pictographic}\u{FE0F}\u{200D}]/gu, " ");
// Normalize ellipses: sequences of 3+ dots -> single ellipsis surrounded by light spaces
txt = txt.replace(/\.{3,}/g, " … ");
// Replace double dots with single period + space
txt = txt.replace(/\.\./g, ". ");
// Collapse multiple exclamation/question marks to single (keeps expressiveness but avoids stutter)
txt = txt.replace(/!{2,}/g, "!");
txt = txt.replace(/\?{2,}/g, "?");
// Space after sentence punctuation if missing
txt = txt.replace(/([.!?])([^\s\d])/g, "$1 $2");
// Replace underscores or asterisks still present with spaces
txt = txt.replace(/[*_]{2,}/g, " ");
// Remove stray backticks
txt = txt.replace(/`+/g, " ");
// Collapse mixed punctuation like ?!?! to a single terminal symbol, keeping the first char
txt = txt.replace(/([!?])[!?]+/g, "$1");
// Remove repeated commas / semicolons / colons
txt = txt.replace(/,{2,}/g, ",");
txt = txt.replace(/;{2,}/g, ";");
txt = txt.replace(/:{2,}/g, ":");
// Remove leading/trailing punctuation clusters
txt = txt.replace(/^[\s.,;:!?]+/, "").replace(/[\s.,;:!?]+$/, "");
// Collapse whitespace
txt = txt.replace(/\s+/g, " ");
// Final trim
txt = txt.trim();
return txt;
}
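// Example: _normalizeForSpeech("**Hi!!** Check https://example.com 😊...") yields "Hi! Check …"
// (emphasis unwrapped, URL and emoji dropped, "!!" collapsed, trailing dots folded into a single ellipsis).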
// Intelligently calculate synthesis duration
calculateSpeechDuration(text, rate = 0.9) {
const baseWordsPerMinute = 150;
const adjustedWPM = baseWordsPerMinute * rate;
const wordCount = text.split(/\s+/).length;
const estimatedMinutes = wordCount / adjustedWPM;
const estimatedMilliseconds = estimatedMinutes * 60 * 1000;
const bufferTime = text.split(/[.!?]/).length * 500;
return Math.max(estimatedMilliseconds + bufferTime, 2000);
}
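// Worked example: a 30-word, 2-sentence reply at rate 0.9 -> 150 * 0.9 = 135 WPM, so 30 / 135 min ≈ 13.3 s,
// plus the sentence-split buffer (3 segments * 500 ms = 1.5 s) ≈ 14.8 s; the result never drops below the 2-second floor.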
// ===== REAL-TIME TRANSCRIPT DISPLAY =====
// Centralized transcript timeout management
clearTranscriptTimeout() {
if (this.transcriptHideTimeout) {
clearTimeout(this.transcriptHideTimeout);
this.transcriptHideTimeout = null;
}
}
// Utility method to safely check transcript preference and control visibility
async updateTranscriptVisibility(shouldShow, text = null) {
if (!this.transcriptContainer || !this.transcriptText) return false;
const showTranscript = await this.db?.getPreference(
"showTranscript",
window.KIMI_CONFIG?.DEFAULTS?.SHOW_TRANSCRIPT ?? true
);
if (!showTranscript) {
// If transcript is disabled, always hide
this.transcriptContainer.classList.remove("visible");
return false;
}
if (shouldShow) {
if (text) {
// Show with text content
this.transcriptText.textContent = text;
this.transcriptContainer.classList.add("visible");
return true;
} else {
// Show but keep existing text (for cases where we just want to maintain visibility)
this.transcriptContainer.classList.add("visible");
return true;
}
} else {
// Hide transcript
this.transcriptContainer.classList.remove("visible");
return false;
}
}
// Show AI response text in real-time transcript overlay when AI is speaking
async showResponseWithPerfectTiming(text) {
const success = await this.updateTranscriptVisibility(true, `${this.selectedCharacter}: ${text}`);
if (success) {
this.clearTranscriptTimeout();
}
}
showResponse(text) {
this.showResponseWithPerfectTiming(text);
}
// Show user voice input text in real-time transcript overlay during speech recognition
async showUserMessage(text, duration = 3000) {
const success = await this.updateTranscriptVisibility(true, text);
if (success) {
this.clearTranscriptTimeout();
// Auto-hide transcript after specified duration
this.transcriptHideTimeout = setTimeout(async () => {
await this.updateTranscriptVisibility(false);
this.transcriptHideTimeout = null;
}, duration);
}
}
// ===== SPEECH RECOGNITION =====
async setupSpeechRecognition() {
if (!this.SpeechRecognition) {
// Do not show a UI message during initial load; only log.
console.log("Your browser does not support speech recognition.");
return;
}
// Always create a fresh instance (some browsers cache language at construction time)
this.recognition = new this.SpeechRecognition();
this.recognition.continuous = true;
// Resolve effective language (block invalid 'auto')
const normalized = await this.getEffectiveLanguage(this.selectedLanguage);
const langCode = this.getLanguageCode(normalized || "en");
try {
this.recognition.lang = langCode;
} catch (e) {
console.warn("Could not set recognition.lang, fallback en-US", e);
this.recognition.lang = "en-US";
}
console.log(`🎀 SpeechRecognition initialized (lang=${this.recognition.lang})`);
this.recognition.interimResults = true;
// Add onstart handler to confirm permission
this.recognition.onstart = () => {
if (!this.micPermissionGranted) {
this.micPermissionGranted = true;
console.log("🎀 Microphone permission confirmed via onstart");
}
};
this.recognition.onresult = async event => {
// Mark permission as granted if we get results
if (!this.micPermissionGranted) {
this.micPermissionGranted = true;
console.log("🎀 Microphone permission confirmed via onresult");
}
// Process speech recognition results into final and interim transcripts
let final_transcript = "";
let interim_transcript = "";
for (let i = event.resultIndex; i < event.results.length; ++i) {
if (event.results[i].isFinal) {
final_transcript += event.results[i][0].transcript;
} else {
interim_transcript += event.results[i][0].transcript;
}
}
// Display real-time speech transcription if enabled
const transcriptText = final_transcript || interim_transcript;
if (transcriptText) {
await this.updateTranscriptVisibility(true, transcriptText);
}
if (final_transcript && this.onSpeechAnalysis) {
try {
// Show final user message in transcript before processing
await this.showUserMessage(`You: ${final_transcript}`, 2000);
// Auto-stop after silence timeout following final transcript
setTimeout(() => {
this.stopListening();
}, this.silenceTimeout);
(async () => {
let response;
if (typeof window.analyzeAndReact === "function") {
response = await window.analyzeAndReact(final_transcript);
} else if (this.onSpeechAnalysis) {
response = await this.onSpeechAnalysis(final_transcript);
}
if (response) {
this.handleChatMessage(final_transcript, response);
setTimeout(() => {
this.speak(response);
}, 500);
}
})();
} catch (error) {
console.error("🎀 Error during voice analysis:", error);
}
}
};
this.recognition.onerror = event => {
console.error("🎀 Speech recognition error:", event.error);
if (event.error === "not-allowed" || event.error === "service-not-allowed") {
console.log("🎀 Permission denied - stopping listening");
this.micPermissionGranted = false;
this.stopListening();
const message =
window.kimiI18nManager?.t("mic_permission_denied") || "Microphone permission denied. Click again to retry.";
// Use promise-based approach for async operation
this.updateTranscriptVisibility(true, message).then(() => {
setTimeout(() => {
this.updateTranscriptVisibility(false);
}, 2000);
});
} else {
this.stopListening();
}
};
this.recognition.onend = () => {
console.log("🎀 Speech recognition ended");
// Clear timeout if recognition ends naturally
if (this.listeningTimeout) {
clearTimeout(this.listeningTimeout);
this.listeningTimeout = null;
}
// Always reset listening state when recognition ends
this.isListening = false;
if (this.isStoppingVolontaire) {
console.log("Voluntary stop confirmed");
this.isStoppingVolontaire = false;
if (this.micButton) {
this.micButton.classList.remove("mic-pulse-active");
this.micButton.classList.remove("is-listening");
}
return;
}
// User must click the mic button again to reactivate listening (isListening already reset above)
if (this.micButton) {
this.micButton.classList.remove("is-listening");
this.micButton.classList.remove("mic-pulse-active");
}
this.updateTranscriptVisibility(false);
};
}
setupMicrophoneButton() {
if (!this.micButton) {
console.error("setupMicrophoneButton: Mic button not found!");
return;
}
// Remove any existing event listener to prevent duplicates
this.micButton.removeEventListener("click", this.handleMicClick);
// Create the click handler function
this.handleMicClick = () => {
if (!this.SpeechRecognition) {
console.warn("🎀 Speech recognition not available");
let key = "sr_not_supported_generic";
if (this.browser === "firefox") key = "sr_not_supported_firefox";
else if (this.browser === "opera") key = "sr_not_supported_opera";
else if (this.browser === "safari") key = "sr_not_supported_safari";
const message = window.kimiI18nManager?.t(key) || "Speech recognition is not available in this browser.";
this.updateTranscriptVisibility(true, message).then(() => {
setTimeout(() => {
this.updateTranscriptVisibility(false);
}, 4000);
});
return;
}
if (this.isListening) {
console.log("🎀 Stopping microphone via button click");
this.stopListening();
} else {
console.log("🎀 Starting microphone via button click");
this.startListening();
}
};
// Add the event listener
this.micButton.addEventListener("click", this.handleMicClick);
}
async startListening() {
// Show helpful message if SR API is missing
if (!this.SpeechRecognition) {
let key = "sr_not_supported_generic";
if (this.browser === "firefox") key = "sr_not_supported_firefox";
else if (this.browser === "opera") key = "sr_not_supported_opera";
else if (this.browser === "safari") key = "sr_not_supported_safari";
const message = window.kimiI18nManager?.t(key) || "Speech recognition is not available in this browser.";
this.updateTranscriptVisibility(true, message).then(() => {
setTimeout(() => {
this.updateTranscriptVisibility(false);
}, 4000);
});
return;
}
if (!this.recognition || this.isListening) return;
// Check microphone API availability
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
console.warn("MediaDevices API not available");
const message = window.kimiI18nManager?.t("mic_not_supported") || "Microphone not supported in this browser.";
this.updateTranscriptVisibility(true, message).then(() => {
setTimeout(() => {
this.updateTranscriptVisibility(false);
}, 3000);
});
return;
}
// If microphone permission already granted, start directly
if (this.micPermissionGranted) {
console.log("🎀 Microphone permission already granted");
this.startRecognitionDirectly();
return;
}
// Check current permission status
try {
const permissionStatus = await navigator.permissions.query({ name: "microphone" });
console.log("🎀 Current microphone permission status:", permissionStatus.state);
if (permissionStatus.state === "granted") {
this.micPermissionGranted = true;
this.startRecognitionDirectly();
return;
} else if (permissionStatus.state === "denied") {
console.log("🎀 Microphone permission denied");
const message =
window.kimiI18nManager?.t("mic_permission_denied") ||
"Microphone permission denied. Please allow access in browser settings.";
this.updateTranscriptVisibility(true, message).then(() => {
setTimeout(() => {
this.updateTranscriptVisibility(false);
}, 4000);
});
return;
}
} catch (error) {
console.log("🎀 Could not check permission status:", error);
}
// Permission is 'prompt' or unknown, proceed with recognition start (will trigger permission dialog)
this.startRecognitionDirectly();
}
startRecognitionDirectly() {
// Prevent starting if already listening or if recognition is in an active state
if (this.isListening) {
console.log("🎀 Already listening, ignoring start request");
return;
}
// Check if recognition is already in progress (recognition.state is non-standard; where it is undefined this guard simply never triggers)
if (this.recognition && this.recognition.state && this.recognition.state !== "inactive") {
console.log("🎀 Recognition already active, stopping first");
try {
this.recognition.stop();
} catch (e) {
console.warn("🎀 Error stopping existing recognition:", e);
}
// Wait a bit before trying to start again
setTimeout(() => {
this.startRecognitionDirectly();
}, 100);
return;
}
this.isListening = true;
this.isStoppingVolontaire = false;
if (this.micButton) {
this.micButton.classList.add("is-listening");
} else {
console.error("Unable to add 'is-listening' - mic button not found");
}
if (window.kimiVideo) {
window.kimiVideo.startListening();
}
// Set auto-stop timeout
this.listeningTimeout = setTimeout(() => {
console.log("🎀 Auto-stopping listening after timeout");
this.stopListening();
}, this.autoStopDuration);
try {
this.recognition.start();
console.log("🎀 Started listening with auto-stop timeout");
} catch (error) {
console.error("Error starting listening:", error);
// Let stopListening() run while isListening is still true so it clears the auto-stop timeout and button classes
this.stopListening();
// Show user-friendly error message
const message =
window.kimiI18nManager?.t("mic_permission_denied") || "Microphone permission denied. Click again to retry.";
this.updateTranscriptVisibility(true, message).then(() => {
setTimeout(() => {
this.updateTranscriptVisibility(false);
}, 3000);
});
}
}
stopListening() {
if (!this.recognition || !this.isListening) return;
// Clear auto-stop timeout if it exists
if (this.listeningTimeout) {
clearTimeout(this.listeningTimeout);
this.listeningTimeout = null;
}
this.isListening = false;
this.isStoppingVolontaire = true;
if (this.micButton) {
this.micButton.classList.remove("is-listening");
this.micButton.classList.add("mic-pulse-active");
} else {
console.error("Unable to remove 'is-listening' - mic button not found");
}
if (window.kimiVideo) {
const currentInfo = window.kimiVideo.getCurrentVideoInfo ? window.kimiVideo.getCurrentVideoInfo() : null;
if (
currentInfo &&
(currentInfo.context === "speakingPositive" ||
currentInfo.context === "speakingNegative" ||
currentInfo.context === "dancing")
) {
// Let emotion video finish naturally
} else if (this.isStoppingVolontaire) {
// Use centralized video utility for neutral transition
window.kimiVideo.returnToNeutral();
}
}
this.clearTranscriptTimeout();
if (!this.speechSynthesis.speaking) {
// Hide transcript after delay if AI is not speaking
this.transcriptHideTimeout = setTimeout(async () => {
await this.updateTranscriptVisibility(false);
this.transcriptHideTimeout = null;
}, 2000);
}
try {
this.recognition.stop();
console.log("🎀 Stopped listening");
} catch (error) {
console.error("Error stopping listening:", error);
}
}
// ===== UTILITY METHODS =====
isVoiceAvailable() {
return this.currentVoice !== null;
}
getCurrentVoice() {
return this.currentVoice;
}
getAvailableVoices() {
return this.availableVoices;
}
setOnSpeechAnalysis(callback) {
this.onSpeechAnalysis = callback;
}
async testVoice() {
const testMessages = [
window.kimiI18nManager?.t("test_voice_message_1") || "Hello my beloved! 💕",
window.kimiI18nManager?.t("test_voice_message_2") || "I am Kimi, your virtual companion!",
window.kimiI18nManager?.t("test_voice_message_3") || "How are you today, my love?"
];
// Speak one of the test messages so the user can hear the current voice settings
const message = testMessages[Math.floor(Math.random() * testMessages.length)];
await this.speak(message);
}
destroy() {
// Clear all timeouts
if (this.listeningTimeout) {
clearTimeout(this.listeningTimeout);
this.listeningTimeout = null;
}
this.clearTranscriptTimeout();
if (this.recognition) {
this.recognition.stop();
this.recognition = null;
}
if (this.speechSynthesis.speaking) {
this.speechSynthesis.cancel();
}
// Clean up mic button event listener
if (this.micButton && this.handleMicClick) {
this.micButton.removeEventListener("click", this.handleMicClick);
}
// Clean up voice selector event listener
if (this.voiceChangeHandler) {
const voiceSelect = document.getElementById("voice-selection");
if (voiceSelect) {
voiceSelect.removeEventListener("change", this.voiceChangeHandler);
}
this.voiceChangeHandler = null;
}
// Clean up language selector event listener
if (this.languageChangeHandler) {
const languageSelect = document.getElementById("language-selection");
if (languageSelect) {
languageSelect.removeEventListener("change", this.languageChangeHandler);
}
this.languageChangeHandler = null;
}
// Reset state
this.currentVoice = null;
this.isInitialized = false;
this.isListening = false;
this.isStoppingVolontaire = false;
this.handleMicClick = null;
console.log("KimiVoiceManager destroyed and cleaned up");
}
setupLanguageSelector() {
const languageSelect = document.getElementById("language-selection");
if (!languageSelect) return;
languageSelect.value = this.selectedLanguage || "en";
// Remove existing handler before adding new one
if (this.languageChangeHandler) {
languageSelect.removeEventListener("change", this.languageChangeHandler);
}
// Create and store the handler
this.languageChangeHandler = this.handleLanguageChange.bind(this);
languageSelect.addEventListener("change", this.languageChangeHandler);
}
async handleLanguageChange(e) {
const rawLang = e.target.value;
const newLang = window.KimiLanguageUtils?.normalizeLanguageCode
? window.KimiLanguageUtils.normalizeLanguageCode(rawLang)
: rawLang;
const oldLang = this.selectedLanguage;
console.log(`🎀 Language changing: "${oldLang}" β†’ "${newLang}"`);
this.selectedLanguage = newLang;
await this.db?.setPreference("selectedLanguage", newLang);
// Update i18n system for interface translations
if (window.kimiI18nManager?.setLanguage) {
await window.kimiI18nManager.setLanguage(newLang);
}
// Check saved voice compatibility: only reset to 'auto' if incompatible
try {
const currentVoicePref = await this.db?.getPreference("selectedVoice", "auto");
// Clear in-memory currentVoice to allow re-selection
this.currentVoice = null;
if (currentVoicePref && currentVoicePref !== "auto") {
// If saved voice name exists, check if it's present among filtered voices for the new language
const filtered = this.getVoicesForLanguage(newLang);
const compatible = filtered.some(v => v.name === currentVoicePref);
if (!compatible) {
// Only write 'auto' when incompatible
await this.db?.setPreference("selectedVoice", "auto");
}
}
// Re-init voices to pick a correct voice for the new language
await this.initVoices();
// Ensure voice selector reflects new language even if no voice chosen
this.updateVoiceSelector();
} catch (err) {
// On error, fall back to safe behavior: init voices and set 'auto'
try {
await this.db?.setPreference("selectedVoice", "auto");
} catch {}
await this.initVoices();
this.updateVoiceSelector();
}
if (this.currentVoice) {
console.log(`🎀 Voice selected for "${newLang}": "${this.currentVoice.name}" (${this.currentVoice.lang})`);
} else {
console.warn(`🎀 No voice found for language "${newLang}"`);
}
// Single clear path: recreate recognition instance with new language
this._refreshRecognitionLanguage(newLang);
}
/**
* Recreate speech recognition instance with a new language.
* Some browsers (notably Chrome) may ignore lang changes mid-session; recreating ensures consistency.
*/
async _refreshRecognitionLanguage(newLang) {
if (!this.SpeechRecognition) return;
const wasListening = this.isListening;
if (this.recognition) {
try {
if (this.isListening) this.recognition.stop();
} catch {}
this.recognition.onresult = null;
this.recognition.onstart = null;
this.recognition.onend = null;
this.recognition.onerror = null;
this.recognition = null;
}
this.selectedLanguage = newLang;
await this.setupSpeechRecognition();
console.log(`🎀 Recognition language refreshed -> ${this.recognition?.lang}`);
// Restart listening if it was active
if (wasListening) {
// Small delay to allow new instance to settle
setTimeout(() => {
this.startListening();
}, 150);
}
}
// Return a normalized concrete language code (primary subtag) never 'auto'
async getEffectiveLanguage(raw) {
let base = raw || this.selectedLanguage || "en";
if (base === "auto") {
try {
if (window.KimiLanguageUtils?.getLanguage) {
base = await window.KimiLanguageUtils.getLanguage();
} else {
base = navigator.language?.split("-")[0] || "en";
}
} catch {
base = "en";
}
}
return window.KimiLanguageUtils?.normalizeLanguageCode
? window.KimiLanguageUtils.normalizeLanguageCode(base)
: base || "en";
}
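// Example: getEffectiveLanguage("auto") resolves a concrete code via window.KimiLanguageUtils.getLanguage()
// (or navigator.language as a fallback), so speech recognition never receives the literal string "auto".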
async updateSelectedCharacter() {
if (this.db && typeof this.db.getSelectedCharacter === "function") {
const charKey = await this.db.getSelectedCharacter();
if (charKey && window.KIMI_CHARACTERS && window.KIMI_CHARACTERS[charKey]) {
// Use the display name, not the key
this.selectedCharacter = window.KIMI_CHARACTERS[charKey].name;
} else if (charKey) {
// Fallback to key if KIMI_CHARACTERS not available
this.selectedCharacter = charKey;
}
}
}
// Public method for external microphone toggle (keyboard, etc.)
toggleMicrophone() {
if (this._debouncedToggleMicrophone) return this._debouncedToggleMicrophone();
return this._toggleMicrophoneCore();
}
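// Example wiring (illustrative sketch; `voiceManager` is a hypothetical instance created elsewhere in the app):
// document.addEventListener("keydown", e => {
//     if (e.ctrlKey && e.code === "KeyM") voiceManager.toggleMicrophone();
// });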
_toggleMicrophoneCore() {
if (!this.SpeechRecognition) {
console.warn("🎀 Speech recognition not available");
return false;
}
// If Kimi is speaking, stop speech synthesis first
if (this.isSpeaking && this.speechSynthesis.speaking) {
console.log("🎀 Interrupting speech to start listening");
this.speechSynthesis.cancel();
this.isSpeaking = false;
this.updateTranscriptVisibility(false);
}
if (this.isListening) {
console.log("🎀 Stopping microphone via external trigger");
this.stopListening();
} else {
console.log("🎀 Starting microphone via external trigger");
this.startListening();
}
return true;
}
// Configuration methods for timeout durations
setSilenceTimeout(milliseconds) {
if (typeof milliseconds === "number" && milliseconds > 0) {
this.silenceTimeout = milliseconds;
console.log(`🎀 Silence timeout set to ${milliseconds}ms`);
} else {
console.warn("🎀 Invalid silence timeout value");
}
}
setAutoStopDuration(milliseconds) {
if (typeof milliseconds === "number" && milliseconds > 0) {
this.autoStopDuration = milliseconds;
console.log(`🎀 Auto-stop duration set to ${milliseconds}ms`);
} else {
console.warn("🎀 Invalid auto-stop duration value");
}
}
// Get current timeout configurations
getTimeoutConfiguration() {
return {
silenceTimeout: this.silenceTimeout,
autoStopDuration: this.autoStopDuration
};
}
}
// Export for usage
window.KimiVoiceManager = KimiVoiceManager;
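// ----- Example usage (illustrative sketch; defined here but never called in this file) -----
// Assumes `database` exposes getPreference/setPreference/getSelectedCharacter and `memory` carries a
// `preferences` map, matching how the class reads them above; the real wiring lives elsewhere in the project.
async function exampleKimiVoiceSetup(database, memory) {
    const voiceManager = new KimiVoiceManager(database, memory);
    const ready = await voiceManager.init();
    if (!ready) return null;
    // Route final transcripts to the app's analysis pipeline (placeholder reply for illustration)
    voiceManager.setOnSpeechAnalysis(async transcript => `You said: ${transcript}`);
    // Optional timeout tuning (milliseconds)
    voiceManager.setSilenceTimeout(2000);
    voiceManager.setAutoStopDuration(12000);
    // Speak a line with per-call rate/pitch/volume overrides
    await voiceManager.speak("Hello! I'm ready to chat.", { rate: 1.0, pitch: 1.1, volume: 0.8 });
    // Toggle listening programmatically (same code path as the mic button)
    voiceManager.toggleMicrophone();
    return voiceManager;
}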