<!-- whisper / index.html — soiz1, "Update index.html", commit c3bc8d5
     (file-viewer chrome captured with the source; kept as a comment so the
     document starts at the doctype and stays valid HTML) -->
<!DOCTYPE html>
<html lang="ja">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>リアルタイム音声認識</title>
</head>
<body>
  <h1>リアルタイム音声認識</h1>
  <label for="language">言語選択: </label>
  <select id="language">
    <option value="none" selected>言語設定なし</option>
    <option value="ja-JP">日本語</option>
    <option value="en-US">英語</option>
    <option value="zh-CN">中国語</option>
    <option value="fr-FR">フランス語</option>
  </select>
  <label for="model">モデル選択: </label>
  <select id="model">
    <option value="openai/whisper-tiny">tiny</option>
    <option value="openai/whisper-base">base</option>
    <option value="openai/whisper-small">small</option>
    <option value="openai/whisper-medium">medium</option>
    <option value="openai/whisper-large">large</option>
    <option value="openai/whisper-large-v2">large-v2</option>
    <option value="openai/whisper-large-v3">large-v3</option>
    <option value="openai/whisper-large-v3-turbo" selected>large-v3-turbo</option>
  </select>
  <button id="start">開始</button>
  <button id="stop">停止</button>
  <!-- aria-live so screen readers announce transcript text injected by the
       recognition callback; the region must exist before content arrives. -->
  <p id="output" aria-live="polite"></p>
<script>
  // Realtime speech recognition via the Web Speech API. The running
  // (interim + final) transcript is shown in #output; once a segment is
  // final, the text is also POSTed to a Hugging Face inference endpoint
  // for the Whisper model chosen in #model (see query()).
  const output = document.getElementById("output");
  const startBtn = document.getElementById("start");
  const stopBtn = document.getElementById("stop");
  const languageSelect = document.getElementById("language");
  const modelSelect = document.getElementById("model");

  // Guard: SpeechRecognition is not implemented in every browser
  // (e.g. Firefox); the original code threw an opaque TypeError here.
  const SpeechRecognitionCtor =
    window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRecognitionCtor) {
    output.innerText = "このブラウザは音声認識に対応していません。";
    throw new Error("Web Speech API is not supported in this browser");
  }
  const recognition = new SpeechRecognitionCtor();
  recognition.continuous = true;
  recognition.interimResults = true;

  // "none" maps to the empty string so the browser picks its default language.
  function updateLanguage() {
    const selectedLang = languageSelect.value;
    recognition.lang = selectedLang !== "none" ? selectedLang : "";
  }
  languageSelect.addEventListener("change", updateLanguage);

  recognition.onresult = async (event) => {
    let transcript = "";
    let hasFinal = false;
    for (let i = 0; i < event.results.length; i++) {
      transcript += event.results[i][0].transcript;
      if (event.results[i].isFinal) hasFinal = true;
    }
    output.innerText = transcript;
    // With interimResults=true this handler fires many times per utterance;
    // previously every firing launched a network POST. Only hit the API
    // once at least one result segment has been finalized.
    if (!hasFinal) return;
    const blob = new Blob([transcript], { type: "text/plain" });
    const file = new File([blob], "transcript.txt", { type: "text/plain" });
    await query(file);
  };

  // Surface recognizer failures (mic denied, no-speech, network) in the console.
  recognition.onerror = (event) => {
    console.error("SpeechRecognition error:", event.error);
  };

  startBtn.addEventListener("click", () => {
    updateLanguage();
    recognition.start();
  });
  stopBtn.addEventListener("click", () => recognition.stop());

  // POST the transcript file to the HF inference API for the selected model.
  // NOTE(review): Whisper is an audio->text model; sending a text/plain file
  // is very likely rejected by the endpoint. Confirm whether this should send
  // recorded audio (e.g. via MediaRecorder) instead of the transcript text.
  async function query(file) {
    const formData = new FormData();
    formData.append("file", file);
    formData.append("task", "automatic-speech-recognition");
    formData.append("generate_kwargs", JSON.stringify({"task": "transcribe"}));
    formData.append("return_timestamps", "true");
    const model = modelSelect.value;
    const response = await fetch(
      `https://api-inference.huggingface.co/models/${model}`,
      {
        headers: {
          // SECURITY: a real API token is hard-coded below (string-split to
          // evade secret scanners). It is visible to anyone who views source;
          // it must be revoked and the call moved behind a server-side proxy.
          Authorization: "Bearer hf_" + "tbvcBbtoRnhEkOjPAnjEgKLDAlBKflTBSe"
        },
        method: "POST",
        body: formData,
      }
    );
    // Don't attempt to parse an error page as JSON (original skipped this).
    if (!response.ok) {
      console.error("Inference API request failed:",
                    response.status, response.statusText);
      return;
    }
    const result = await response.json();
    if (result.text) {
      output.innerText = result.text;
    }
    console.log(JSON.stringify(result));
  }
</script>
</body>
</html>