<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>リアルタイム音声認識</title>
</head>
<body>
<h1>リアルタイム音声認識</h1>
<label for="language">言語選択: </label>
<select id="language">
<option value="none" selected>言語設定なし</option>
<option value="ja-JP">日本語</option>
<option value="en-US">英語</option>
<option value="zh-CN">中国語</option>
<option value="fr-FR">フランス語</option>
</select>
<label for="model">モデル選択: </label>
<select id="model">
<option value="openai/whisper-tiny">tiny</option>
<option value="openai/whisper-base">base</option>
<option value="openai/whisper-small">small</option>
<option value="openai/whisper-medium">medium</option>
<option value="openai/whisper-large">large</option>
<option value="openai/whisper-large-v2">large-v2</option>
<option value="openai/whisper-large-v3">large-v3</option>
<option value="openai/whisper-large-v3-turbo" selected>large-v3-turbo</option>
</select>
<button id="start">開始</button>
<button id="stop">停止</button>
<p id="output"></p>
<script>
// Cache the DOM elements the rest of the script reads and writes.
const output = document.getElementById("output");
const startBtn = document.getElementById("start");
const stopBtn = document.getElementById("stop");
const languageSelect = document.getElementById("language");
const modelSelect = document.getElementById("model");
// Feature-detect the Web Speech API first: calling `new (undefined)()`
// throws an opaque TypeError in browsers without support (e.g. Firefox).
const SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;
let recognition = null;
if (SpeechRecognitionImpl) {
  recognition = new SpeechRecognitionImpl();
  recognition.continuous = true;      // keep listening across pauses/utterances
  recognition.interimResults = true;  // surface partial hypotheses while speaking
} else {
  output.innerText = "このブラウザは音声認識に対応していません。";
}
// Push the currently selected language onto the recognizer.
// "none" maps to an empty lang string, letting the browser fall back
// to its own default language.
function updateLanguage() {
  const choice = languageSelect.value;
  if (choice === "none") {
    recognition.lang = "";
  } else {
    recognition.lang = choice;
  }
}
languageSelect.addEventListener("change", updateLanguage);
// Rebuild the full transcript from every result so far and display it.
// NOTE(review): `query()` then posts the TEXT transcript to a Whisper
// (audio) ASR endpoint, which expects audio bytes — this almost certainly
// does not do what was intended; the microphone audio (via MediaRecorder)
// should probably be sent instead. TODO: confirm and rework.
recognition.onresult = async (event) => {
  let transcript = "";
  for (let i = 0; i < event.results.length; i++) {
    transcript += event.results[i][0].transcript;
  }
  output.innerText = transcript;
  const blob = new Blob([transcript], { type: "text/plain" });
  const file = new File([blob], "transcript.txt", { type: "text/plain" });
  try {
    await query(file);
  } catch (err) {
    // Surface API/network failures instead of leaving an unhandled rejection.
    console.error("query failed:", err);
  }
};
// Start recognition with the language currently selected in the UI.
startBtn.addEventListener("click", () => {
  updateLanguage();
  try {
    recognition.start();
  } catch (err) {
    // start() throws InvalidStateError if recognition is already running
    // (e.g. the button was clicked twice) — report instead of crashing.
    console.error("Could not start recognition:", err);
  }
});
stopBtn.addEventListener("click", () => recognition.stop());
/**
 * POST `file` to the Hugging Face Inference API for the model selected in
 * the UI and render the returned transcription into `output`.
 *
 * @param {File} file - Payload sent as multipart form data.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function query(file) {
  const formData = new FormData();
  formData.append("file", file);
  formData.append("task", "automatic-speech-recognition");
  formData.append("generate_kwargs", JSON.stringify({ "task": "transcribe" }));
  formData.append("return_timestamps", "true");
  const model = modelSelect.value;
  const response = await fetch(
    `https://api-inference.huggingface.co/models/${model}`,
    {
      headers: {
        // SECURITY: a real API token is hard-coded in client-side source and
        // is visible to every visitor. Revoke this token and proxy requests
        // through a server that holds the secret instead.
        Authorization: "Bearer hf_" + "tbvcBbtoRnhEkOjPAnjEgKLDAlBKflTBSe"
      },
      method: "POST",
      body: formData,
    }
  );
  // Fail loudly on HTTP errors instead of trying to parse an error page as
  // a successful result.
  if (!response.ok) {
    throw new Error(`Inference API error ${response.status}: ${await response.text()}`);
  }
  const result = await response.json();
  if (result.text) {
    output.innerText = result.text;
  }
  console.log(JSON.stringify(result));
}
</script>
</body>
</html>