|
<!doctype html>
<html lang="ja">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>リアルタイム音声認識</title>
</head>
<body>
  <h1>リアルタイム音声認識</h1>

  <label for="language">言語選択: </label>
  <select id="language">
    <option value="none" selected>言語設定なし</option>
    <option value="ja-JP">日本語</option>
    <option value="en-US">英語</option>
    <option value="zh-CN">中国語</option>
    <option value="fr-FR">フランス語</option>
  </select>

  <label for="model">モデル選択: </label>
  <select id="model">
    <option value="openai/whisper-tiny">tiny</option>
    <option value="openai/whisper-base">base</option>
    <option value="openai/whisper-small">small</option>
    <option value="openai/whisper-medium">medium</option>
    <option value="openai/whisper-large">large</option>
    <option value="openai/whisper-large-v2">large-v2</option>
    <option value="openai/whisper-large-v3">large-v3</option>
    <option value="openai/whisper-large-v3-turbo" selected>large-v3-turbo</option>
  </select>

  <!-- type="button": without it these default to type="submit" and would
       submit an enclosing form if one is ever added around them. -->
  <button id="start" type="button">開始</button>
  <button id="stop" type="button">停止</button>
  <!-- Live region: the transcript is injected asynchronously by the script
       below, so announce updates to assistive technology. -->
  <p id="output" aria-live="polite"></p>
|
<script> |
|
const output = document.getElementById("output"); |
|
const startBtn = document.getElementById("start"); |
|
const stopBtn = document.getElementById("stop"); |
|
const languageSelect = document.getElementById("language"); |
|
const modelSelect = document.getElementById("model"); |
|
|
|
let recognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)(); |
|
recognition.continuous = true; |
|
recognition.interimResults = true; |
|
|
|
function updateLanguage() { |
|
const selectedLang = languageSelect.value; |
|
recognition.lang = selectedLang !== "none" ? selectedLang : ""; |
|
} |
|
|
|
languageSelect.addEventListener("change", updateLanguage); |
|
|
|
// Render the running transcript on every result event, but only contact the
// remote API once per finalized utterance.
recognition.onresult = async (event) => {
  // Rebuild the full transcript from every result so far (interim + final).
  let transcript = "";
  for (let i = 0; i < event.results.length; i++) {
    transcript += event.results[i][0].transcript;
  }
  output.innerText = transcript;

  // Interim results fire many times per second; posting to the API on each
  // one floods the endpoint. Only send once the latest result is final.
  const latest = event.results[event.results.length - 1];
  if (!latest || !latest.isFinal) {
    return;
  }

  // NOTE(review): this wraps the recognized TEXT in a file and posts it to an
  // automatic-speech-recognition endpoint, which expects AUDIO bytes. The
  // request almost certainly fails or returns garbage; the captured audio
  // (e.g. via MediaRecorder) should be sent instead — TODO confirm intent.
  const blob = new Blob([transcript], { type: "text/plain" });
  const file = new File([blob], "transcript.txt", { type: "text/plain" });
  await query(file);
};
|
// Start: apply the currently selected language, then begin recognizing.
startBtn.addEventListener("click", () => {
  updateLanguage();
  try {
    recognition.start();
  } catch (err) {
    // start() throws InvalidStateError when recognition is already running
    // (e.g. a double-click); swallow it instead of surfacing an uncaught
    // exception in the console.
    console.warn("recognition.start() failed:", err);
  }
});
// Stop: stop() is safe to call even when recognition is not running.
stopBtn.addEventListener("click", () => recognition.stop());
|
async function query(file) { |
|
const formData = new FormData(); |
|
formData.append("file", file); |
|
formData.append("task", "automatic-speech-recognition"); |
|
formData.append("generate_kwargs", JSON.stringify({"task": "transcribe"})); |
|
formData.append("return_timestamps", "true"); |
|
|
|
const model = modelSelect.value; |
|
|
|
const response = await fetch( |
|
`https://api-inference.huggingface.co/models/${model}`, |
|
{ |
|
headers: { |
|
Authorization: "Bearer hf_" + "tbvcBbtoRnhEkOjPAnjEgKLDAlBKflTBSe" |
|
}, |
|
method: "POST", |
|
body: formData, |
|
} |
|
); |
|
const result = await response.json(); |
|
|
|
if (result.text) { |
|
output.innerText = result.text; |
|
} |
|
|
|
console.log(JSON.stringify(result)); |
|
} |
|
</script> |
|
</body> |
|
</html> |
|
|