|
<!doctype html>
<html lang="ja">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>リアルタイム音声認識</title>
</head>
<body>
  <h1>リアルタイム音声認識</h1>

  <label for="language">言語選択: </label>
  <select id="language">
    <option value="none" selected>言語設定なし</option>
    <option value="ja-JP">日本語</option>
    <option value="en-US">英語</option>
    <option value="zh-CN">中国語</option>
    <option value="fr-FR">フランス語</option>
  </select>

  <label for="model">モデル選択: </label>
  <select id="model">
    <option value="openai/whisper-tiny">tiny</option>
    <option value="openai/whisper-base">base</option>
    <option value="openai/whisper-small">small</option>
    <option value="openai/whisper-medium">medium</option>
    <option value="openai/whisper-large">large</option>
    <option value="openai/whisper-large-v2">large-v2</option>
    <option value="openai/whisper-large-v3">large-v3</option>
    <option value="openai/whisper-large-v3-turbo" selected>large-v3-turbo</option>
  </select>

  <!-- type="button": without it these default to type="submit" and would
       submit an enclosing form if one is ever added around them. -->
  <button id="start" type="button">開始</button>
  <button id="stop" type="button">停止</button>
  <!-- Live region: the transcript is injected asynchronously by the script
       below, so announce updates to assistive technology. -->
  <p id="output" aria-live="polite"></p>
|
<script> |
|
const output = document.getElementById("output"); |
|
const startBtn = document.getElementById("start"); |
|
const stopBtn = document.getElementById("stop"); |
|
const languageSelect = document.getElementById("language"); |
|
const modelSelect = document.getElementById("model"); |
|
|
|
let recognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)(); |
|
recognition.continuous = true; |
|
recognition.interimResults = true; |
|
|
|
function updateLanguage() { |
|
const selectedLang = languageSelect.value; |
|
recognition.lang = selectedLang !== "none" ? selectedLang : ""; |
|
} |
|
|
|
languageSelect.addEventListener("change", updateLanguage); |
|
|
|
// Render the running transcript on every result event, but only contact the
// remote API once per finalized utterance.
recognition.onresult = async (event) => {
  // Rebuild the full transcript from every result so far (interim + final).
  let transcript = "";
  for (let i = 0; i < event.results.length; i++) {
    transcript += event.results[i][0].transcript;
  }
  output.innerText = transcript;

  // Interim results fire many times per second; posting to the API on each
  // one floods the endpoint. Only send once the latest result is final.
  const latest = event.results[event.results.length - 1];
  if (!latest || !latest.isFinal) {
    return;
  }

  // NOTE(review): this wraps the recognized TEXT in a file and posts it to an
  // automatic-speech-recognition endpoint, which expects AUDIO bytes. The
  // request almost certainly fails or returns garbage; the captured audio
  // (e.g. via MediaRecorder) should be sent instead — TODO confirm intent.
  const blob = new Blob([transcript], { type: "text/plain" });
  const file = new File([blob], "transcript.txt", { type: "text/plain" });
  await query(file);
};
|
// Start: apply the currently selected language, then begin recognizing.
startBtn.addEventListener("click", () => {
  updateLanguage();
  try {
    recognition.start();
  } catch (err) {
    // start() throws InvalidStateError when recognition is already running
    // (e.g. a double-click); swallow it instead of surfacing an uncaught
    // exception in the console.
    console.warn("recognition.start() failed:", err);
  }
});
// Stop: stop() is safe to call even when recognition is not running.
stopBtn.addEventListener("click", () => recognition.stop());
|
async function query(file) { |
|
const formData = new FormData(); |
|
formData.append("file", file); |
|
formData.append("task", "automatic-speech-recognition"); |
|
formData.append("generate_kwargs", JSON.stringify({"task": "transcribe"})); |
|
formData.append("return_timestamps", "true"); |
|
|
|
const model = modelSelect.value; |
|
|
|
const response = await fetch( |
|
`https://api-inference.huggingface.co/models/${model}`, |
|
{ |
|
headers: { |
|
Authorization: "Bearer hf_" + "tbvcBbtoRnhEkOjPAnjEgKLDAlBKflTBSe" |
|
}, |
|
method: "POST", |
|
body: formData, |
|
} |
|
); |
|
const result = await response.json(); |
|
|
|
if (result.text) { |
|
output.innerText = result.text; |
|
} |
|
|
|
console.log(JSON.stringify(result)); |
|
} |
|
</script> |
|
</body> |
|
</html> |
|
|