import streamlit as st
import streamlit.components.v1 as components
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import tempfile
import librosa  # decodes and resamples the uploaded audio
# Load Wav2Vec2 model and processor
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
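# NOTE: the from_pretrained calls above run on every Streamlit rerun. A
# minimal caching sketch (assuming Streamlit >= 1.18, which provides
# st.cache_resource) would wrap them like this:
#
#     @st.cache_resource
#     def load_asr():
#         proc = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
#         mdl = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
#         return proc, mdl
#     processor, model = load_asr()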
# Streamlit App
st.title("Phonics/Personalized Reading App")
st.write("Record your audio and we will transcribe it.")
# Audio recording using HTML5 MediaRecorder
# st.button is only True on the rerun in which it is clicked, so persist the
# choice in session_state to keep the recorder visible afterwards
if st.button("Record Audio"):
    st.session_state.show_recorder = True
if st.session_state.get("show_recorder"):
st.markdown(""" | |
<audio id="audio" controls></audio> | |
<button id="start" onclick="startRecording()">Start Recording</button> | |
<button id="stop" onclick="stopRecording()" disabled>Stop Recording</button> | |
<script> | |
let mediaRecorder; | |
let audioChunks = []; | |
async function startRecording() { | |
const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); | |
mediaRecorder = new MediaRecorder(stream); | |
mediaRecorder.ondataavailable = event => { | |
audioChunks.push(event.data); | |
}; | |
mediaRecorder.onstop = () => { | |
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); | |
const audioUrl = URL.createObjectURL(audioBlob); | |
const audioElement = document.getElementById('audio'); | |
audioElement.src = audioUrl; | |
// Prepare to send audio to server | |
const formData = new FormData(); | |
formData.append('audio', audioBlob, 'recording.wav'); | |
fetch('/upload', { | |
method: 'POST', | |
body: formData | |
}).then(response => response.json()).then(data => { | |
st.session_state.transcription = data.transcription; | |
st.experimental_rerun(); // Refresh the app to show the transcription | |
}); | |
}; | |
mediaRecorder.start(); | |
document.getElementById('start').disabled = true; | |
document.getElementById('stop').disabled = false; | |
} | |
function stopRecording() { | |
mediaRecorder.stop(); | |
document.getElementById('start').disabled = false; | |
document.getElementById('stop').disabled = true; | |
} | |
</script> | |
""", unsafe_allow_html=True) | |
# Display the transcription
if 'transcription' in st.session_state:
    st.write("Transcription:")
    st.write(st.session_state.transcription)

# Handle audio file upload
uploaded_file = st.file_uploader("Or upload your audio file", type=["wav", "mp3"])
if uploaded_file is not None and st.session_state.get("last_file") != uploaded_file.name:
    # Persist the upload so it can be decoded from disk; keep the original
    # extension so the decoder can tell wav from mp3
    suffix = "." + uploaded_file.name.rsplit(".", 1)[-1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=True) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file.flush()
        # The processor expects a raw waveform, not a file path: decode and
        # resample to the 16 kHz mono audio Wav2Vec2 was trained on
        speech, _ = librosa.load(temp_file.name, sr=16000, mono=True)
    audio_input = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(audio_input.input_values).logits
    # Greedy CTC decoding: most likely token per frame, then batch_decode
    # collapses repeats and blanks into text
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    st.session_state.transcription = transcription[0]  # Store transcription
    st.session_state.last_file = uploaded_file.name  # Prevent an endless rerun loop
    st.rerun()  # Refresh the app so the transcription section above shows it
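# To try the app locally (assuming this file is saved as app.py; mp3 decoding
# may additionally require ffmpeg on the system):
#
#     pip install streamlit transformers torch librosa
#     streamlit run app.py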