import streamlit as st
import streamlit.components.v1 as components
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import tempfile
import librosa  # decodes and resamples the uploaded audio
# Load Wav2Vec2 model and processor
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
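# NOTE: the from_pretrained calls above run on every Streamlit rerun. A
# minimal caching sketch (assuming Streamlit >= 1.18, which provides
# st.cache_resource) would wrap them like this:
#
#     @st.cache_resource
#     def load_asr():
#         proc = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
#         mdl = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
#         return proc, mdl
#     processor, model = load_asr()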
# Streamlit App
st.title("Phonics/Personalized Reading App")
st.write("Record your audio and we will transcribe it.")
# Audio recording using HTML5 MediaRecorder
# st.button is only True on the rerun in which it is clicked, so persist the
# choice in session_state to keep the recorder visible afterwards
if st.button("Record Audio"):
    st.session_state.show_recorder = True
if st.session_state.get("show_recorder"):
st.markdown(""" | |
<audio id="audio" controls></audio> | |
<button id="start" onclick="startRecording()">Start Recording</button> | |
<button id="stop" onclick="stopRecording()" disabled>Stop Recording</button> | |
<script> | |
let mediaRecorder; | |
let audioChunks = []; | |
async function startRecording() { | |
const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); | |
mediaRecorder = new MediaRecorder(stream); | |
mediaRecorder.ondataavailable = event => { | |
audioChunks.push(event.data); | |
}; | |
mediaRecorder.onstop = () => { | |
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); | |
const audioUrl = URL.createObjectURL(audioBlob); | |
const audioElement = document.getElementById('audio'); | |
audioElement.src = audioUrl; | |
// Prepare to send audio to server | |
const formData = new FormData(); | |
formData.append('audio', audioBlob, 'recording.wav'); | |
fetch('/upload', { | |
method: 'POST', | |
body: formData | |
}).then(response => response.json()).then(data => { | |
st.session_state.transcription = data.transcription; | |
st.experimental_rerun(); // Refresh the app to show the transcription | |
}); | |
}; | |
mediaRecorder.start(); | |
document.getElementById('start').disabled = true; | |
document.getElementById('stop').disabled = false; | |
} | |
function stopRecording() { | |
mediaRecorder.stop(); | |
document.getElementById('start').disabled = false; | |
document.getElementById('stop').disabled = true; | |
} | |
</script> | |
""", unsafe_allow_html=True) | |
# Display the transcription
if 'transcription' in st.session_state:
    st.write("Transcription:")
    st.write(st.session_state.transcription)

# Handle audio file upload
uploaded_file = st.file_uploader("Or upload your audio file", type=["wav", "mp3"])
if uploaded_file is not None and st.session_state.get("last_file") != uploaded_file.name:
    # Persist the upload so it can be decoded from disk; keep the original
    # extension so the decoder can tell wav from mp3
    suffix = "." + uploaded_file.name.rsplit(".", 1)[-1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=True) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file.flush()
        # The processor expects a raw waveform, not a file path: decode and
        # resample to the 16 kHz mono audio Wav2Vec2 was trained on
        speech, _ = librosa.load(temp_file.name, sr=16000, mono=True)
    audio_input = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(audio_input.input_values).logits
    # Greedy CTC decoding: most likely token per frame, then batch_decode
    # collapses repeats and blanks into text
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    st.session_state.transcription = transcription[0]  # Store transcription
    st.session_state.last_file = uploaded_file.name  # Prevent an endless rerun loop
    st.rerun()  # Refresh the app so the transcription section above shows it
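# To try the app locally (assuming this file is saved as app.py; mp3 decoding
# may additionally require ffmpeg on the system):
#
#     pip install streamlit transformers torch librosa
#     streamlit run app.py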