# Streamlit phonics/personalized-reading app.
# (Hugging Face Spaces page chrome from the scrape removed.)
import streamlit as st
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import numpy as np
import tempfile
import wave
# Load Wav2Vec2 model and processor.
@st.cache_resource
def _load_asr_model():
    """Load and cache the Wav2Vec2 CTC model and its processor.

    Fixed: the original loaded both at module top level, so every
    Streamlit rerun (each widget interaction re-executes the script)
    re-instantiated the model. ``st.cache_resource`` makes this a
    once-per-server-process download/instantiation.

    Returns:
        tuple: (Wav2Vec2Processor, Wav2Vec2ForCTC) for
        "facebook/wav2vec2-base-960h".
    """
    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
    return processor, model


# Module-level names preserved so the rest of the script is unchanged.
processor, model = _load_asr_model()
# --- App header ---------------------------------------------------------
st.title("Phonics/Personalized Reading App")
st.write("Record your audio and we will transcribe it.")

# --- In-browser recording trigger --------------------------------------
# Clicking this reveals the HTML5 recorder widget rendered below.
record_button = st.button("Record Audio")
if record_button:
    # Fixed: st.markdown sanitizes <script> tags, so the recorder's
    # JavaScript never executed. Rendering through the components API
    # runs the script inside a sandboxed iframe.
    import streamlit.components.v1 as components

    # NOTE(review): the fetch() below posts to '/upload', which a plain
    # Streamlit server does not serve. Server-side transcription of the
    # in-browser recording still needs a real upload route (or a custom
    # component that returns the audio bytes to Python) — confirm the
    # intended backend before shipping.
    components.html(
        """
        <audio id="audio" controls></audio>
        <button id="start" onclick="startRecording()">Start Recording</button>
        <button id="stop" onclick="stopRecording()" disabled>Stop Recording</button>
        <div id="result"></div>
        <script>
        let mediaRecorder;
        let audioChunks = [];

        async function startRecording() {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            mediaRecorder = new MediaRecorder(stream);
            mediaRecorder.ondataavailable = event => {
                audioChunks.push(event.data);
            };
            mediaRecorder.onstop = () => {
                const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                const audioUrl = URL.createObjectURL(audioBlob);
                document.getElementById('audio').src = audioUrl;

                // Prepare to send audio to server
                const formData = new FormData();
                formData.append('audio', audioBlob, 'recording.wav');
                fetch('/upload', {
                    method: 'POST',
                    body: formData
                }).then(response => response.json()).then(data => {
                    // Fixed: the original had Python here
                    // (st.session_state / st.experimental_rerun), which
                    // throws a ReferenceError in the browser. Show the
                    // server's transcription client-side instead.
                    document.getElementById('result').textContent = data.transcription;
                });
            };
            mediaRecorder.start();
            document.getElementById('start').disabled = true;
            document.getElementById('stop').disabled = false;
        }

        function stopRecording() {
            mediaRecorder.stop();
            document.getElementById('start').disabled = false;
            document.getElementById('stop').disabled = true;
        }
        </script>
        """,
        height=200,
    )
# Show the latest transcription persisted across reruns, when one exists.
has_transcription = "transcription" in st.session_state
if has_transcription:
    st.write("Transcription:")
    st.write(st.session_state["transcription"])
# Handle audio file upload as an alternative to in-browser recording.
uploaded_file = st.file_uploader("Or upload your audio file", type=["wav", "mp3"])
if uploaded_file is not None:
    # Persist the upload to disk so the stdlib wave reader can open it.
    with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file.flush()

        # Fixed: the original passed the temp-file *path string* to the
        # processor, but Wav2Vec2Processor expects the raw audio samples.
        # Decode the WAV and normalize int16 PCM to float32 in [-1, 1).
        # NOTE(review): wave parses only PCM WAV — the "mp3" option the
        # uploader offers will fail here; assumes 16-bit mono @ 16 kHz,
        # matching the model's expected sampling rate — TODO confirm.
        with wave.open(temp_file.name, "rb") as wav_file:
            raw_frames = wav_file.readframes(wav_file.getnframes())
        samples = np.frombuffer(raw_frames, dtype=np.int16).astype(np.float32) / 32768.0

        # Run CTC inference and greedy-decode the most likely tokens.
        audio_input = processor(samples, sampling_rate=16000, return_tensors="pt", padding=True)
        with torch.no_grad():  # inference only; no gradients needed
            logits = model(audio_input.input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)

        st.session_state.transcription = transcription[0]  # persist for later reruns
        # Fixed: the original called st.experimental_rerun() here, which
        # re-enters this branch (file_uploader returns the same file on
        # rerun) and loops forever. Render the result directly instead.
        st.write("Transcription:")
        st.write(transcription[0])