File size: 3,551 Bytes
a9e7f37
d7a9f29
 
c570be6
d7a9f29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import streamlit as st
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import numpy as np
import tempfile
import wave

# Load Wav2Vec2 model and processor.
# Streamlit re-executes this script top-to-bottom on every user interaction;
# without caching, both the processor and the model were re-loaded from disk
# on each rerun. `st.cache_resource` keeps a single shared instance alive.
@st.cache_resource
def _load_asr():
    """Load and cache the Wav2Vec2 processor and CTC model (once per server)."""
    proc = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
    mdl = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
    mdl.eval()  # inference only — disable dropout etc.
    return proc, mdl

processor, model = _load_asr()

# Streamlit App
# Page chrome: title, a one-line instruction, and the button that reveals
# the in-browser recorder markup below.
st.title("Phonics/Personalized Reading App")
st.write("Record your audio and we will transcribe it.")

# Audio recording using HTML5
# NOTE(review): `st.button` returns True only for the single rerun triggered
# by the click, so the recorder UI injected in the next block disappears on
# any subsequent interaction — confirm this is the intended UX.
record_button = st.button("Record Audio")

if record_button:
    # Inject an HTML5/JS recorder widget (MediaRecorder API) into the page.
    # NOTE(review): this embedded recorder cannot work as written:
    #   - the fetch() POSTs to '/upload', but a plain Streamlit app exposes
    #     no such route, so the request will 404;
    #   - the .then() callback references `st.session_state` and
    #     `st.experimental_rerun` — Python APIs that are undefined inside
    #     browser JavaScript and will throw a ReferenceError;
    #   - MediaRecorder emits webm/ogg; labelling the Blob 'audio/wav' does
    #     not make it a real WAV file, so the server side could not decode
    #     it with `wave` anyway;
    #   - `st.markdown` strips <script> tags by default, so the JS likely
    #     never executes — a components.html() embed would be needed.
    # Recommend replacing this with a dedicated recorder component or
    # relying on the file-uploader path at the bottom of the script.
    # (The string below is runtime output and is left byte-identical.)
    st.markdown("""
        <audio id="audio" controls></audio>
        <button id="start" onclick="startRecording()">Start Recording</button>
        <button id="stop" onclick="stopRecording()" disabled>Stop Recording</button>
        <script>
            let mediaRecorder;
            let audioChunks = [];

            async function startRecording() {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);
                mediaRecorder.ondataavailable = event => {
                    audioChunks.push(event.data);
                };

                mediaRecorder.onstop = () => {
                    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                    const audioUrl = URL.createObjectURL(audioBlob);
                    const audioElement = document.getElementById('audio');
                    audioElement.src = audioUrl;

                    // Prepare to send audio to server
                    const formData = new FormData();
                    formData.append('audio', audioBlob, 'recording.wav');

                    fetch('/upload', {
                        method: 'POST',
                        body: formData
                    }).then(response => response.json()).then(data => {
                        st.session_state.transcription = data.transcription;
                        st.experimental_rerun();  // Refresh the app to show the transcription
                    });
                };

                mediaRecorder.start();
                document.getElementById('start').disabled = true;
                document.getElementById('stop').disabled = false;
            }

            function stopRecording() {
                mediaRecorder.stop();
                document.getElementById('start').disabled = false;
                document.getElementById('stop').disabled = true;
            }
        </script>
    """, unsafe_allow_html=True)

# Show the most recent transcription, if any rerun has produced one.
has_result = "transcription" in st.session_state
if has_result:
    st.write("Transcription:")
    st.write(st.session_state["transcription"])

# Handle audio file upload as an alternative to the browser recorder.
uploaded_file = st.file_uploader("Or upload your audio file", type=["wav", "mp3"])

if uploaded_file is not None:
    # Persist the upload to a temporary file so `wave` can open it by name.
    with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file.flush()

        try:
            # Decode the WAV container into raw PCM frames.
            # NOTE(review): the stdlib `wave` module reads WAV only — an
            # uploaded MP3 lands in the error branch below. Either drop
            # "mp3" from the uploader types or add a real decoder.
            with wave.open(temp_file.name, "rb") as wav:
                sample_rate = wav.getframerate()
                n_channels = wav.getnchannels()
                sample_width = wav.getsampwidth()
                frames = wav.readframes(wav.getnframes())
        except wave.Error:
            st.error("Could not read the audio file. Please upload a 16-bit PCM WAV file.")
        else:
            if sample_width != 2:
                st.error("Only 16-bit PCM WAV files are supported.")
            else:
                # 16-bit PCM -> float32 in [-1, 1], the waveform format
                # Wav2Vec2Processor expects.
                audio = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32768.0
                if n_channels > 1:
                    # Downmix to mono by averaging the interleaved channels.
                    audio = audio.reshape(-1, n_channels).mean(axis=1)

                if sample_rate != 16000:
                    st.warning(
                        f"Audio is {sample_rate} Hz but the model expects 16 kHz; "
                        "transcription quality may suffer."
                    )

                # BUG FIX: the original passed `temp_file.name` (a path
                # *string*) to the processor, which requires the raw sample
                # array — the path is now decoded above and `audio` is fed in.
                inputs = processor(audio, sampling_rate=16000, return_tensors="pt", padding=True)

                with torch.no_grad():
                    logits = model(inputs.input_values).logits

                predicted_ids = torch.argmax(logits, dim=-1)
                transcription = processor.batch_decode(predicted_ids)

                # Store and display directly. The original called
                # st.experimental_rerun() here, which re-entered this branch
                # on every rerun (uploaded_file remains set) — an infinite
                # rerun loop that re-transcribed the same file forever.
                st.session_state.transcription = transcription[0]
                st.write("Transcription:")
                st.write(st.session_state.transcription)