File size: 952 Bytes
c71e312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# presentation/infrastructure/services/audio/transcriber_vosk.py
from vosk import Model, KaldiRecognizer
import wave
import json
import os
from audio_converter import convert_to_pcm16_mono

# Vosk model instance, constructed once when this module is imported and
# reused by every transcribe_audio() call. The path is relative, so it is
# resolved against the process's current working directory.
# NOTE(review): the "es" in the directory name suggests a Spanish model — confirm.
model = Model("models/vosk-model-small-es-0.42")

def transcribe_audio(file_path: str) -> str:
    """Transcribe an audio file to text with the module-level Vosk model.

    The input is first passed through ``convert_to_pcm16_mono``. The path it
    returns is opened as a WAV file and fed to a ``KaldiRecognizer`` in
    chunks of 4000 frames. If the converter returned a path different from
    ``file_path``, that file is removed afterwards, whether or not the
    transcription succeeded.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The non-empty recognized text segments joined by single spaces, or
        an empty string when nothing was recognized.

    Raises:
        Any exception raised by the converter, ``wave.open`` or the
        recognizer propagates unchanged; the wave file is still closed and
        the converted file is still cleaned up.
    """
    converted_path = convert_to_pcm16_mono(file_path)
    segments = []
    try:
        # The context manager closes the wave file even if recognition fails.
        with wave.open(converted_path, "rb") as wf:
            rec = KaldiRecognizer(model, wf.getframerate())
            while True:
                data = wf.readframes(4000)
                if not data:
                    break
                # AcceptWaveform returning truthy is the signal used here to
                # collect the recognizer's result for the audio fed so far.
                if rec.AcceptWaveform(data):
                    segments.append(json.loads(rec.Result()).get("text", ""))
            # Collect whatever the recognizer still holds after the last chunk.
            segments.append(json.loads(rec.FinalResult()).get("text", ""))
    finally:
        # Only delete the file when the converter produced a different path;
        # never delete the caller's original input.
        if converted_path != file_path:
            try:
                os.remove(converted_path)
            except OSError:
                # Best-effort cleanup: a file that cannot be deleted must not
                # mask the transcription result or the original error.
                pass

    # Drop empty segments so the result has no stray or doubled spaces.
    return " ".join(segment for segment in segments if segment)