MemoirAI / transcrriber.py
gaur3009's picture
Update transcrriber.py
0c5c3aa verified
import speech_recognition as sr
import numpy as np
import io
import config
class SpeechTranscriber:
    """Accumulate raw audio chunks and transcribe them via Google Web Speech.

    Audio is stored as 16-bit PCM bytes in ``audio_buffer``. Once the buffer
    holds at least ``config.MIN_PROCESSING_DURATION`` seconds of audio at
    ``config.SAMPLE_RATE``, ``get_transcript_chunk`` submits it for
    recognition.
    """

    # Bytes per sample passed to ``sr.AudioData`` (16-bit PCM).
    _SAMPLE_WIDTH = 2
    # Peak magnitude used when scaling float samples to 16-bit integers.
    _INT16_PEAK = 32767

    def __init__(self):
        """Create the recognizer and apply the thresholds from ``config``."""
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = config.ENERGY_THRESHOLD
        self.recognizer.dynamic_energy_threshold = config.DYNAMIC_ENERGY_THRESHOLD
        self.recognizer.pause_threshold = config.PAUSE_THRESHOLD
        self.audio_buffer = bytearray()

    def add_audio_chunk(self, audio_chunk):
        """Append one chunk of audio samples to the pending buffer.

        Args:
            audio_chunk: Object exposing ``tobytes()``, normally a numpy
                array of samples. Floating-point numpy arrays are converted
                to int16 first (assumes samples are normalized to
                [-1.0, 1.0]; values outside that range are clipped). Any
                other input is appended byte-for-byte.
        """
        if isinstance(audio_chunk, np.ndarray) and np.issubdtype(
            audio_chunk.dtype, np.floating
        ):
            # The buffer is later interpreted as 2-byte samples, so raw
            # float bytes would decode as noise. Scale in float64 so the
            # peak value is exactly representable whatever the input width.
            clipped = np.clip(audio_chunk.astype(np.float64), -1.0, 1.0)
            audio_chunk = (clipped * self._INT16_PEAK).astype(np.int16)
        self.audio_buffer.extend(audio_chunk.tobytes())

    def get_transcript_chunk(self):
        """Transcribe the buffered audio if enough has accumulated.

        Returns:
            The recognized text, or ``None`` when the buffer is still too
            short, the audio could not be recognized, or the recognition
            request failed.
        """
        # Only process if we have enough audio.
        min_bytes = (
            config.SAMPLE_RATE * config.MIN_PROCESSING_DURATION * self._SAMPLE_WIDTH
        )
        if len(self.audio_buffer) < min_bytes:
            return None

        # Snapshot the buffer so we know exactly which bytes were submitted.
        pending = bytes(self.audio_buffer)
        audio_data = sr.AudioData(pending, config.SAMPLE_RATE, self._SAMPLE_WIDTH)

        try:
            # Use Google Web Speech API for best accuracy.
            text = self.recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Drop the submitted audio even though nothing was recognized.
            self._discard_prefix(len(pending))
            return None
        except sr.RequestError as e:
            # NOTE(review): the buffer is kept here, presumably so the same
            # audio is retried on the next call - confirm. Repeated failures
            # let the buffer keep growing.
            print(f"Speech recognition error: {e}")
            return None

        self._discard_prefix(len(pending))
        return text

    def _discard_prefix(self, count):
        """Remove the first ``count`` bytes from the buffer.

        Only the submitted bytes are removed, so chunks appended while the
        recognition request was in flight are kept for the next call.
        """
        del self.audio_buffer[:count]