Spaces:
Sleeping
Sleeping
| from typing import Iterator | |
| from langchain_core.documents import Document | |
| from langchain_community.document_loaders.base import BaseBlobParser | |
| from langchain_community.document_loaders.blob_loaders import Blob | |
| import speech_recognition as sr | |
| from pydub import AudioSegment | |
| import io | |
class AudioParser(BaseBlobParser):
    """Parse audio files from a blob and convert them to text.

    The blob's MIME type selects how the audio is decoded. WAV, FLAC and
    AIFF payloads are passed to ``sr.AudioFile`` unchanged; every other
    supported type is first re-encoded to WAV with pydub. The recorded
    audio is then transcribed with ``Recognizer.recognize_google``.
    """

    # Accepted MIME types mapped to the short format name used for decoding.
    # An explicit table is needed because the MIME subtype is not always a
    # usable format name: "audio/mpeg" must become "mp3" and "audio/x-aiff"
    # must become "aiff" (splitting on "/" would give "mpeg" / "x-aiff").
    _MIME_TO_FORMAT = {
        "audio/wav": "wav",  # .wav
        "audio/mpeg": "mp3",  # .mp3
        "audio/ogg": "ogg",  # .ogg
        "audio/flac": "flac",  # .flac
        "audio/x-aiff": "aiff",  # .aiff
    }

    # Formats given to sr.AudioFile as-is, without a pydub conversion step.
    _DIRECT_FORMATS = frozenset({"wav", "flac", "aiff"})

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Parse an audio file into the Document iterator.

        Args:
            blob: The blob to parse. Its ``mimetype`` must be one of the
                keys of ``_MIME_TO_FORMAT``.

        Returns:
            An iterator yielding a single Document whose ``page_content`` is
            the recognized text and whose metadata holds the blob's source.

        Raises:
            ValueError: If the blob's MIME type is not supported.
            sr.UnknownValueError: If the recognizer cannot understand the
                audio.
            sr.RequestError: If the recognition service request fails.
            Exception: Any other error raised while reading, converting or
                recording the audio is reported and re-raised unchanged.
        """
        # Debugging: Print MIME type
        print(f"Blob MIME type: {blob.mimetype}")

        audio_format = self._MIME_TO_FORMAT.get(blob.mimetype)
        if audio_format is None:
            raise ValueError(
                f"This blob type is not supported for this parser. Supported types are: {list(self._MIME_TO_FORMAT)}"
            )

        recognizer = sr.Recognizer()
        try:
            # Read the whole payload up front so nothing below depends on the
            # blob's stream still being open.
            with blob.as_bytes_io() as audio_file:
                audio_bytes = audio_file.read()

            print(f"Attempting to process audio format: {audio_format}")

            if audio_format in self._DIRECT_FORMATS:
                # Directly use AudioFile for these formats
                audio_stream = io.BytesIO(audio_bytes)
            else:
                # Convert to PCM WAV using pydub
                audio_segment = AudioSegment.from_file(
                    io.BytesIO(audio_bytes), format=audio_format)
                audio_stream = io.BytesIO()
                audio_segment.export(audio_stream, format="wav")
                audio_stream.seek(0)

            with sr.AudioFile(audio_stream) as source:
                audio_data = recognizer.record(source)

            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                print(
                    "Google Speech Recognition could not understand the audio.")
                raise
            except sr.RequestError as e:
                print(
                    f"Could not request results from Google Speech Recognition service; {e}")
                raise
        except Exception as e:
            print(f"Error processing audio file: {e}")
            raise

        # Yield outside the try block so an exception thrown into the
        # generator at this point is not reported as an audio-processing error.
        metadata = {"source": blob.source}
        yield Document(page_content=text, metadata=metadata)