File size: 1,712 Bytes
3469f37 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import speech_recognition as sr
import os
from pydub import AudioSegment
from smolagents import tool
@tool
def transcribe_audio(mp3_path: str) -> str:
    """
    Transcribes text from an MP3 audio file using speech recognition.

    The MP3 is decoded with pydub, written to a temporary WAV file, read back
    through ``speech_recognition`` and sent to Google's speech recognition
    service via ``Recognizer.recognize_google``.

    Args:
        mp3_path (str): Path to the MP3 file to be transcribed.

    Returns:
        str: The transcribed text from the audio file.

    Raises:
        FileNotFoundError: If the MP3 file does not exist at the specified path.
        ValueError: If the speech could not be understood or the recognition
            request failed.
        Exception: For other unexpected errors during transcription
            (including a file that cannot be decoded as MP3).

    Example:
        >>> text = transcribe_audio("sample.mp3")
        >>> print(text)
        "Hello, this is a sample audio."
    """
    # Imported locally so the function does not depend on a file-level import
    # that the original module did not have.
    import tempfile

    # Check if file exists
    if not os.path.exists(mp3_path):
        raise FileNotFoundError(f"The file {mp3_path} does not exist.")

    # Initialize recognizer
    recognizer = sr.Recognizer()

    try:
        # Decode the MP3 into an in-memory audio segment.
        audio = AudioSegment.from_mp3(mp3_path)

        # Write the WAV into a private temporary directory instead of deriving
        # its name from mp3_path: a derived name can equal the input path
        # (e.g. "song.MP3" or no extension), which would overwrite and then
        # delete the caller's file, or clobber an existing sibling .wav.
        # The context manager also removes the WAV when an error occurs.
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "audio.wav")

            # export() returns the open output file; close it right away so
            # the directory can be removed on every platform.
            audio.export(wav_path, format="wav").close()

            with sr.AudioFile(wav_path) as source:
                # Read the whole file. adjust_for_ambient_noise() is
                # deliberately not called: it consumes the first second of the
                # stream (dropping that speech from the transcript) and only
                # tunes energy_threshold, which record() does not use.
                audio_data = recognizer.record(source)

        # Perform speech recognition on the in-memory audio data.
        text = recognizer.recognize_google(audio_data)
        return text
    except sr.UnknownValueError as e:
        raise ValueError("Could not understand the audio.") from e
    except sr.RequestError as e:
        raise ValueError(f"Could not process audio; {e}") from e
    except Exception as e:
        # Same exception type and message as before; now chained so the
        # original traceback is preserved.
        raise Exception(f"An error occurred during transcription: {e}") from e
|