File size: 1,712 Bytes
3469f37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import speech_recognition as sr
import os
from pydub import AudioSegment
from smolagents import tool


@tool
def transcribe_audio(mp3_path: str) -> str:
  """
  Transcribes text from an MP3 audio file using speech recognition.

  Args:
    mp3_path (str): Path to the MP3 file to be transcribed.

  Returns:
    str: The transcribed text from the audio file.

  Raises:
    FileNotFoundError: If the MP3 file does not exist at the specified path.
    ValueError: If the file is not a valid MP3 file or audio cannot be processed.
    Exception: For other unexpected errors during transcription.

  Example:
    >>> text = transcribe_audio("sample.mp3")
    >>> print(text)
    "Hello, this is a sample audio."
  """
  # Check if file exists
  if not os.path.exists(mp3_path):
    raise FileNotFoundError(f"The file {mp3_path} does not exist.")

  # Initialize recognizer
  recognizer = sr.Recognizer()

  try:
    # Convert MP3 to WAV
    audio = AudioSegment.from_mp3(mp3_path)
    wav_path = mp3_path.replace(".mp3", ".wav")
    audio.export(wav_path, format="wav")

    # Load audio file
    with sr.AudioFile(wav_path) as source:
      # Adjust for ambient noise
      recognizer.adjust_for_ambient_noise(source)
      # Record the audio
      audio_data = recognizer.record(source)

    # Clean up temporary WAV file
    os.remove(wav_path)

    # Perform speech recognition
    text = recognizer.recognize_google(audio_data)
    return text

  except sr.UnknownValueError:
    raise ValueError("Could not understand the audio.")
  except sr.RequestError as e:
    raise ValueError(f"Could not process audio; {e}")
  except Exception as e:
    raise Exception(f"An error occurred during transcription: {e}")