File size: 1,775 Bytes
0711651 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import os
import tempfile

import soundfile as sf
import whisper
from gtts import gTTS
class WhisperTTS:
    """Transcribe audio with a Whisper model and re-synthesize the text as speech.

    The pipeline is: audio file -> Whisper transcription (saved as text)
    -> gTTS synthesis (MP3) -> WAV file written with soundfile.
    """

    def __init__(self, model_size="base"):
        """Load the Whisper model.

        Args:
            model_size: Model name passed to ``whisper.load_model``.
        """
        self.model = whisper.load_model(model_size)

    def transcribe_audio(self, input_audio, output_text_file="transcription.txt"):
        """Transcribe audio and save the resulting text.

        Args:
            input_audio: Audio input handed to the model's ``transcribe``.
            output_text_file: Path of the text file to (over)write.

        Returns:
            The transcribed text, i.e. the ``"text"`` entry of the model result.
        """
        result = self.model.transcribe(input_audio)
        text = result["text"]
        # Explicit UTF-8: transcriptions can contain non-ASCII characters that
        # the platform's default encoding may be unable to represent.
        with open(output_text_file, "w", encoding="utf-8") as f:
            f.write(text)
        print("\nTranscription Saved:", output_text_file)
        return text

    def text_to_speech(self, text, output_audio="output.wav"):
        """Convert text to speech and save it as a WAV file.

        Args:
            text: Text to synthesize (English voice).
            output_audio: Path of the audio file to write.

        Raises:
            ValueError: If ``text`` is empty or contains only whitespace.
        """
        # Reject blank input up front so the failure is explicit and no
        # temporary files are created for nothing.
        if not text or not text.strip():
            raise ValueError("Cannot synthesize speech from empty text.")

        tts = gTTS(text, lang="en")

        # gTTS writes MP3; stage it in a private temporary directory so
        # concurrent runs cannot clobber each other and nothing is left behind.
        with tempfile.TemporaryDirectory() as tmp_dir:
            mp3_path = os.path.join(tmp_dir, "speech.mp3")
            tts.save(mp3_path)
            # NOTE(review): decoding MP3 through soundfile depends on the
            # installed libsndfile having MP3 support -- confirm on deployment.
            data, samplerate = sf.read(mp3_path)

        sf.write(output_audio, data, samplerate)
        print("\nTTS Audio Saved:", output_audio)

    def process_audio(
        self,
        input_audio,
        output_wav="transcribed_audio.wav",
        output_text_file="transcription.txt",
    ):
        """Run the full pipeline: transcribe, then generate speech.

        Args:
            input_audio: Audio input to transcribe.
            output_wav: Path of the synthesized audio file to write.
            output_text_file: Path of the transcription text file to write.

        Returns:
            A ``(transcribed_text, output_wav)`` tuple.

        Raises:
            ValueError: If the transcription is blank, since there is nothing
                to synthesize.
        """
        transcribed_text = self.transcribe_audio(input_audio, output_text_file)
        print("\nTranscribed Text:\n", transcribed_text)
        self.text_to_speech(transcribed_text, output_wav)
        return transcribed_text, output_wav
# Usage example (disabled; uncomment to run):
#
# if __name__ == "__main__":
#     whisper_tts = WhisperTTS()
#     # Change this to your actual file.
#     input_audio_file = "sample_audio/signal-2025-03-29-153916.mp3"
#     text, wav_file = whisper_tts.process_audio(input_audio_file)
#     print("\nFinal Output:\nText File: transcription.txt\nWAV File:", wav_file)