Spaces:
Running
on
Zero
Running
on
Zero
| import os | |
| from io import BytesIO | |
| from typing import IO, Optional | |
| import time | |
| import uuid | |
| from pathlib import Path | |
| from pydub import AudioSegment | |
| import gradio as gr | |
| from elevenlabs import Voice, VoiceSettings, save | |
| from elevenlabs.client import ElevenLabs | |
def generate_random_filename(parent, extension="txt"):
    """
    Build a unique file path located under ``parent``.

    The file name has the form ``<uuid4>_<unix-seconds>.<extension>``.
    Only the path string is produced; nothing is created on disk.

    Args:
        parent: Directory the generated file name is joined onto.
        extension (str): File extension without the leading dot. Default is 'txt'.

    Returns:
        str: ``parent`` joined with the generated file name.
    """
    # A UUID4 alone is already unique; the timestamp makes the name sortable
    # by creation time when inspecting the directory.
    unique_part = uuid.uuid4()
    epoch_seconds = int(time.time())
    return os.path.join(parent, f"{unique_part}_{epoch_seconds}.{extension}")
# Model identifier passed to the ElevenLabs generate call. It can be overridden
# through the ELEVEN_LABS_MODEL environment variable.
ELEVEN_LABS_MODEL = os.environ.get("ELEVEN_LABS_MODEL", "eleven_multilingual_v2")

# English display names of the selectable languages.
# NOTE(review): presumably mirrors the languages of the multilingual model
# above; confirm against the ElevenLabs model documentation.
ELEVEN_LABS_LANGUAGE_SUPPORTS = [
    "English", "Chinese", "Spanish", "Hindi", "Portuguese",
    "French", "German", "Japanese", "Arabic", "Korean",
    "Indonesian", "Italian", "Dutch", "Turkish", "Polish",
    "Swedish", "Filipino", "Malay", "Russian", "Romanian",
    "Ukrainian", "Greek", "Czech", "Danish", "Finnish",
    "Bulgarian", "Croatian", "Slovak", "Tamil",
]
| class ElevenLabsPipeline: | |
| def __init__(self): | |
| eleven_labs_api_key = os.getenv("ELEVENLABS_API_KEY", "sk_f4f7d77bc8065b15824cf52ea46c7d99e0e5db2a0f93b673") | |
| if eleven_labs_api_key is None: | |
| raise Exception("ELEVENLABS_API_KEY ํ๊ฒฝ๋ณ์๋ฅผ ์ค์ ํด์ฃผ์ธ์.") | |
| self.client = ElevenLabs( | |
| api_key=eleven_labs_api_key, # Defaults to ELEVEN_API_KEY | |
| ) | |
| os.makedirs("./tmp", exist_ok=True) | |
| def clone_voice(self, audio, name, description=None): | |
| response = self.client.voices.get_all() | |
| for voice in response.voices: | |
| if voice.name == name: | |
| return "์กด์ฌํ๋ ์์ฑ์ ๋๋ค. ์์ฑ ์์ฑ์ ์์ํด์ฃผ์ธ์." | |
| try: | |
| voice = self.client.clone( | |
| name=name, | |
| description=description, # Optional | |
| files=[audio], | |
| ) | |
| return "Voice Clone์ ์ฑ๊ณต์ ์ผ๋ก ์์ฑํ์ต๋๋ค." | |
| except Exception as e: | |
| return str(e) | |
| def _get_voice(self, name: str): | |
| response = self.client.voices.get_all() | |
| current_voice = None | |
| for voice in response.voices: | |
| if voice.name == name: | |
| current_voice = voice | |
| break | |
| return current_voice | |
| def generate_voice( | |
| self, | |
| text: str, | |
| audio: str = None, | |
| language: str = "ko", | |
| mute_before_ms: Optional[int] = 0, | |
| mute_after_ms: Optional[int] = 0, | |
| stability: float = 0.5, | |
| similarity_boost: float = 0.75, | |
| style: float = 0.0, | |
| use_speaker_boost=True, | |
| ) -> str: | |
| if audio is not None: | |
| name = Path(audio).stem | |
| self.clone_voice(audio, name) | |
| else: | |
| gr.Info("์์ฑ์ด ์์ฃผ์ด์ก์ต๋๋ค. ๊ธฐ๋ณธ ์์ฑ์ผ๋ก ์์ฑํ๊ฒ ์ต๋๋ค.", duration=2) | |
| name = "Laura" | |
| current_voice = self._get_voice(name) | |
| if current_voice is None: | |
| current_voice = self._get_voice(name) | |
| response = self.client.generate( | |
| text=text, | |
| model=ELEVEN_LABS_MODEL, | |
| voice=Voice( | |
| voice_id=current_voice.voice_id, | |
| settings=VoiceSettings( | |
| stability=stability, | |
| similarity_boost=similarity_boost, | |
| style=style, | |
| use_speaker_boost=use_speaker_boost, | |
| language=language, | |
| ), | |
| ), | |
| ) | |
| # Create a BytesIO object to hold the audio data in memory | |
| audio_stream = BytesIO() | |
| # Write each chunk of audio data to the stream | |
| for chunk in response: | |
| if chunk: | |
| audio_stream.write(chunk) | |
| # Reset stream position to the beginning | |
| audio_stream.seek(0) | |
| # Load the audio stream into an AudioSegment | |
| audio_segment = AudioSegment.from_file(audio_stream, format="mp3") | |
| # Create silent segments for before and after | |
| mute_before = AudioSegment.silent(duration=mute_before_ms) | |
| mute_after = AudioSegment.silent(duration=mute_after_ms) | |
| # Concatenate the segments | |
| combined_segment = mute_before + audio_segment + mute_after | |
| tmp_file = generate_random_filename("./tmp", "mp3") | |
| # Export the combined audio to the specified file | |
| combined_segment.export(tmp_file, format="mp3", bitrate="128k") | |
| return tmp_file |