Edge-TTS-Text-to-Speech

Running

File size: 3,933 Bytes

63f1d6d
 
 
 
87e895d
4a8a7d7
87e895d
4a8a7d7
 
 
 
 
63f1d6d
 
 
 
 
 
8529fe9
63f1d6d
8529fe9
63f1d6d
 
 
 
 
4a8a7d7
 
 
 
 
 
 
 
 
 
 
 
 
87e895d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63f1d6d
8529fe9
4a8a7d7
 
 
8529fe9
 
 
 
d81bde6
63f1d6d
 
 
d81bde6
 
1d10660
d81bde6
 
63f1d6d
 
 
 
 
 
 
 
 
 
 
 
 
d81bde6
1d10660
63f1d6d
4f5115c
c3bcb97
63f1d6d
 
 
8529fe9
 
aa39831
 
8529fe9
63f1d6d
87e895d

import asyncio
import datetime
import hashlib
import os
import uuid
from pathlib import Path

import edge_tts
import gradio as gr

# Directory that holds the generated audio files; created when this module is
# loaded (no error if it already exists).
AUDIO_DIR = Path("audio_files")
AUDIO_DIR.mkdir(exist_ok=True)

async def get_voices():
    """Fetch the Edge TTS voice list and index it by a display label.

    Returns:
        A dict whose keys are labels of the form
        "<ShortName> - <Locale> (<Gender>)" and whose values are the
        corresponding voice ShortName.
    """
    catalog = await edge_tts.list_voices()
    labelled = {}
    for entry in catalog:
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled

async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* to an MP3 file with Edge TTS, reusing cached output.

    The output file name is derived from a hash of the request parameters, so
    an identical request returns the already-generated file instead of
    synthesizing again.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text is
            rejected with a message.
        voice: Voice label such as "<ShortName> - <Locale> (<Gender>)"; only
            the part before the first " - " is passed to Edge TTS. ``None``
            or an empty string is rejected with a message.
        rate: Speech-rate adjustment in percent; rounded to an integer.
        pitch: Pitch adjustment in Hz; rounded to an integer.

    Returns:
        ``(path, None)`` on success, where *path* is the MP3 path as a string,
        or ``(None, message)`` when the input is rejected.

    Raises:
        Whatever ``edge_tts.Communicate`` / ``Communicate.save`` raise; these
        are not caught here.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    # The ":+d" format only accepts integers; sliders/API clients may send
    # floats such as 5.0, so normalize first.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"

    # Hash the repr of a tuple rather than a bare concatenation: without
    # delimiters, rate=1/pitch=12 and rate=11/pitch=2 would share a cache key
    # and the wrong audio would be served.
    cache_key = repr((text, voice_short_name, rate_str, pitch_str))
    content_hash = hashlib.md5(cache_key.encode()).hexdigest()
    output_path = AUDIO_DIR / f"{content_hash}.mp3"

    # Generate the audio only if it is not cached yet.
    if not output_path.exists():
        communicate = edge_tts.Communicate(
            text, voice_short_name, rate=rate_str, pitch=pitch_str
        )
        # Write to a request-unique temporary file and rename it into place,
        # so a failed, cancelled, or concurrent synthesis never leaves a
        # truncated .mp3 that later requests would treat as a cache hit.
        tmp_path = AUDIO_DIR / f"{content_hash}.{uuid.uuid4().hex}.part"
        try:
            await communicate.save(str(tmp_path))
            tmp_path.replace(output_path)
        finally:
            # Only present here if saving or renaming did not complete.
            if tmp_path.exists():
                tmp_path.unlink()

    return str(output_path), None

def cleanup_old_files(directory: Path, max_files: int = 100, max_age_hours: int = 24):
    """Prune ``*.mp3`` files in *directory* by age and then by count.

    First deletes every MP3 whose modification time is more than
    *max_age_hours* in the past. If more than *max_files* MP3s remain, the
    oldest ones are deleted until at most *max_files* are left.

    This is best-effort: files that vanish or cannot be deleted are skipped,
    and any unexpected error is printed rather than raised.

    Args:
        directory: Directory to scan (non-recursively) for ``*.mp3`` files.
        max_files: Maximum number of MP3 files to keep; ``0`` removes all of
            them, and negative values are treated as ``0``.
        max_age_hours: Files older than this many hours are removed.
    """
    try:
        cutoff = datetime.datetime.now().timestamp() - max_age_hours * 3600

        # Stat each file once and remember (mtime, path) for the survivors so
        # the count-based pass below needs no further filesystem lookups.
        remaining = []
        for file in directory.glob("*.mp3"):
            try:
                mtime = file.stat().st_mtime
            except OSError:
                # Vanished or unreadable since the glob; nothing to do.
                continue
            if mtime < cutoff:
                try:
                    file.unlink()
                    continue
                except OSError:
                    # Could not delete it, so it still counts toward the limit.
                    pass
            remaining.append((mtime, file))

        # If there are still too many files, remove the oldest ones.
        # Computing the excess explicitly (instead of slicing with
        # [:-max_files]) keeps max_files=0 working: [:-0] is an empty slice.
        excess = len(remaining) - max(max_files, 0)
        if excess > 0:
            remaining.sort(key=lambda entry: entry[0])
            for _, file in remaining[:excess]:
                try:
                    file.unlink()
                except OSError:
                    continue
    except Exception as e:
        # Cleanup must never break the caller; report and carry on.
        print(f"Error during cleanup: {e}")

async def tts_interface(text, voice, rate, pitch):
    """Gradio callback: prune the audio directory, then synthesize the request.

    Returns:
        A pair ``(audio, notice)``. ``audio`` is whatever ``text_to_speech``
        returned as its first element. ``notice`` is the result of
        ``gr.Warning(message)`` when ``text_to_speech`` reported a message,
        otherwise ``None``.
    """
    # Run the cleanup before a new file may be generated.
    cleanup_old_files(AUDIO_DIR)

    audio_path, message = await text_to_speech(text, voice, rate, pitch)
    notice = gr.Warning(message) if message else None
    return audio_path, notice

async def create_demo():
    """Build the Gradio interface for the text-to-speech app.

    Awaits ``get_voices()`` to populate the voice dropdown, then wires
    ``tts_interface`` to four inputs (text, voice, rate, pitch) and two
    outputs (an audio file path and a hidden Markdown component).

    Returns:
        The configured ``gr.Interface``; it is not launched here.
    """
    voice_map = await get_voices()

    description = """
    Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
    Original Space by innoai
    """

    # The empty first choice is the initial selection, so no voice is
    # preselected.
    voice_choices = ["", *voice_map]

    text_box = gr.Textbox(label="Input Text", lines=5)
    voice_dropdown = gr.Dropdown(choices=voice_choices, label="Select Voice", value="")
    rate_slider = gr.Slider(
        minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1
    )
    pitch_slider = gr.Slider(
        minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1
    )

    audio_output = gr.Audio(label="Generated Audio", type="filepath")
    warning_output = gr.Markdown(label="Warning", visible=False)

    return gr.Interface(
        fn=tts_interface,
        inputs=[text_box, voice_dropdown, rate_slider, pitch_slider],
        outputs=[audio_output, warning_output],
        title="Edge TTS Text-to-Speech",
        description=description,
        article="Experience the power of Edge TTS for text-to-speech conversion!",
        analytics_enabled=False,
        allow_flagging="manual",
        api_name="predict",
    )

async def main():
    """Build the demo, configure its queue, and launch it."""
    app = await create_demo()
    # Queue with a default concurrency limit of 25.
    app.queue(default_concurrency_limit=25)
    app.launch(show_api=True)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())