File size: 3,933 Bytes
63f1d6d 87e895d 4a8a7d7 87e895d 4a8a7d7 63f1d6d 8529fe9 63f1d6d 8529fe9 63f1d6d 4a8a7d7 87e895d 63f1d6d 8529fe9 4a8a7d7 8529fe9 d81bde6 63f1d6d d81bde6 1d10660 d81bde6 63f1d6d d81bde6 1d10660 63f1d6d 4f5115c c3bcb97 63f1d6d 8529fe9 aa39831 8529fe9 63f1d6d 87e895d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import gradio as gr
import edge_tts
import asyncio
import os
import datetime
from pathlib import Path
import hashlib
# Directory that holds the generated audio files; created at import time if missing.
AUDIO_DIR = Path("audio_files")
os.makedirs(AUDIO_DIR, exist_ok=True)
async def get_voices():
    """Fetch the Edge TTS voice list and index it by a display label.

    Returns:
        A dict mapping "<ShortName> - <Locale> (<Gender>)" labels to the
        voice's ``ShortName``, in the order ``edge_tts.list_voices()``
        returned the voices.
    """
    catalogue = await edge_tts.list_voices()
    labelled = {}
    for entry in catalogue:
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = entry['ShortName']
    return labelled
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* with Edge TTS and return the path of the cached MP3.

    Results are cached on disk under ``AUDIO_DIR``, keyed by a hash of the
    text, voice short name, rate and pitch, so identical requests reuse the
    existing file instead of calling the service again.

    Args:
        text: Text to convert. ``None``, empty or whitespace-only text is
            rejected with a message.
        voice: Voice label of the form "<ShortName> - <Locale> (<Gender>)";
            only the part before the first " - " is passed to Edge TTS.
        rate: Speech-rate adjustment in percent; formatted with an explicit sign.
        pitch: Pitch adjustment in Hz; formatted with an explicit sign.

    Returns:
        ``(path, None)`` on success, or ``(None, message)`` when the text or
        the voice is missing.

    Raises:
        Whatever ``edge_tts.Communicate`` / ``Communicate.save`` raise; in that
        case no file is left at the cache path.
    """
    # Local import: uuid is only needed for the temporary file name below.
    import uuid

    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]

    # The "+d" format spec rejects floats, and callers may pass 5.0 rather than 5.
    rate = int(rate)
    pitch = int(pitch)
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"

    # Build an unambiguous cache key. Plain concatenation let different
    # settings collide (rate=1, pitch=20 and rate=12, pitch=0 both end in
    # "120"), which returned audio generated with the wrong parameters.
    cache_key = repr((text, voice_short_name, rate, pitch))
    content_hash = hashlib.md5(cache_key.encode()).hexdigest()
    output_path = AUDIO_DIR / f"{content_hash}.mp3"

    # Generate the audio only if it is not cached yet.
    if not output_path.exists():
        communicate = edge_tts.Communicate(
            text, voice_short_name, rate=rate_str, pitch=pitch_str
        )
        # Write to a unique temporary name and rename on success, so a failed
        # or cancelled synthesis never leaves a truncated file in the cache
        # and concurrent identical requests do not write to the same file.
        partial_path = AUDIO_DIR / f"{content_hash}.{uuid.uuid4().hex}.part"
        try:
            await communicate.save(str(partial_path))
            os.replace(partial_path, output_path)
        except BaseException:
            # BaseException so task cancellation also cleans up the partial file.
            try:
                partial_path.unlink()
            except OSError:
                pass
            raise

    return str(output_path), None
# Cleanup helper that prunes old generated audio files.
def cleanup_old_files(directory: Path, max_files: int = 100, max_age_hours: int = 24):
    """Delete stale ``*.mp3`` files from *directory* (best effort).

    Two rules are applied in order:

    1. Every file whose modification time is more than *max_age_hours* old
       is removed.
    2. If more than *max_files* files remain, the oldest ones are removed
       until at most *max_files* are left (``max_files=0`` removes them all).

    Files that cannot be inspected or deleted are skipped. Any unexpected
    error is printed rather than raised, so cleanup never breaks the caller.

    Args:
        directory: Directory to scan (non-recursive, ``*.mp3`` only).
        max_files: Maximum number of files to keep.
        max_age_hours: Maximum file age in hours.
    """
    try:
        # Compare POSIX timestamps directly; this avoids naive local-time
        # arithmetic, which can be off by an hour around DST changes.
        now = datetime.datetime.now(datetime.timezone.utc).timestamp()
        max_age_seconds = max_age_hours * 3600

        remaining = []  # (mtime, path) for every file still on disk
        for file in directory.glob("*.mp3"):
            try:
                mtime = file.stat().st_mtime
            except OSError:
                # File vanished or is unreadable; nothing to do for it.
                continue
            if now - mtime > max_age_seconds:
                try:
                    file.unlink()
                    continue
                except OSError:
                    # Could not delete: it still counts toward the limit below.
                    pass
            remaining.append((mtime, file))

        # If there are still too many files, remove the oldest ones.
        # An explicit count is used because files[:-max_files] silently
        # selects nothing when max_files is 0.
        excess = len(remaining) - max_files
        if excess > 0:
            remaining.sort(key=lambda item: item[0])
            for _, file in remaining[:excess]:
                try:
                    file.unlink()
                except OSError:
                    continue
    except Exception as e:
        print(f"Error during cleanup: {e}")
async def tts_interface(text, voice, rate, pitch):
    """Gradio callback: prune the audio directory, then synthesize the request.

    Returns:
        A pair ``(audio, notice)``. ``audio`` is the first value returned by
        ``text_to_speech``. ``notice`` is the result of ``gr.Warning(message)``
        when ``text_to_speech`` reported a message, otherwise ``None``.
    """
    # Run the cleanup before generating a new file.
    cleanup_old_files(AUDIO_DIR)

    audio, warning = await text_to_speech(text, voice, rate, pitch)
    if not warning:
        return audio, None
    return audio, gr.Warning(warning)
async def create_demo():
    """Build the Gradio interface for the text-to-speech app.

    The voice dropdown is filled from ``get_voices()`` with an empty entry
    prepended, and that empty entry is the initial selection.

    Returns:
        The configured ``gr.Interface``; it is not launched here.
    """
    voice_labels = list((await get_voices()).keys())

    description = (
        "\n"
        "Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.\n"
        "Original Space by innoai\n"
    )

    input_components = [
        gr.Textbox(label="Input Text", lines=5),
        gr.Dropdown(choices=[""] + voice_labels, label="Select Voice", value=""),
        gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
        gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
    ]
    output_components = [
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Markdown(label="Warning", visible=False),
    ]

    return gr.Interface(
        fn=tts_interface,
        inputs=input_components,
        outputs=output_components,
        title="Edge TTS Text-to-Speech",
        description=description,
        article="Experience the power of Edge TTS for text-to-speech conversion!",
        analytics_enabled=False,
        allow_flagging="manual",
        api_name="predict",
    )
async def main():
    """Create the interface, enable its request queue, and launch it."""
    app = await create_demo()
    # Default concurrency limit applied to the queue.
    app.queue(default_concurrency_limit=25)
    app.launch(show_api=True)
# Script entry point: run the async main() only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())