# Text of the Kuchiuya Voice Synthesizer project files, held as string
# constants: README, requirements list, model, training, inference, Gradio demo.
# Markdown text of the project README: install, training, inference, and
# web-demo commands.
# NOTE(review): the heading originally began with an emoji that was destroyed
# by mis-decoding (UTF-8 read as a single-byte Greek code page). It cannot be
# recovered from this file, so only the garbage characters were dropped;
# restore the emoji if the original is known.
readme = '''
# Kuchiuya Voice Synthesizer

Este proyecto permite entrenar e inferir voces sintéticas al estilo Kuchiuya.

## Requisitos
```bash
pip install -r requirements.txt
```

## Entrenamiento
```bash
python src/train.py
```

## Inferencia
```bash
python src/inference.py --text "El barro no miente."
```

## Demo Web
```bash
python app_gradio.py
```
'''
# Package list for the project, one requirement per line (pip requirements
# format, no version pins).
requirements = '''
torch
torchaudio
numpy
scipy
librosa
soundfile
gradio
pyttsx3
'''
# Source text of the model module: a two-layer feed-forward network
# (Linear -> ReLU -> Linear) wrapped in an nn.Module.
# The indentation of the embedded code was lost in extraction and is restored
# here so the text is valid Python again.
model_py = '''
import torch.nn as nn

class SimpleTTSModel(nn.Module):
    def __init__(self, input_dim=256, hidden_dim=512, output_dim=80):
        super(SimpleTTSModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        return self.model(x)
'''
# Source text of the training script: fits SimpleTTSModel to random tensors
# for 10 epochs with MSE loss and Adam, then saves the state dict to
# "tts_model.pth".
# Repairs made to the embedded text: loop-body indentation restored, mojibake
# decoded ("Pérdida", and the check mark whose bytes had been split across
# two lines).
# NOTE(review): the first print originally started with an emoji that was
# destroyed by mis-decoding and cannot be recovered from this file; only the
# garbage characters were dropped.
# NOTE(review): `from src.model import ...` resolves only when the project
# root is on sys.path (e.g. `python -m src.train`); the README's
# `python src/train.py` puts `src/` itself on sys.path instead — confirm the
# intended invocation.
train_py = '''
import torch
import torch.nn as nn
import torch.optim as optim
from src.model import SimpleTTSModel
import numpy as np

print("Entrenando modelo Kuchiuya...")

# Datos simulados (para demostrar el flujo)
x = torch.randn((100, 256))
y = torch.randn((100, 80))

model = SimpleTTSModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/10, Pérdida: {loss.item():.4f}")

# Guardar modelo
torch.save(model.state_dict(), "tts_model.pth")
print("✅ Modelo guardado como 'tts_model.pth'")
'''
# Source text of the command-line inference script: reads a required --text
# argument and has pyttsx3 write the spoken text to "output.wav" at rate 150.
# NOTE(review): the final message originally started with an emoji and used a
# separator glyph before "guardado"; both were destroyed by mis-decoding.
# The emoji is not recoverable and was dropped; the separator is reconstructed
# as a right arrow, which is an assumption — confirm against the original.
inference_py = '''
import argparse
import pyttsx3

parser = argparse.ArgumentParser()
parser.add_argument('--text', type=str, required=True, help='Texto a sintetizar')
args = parser.parse_args()

engine = pyttsx3.init()
engine.setProperty('rate', 150)
engine.save_to_file(args.text, 'output.wav')
engine.runAndWait()

print(f"Texto sintetizado: '{args.text}' → guardado como 'output.wav'")
'''
# Source text of the Gradio demo: a text box feeds `sintetizar`, which has
# pyttsx3 write the speech to "voz_kuchiuya.wav" and returns that path for
# the audio output component.
# Repairs made to the embedded text: function and call-argument indentation
# restored, and the placeholder's mojibake decoded ("aquí").
app_gradio_py = '''
import gradio as gr
import pyttsx3
import os

def sintetizar(texto):
    engine = pyttsx3.init()
    engine.setProperty('rate', 150)
    output_path = "voz_kuchiuya.wav"
    engine.save_to_file(texto, output_path)
    engine.runAndWait()
    return output_path

demo = gr.Interface(
    fn=sintetizar,
    inputs=gr.Textbox(lines=2, placeholder="Escribe tu frase Kuchiuya aquí..."),
    outputs=gr.Audio(type="filepath"),
    title="Voz Kuchiuya",
    description="Convierte texto en una voz ritual cyberpunk del universo Ariplanteater."
)
demo.launch()
'''