# Templates for the generated 'voice' project.
#
# Each module-level string below holds the full text of one file of the
# project laid out as follows. No code visible in this section writes the
# files to disk; the constants are only defined here.
#
# voice/
# ├── README.md
# ├── requirements.txt
# ├── src/
# │   ├── model.py
# │   ├── train.py
# │   └── inference.py
# ├── data/
# │   └── samples/
# └── app_gradio.py

# ========================================
# README.md (initial version)
readme = '''
# 🐽 Kuchiuya Voice Synthesizer

Este proyecto permite entrenar e inferir voces sintéticas al estilo Kuchiuya.

## Requisitos
```bash
pip install -r requirements.txt
```

## Entrenamiento
```bash
python src/train.py
```

## Inferencia
```bash
python src/inference.py --text "El barro no miente."
```

## Demo Web
```bash
python app_gradio.py
```
'''

# ========================================
# requirements.txt (one package per line)
requirements = '''
torch
torchaudio
numpy
scipy
librosa
soundfile
gradio
pyttsx3
'''

# ========================================
# src/model.py
model_py = '''
import torch.nn as nn

class SimpleTTSModel(nn.Module):
    def __init__(self, input_dim=256, hidden_dim=512, output_dim=80):
        super(SimpleTTSModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        return self.model(x)
'''

# ========================================
# src/train.py
#
# The README runs this file as `python src/train.py`. In that mode Python puts
# the script's own directory (src/) on sys.path, not the project root, so
# `from src.model import ...` alone cannot be resolved. The generated script
# therefore falls back to `from model import ...`; the package-style import is
# still tried first so `python -m src.train` from the project root also works.
train_py = '''
import torch
import torch.nn as nn
import torch.optim as optim
try:
    from src.model import SimpleTTSModel
except ImportError:
    from model import SimpleTTSModel
import numpy as np

print("🐷 Entrenando modelo Kuchiuya...")

# Datos simulados (para demostrar el flujo)
x = torch.randn((100, 256))
y = torch.randn((100, 80))

model = SimpleTTSModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/10, Pérdida: {loss.item():.4f}")

# Guardar modelo
torch.save(model.state_dict(), "tts_model.pth")
print("✅ Modelo guardado como 'tts_model.pth'")
'''

# ========================================
# src/inference.py
inference_py = '''
import argparse
import pyttsx3

parser = argparse.ArgumentParser()
parser.add_argument('--text', type=str, required=True, help='Texto a sintetizar')
args = parser.parse_args()

engine = pyttsx3.init()
engine.setProperty('rate', 150)
engine.save_to_file(args.text, 'output.wav')
engine.runAndWait()

print(f"🔊 Texto sintetizado: '{args.text}' → guardado como 'output.wav'")
'''

# ========================================
# app_gradio.py
app_gradio_py = '''
import gradio as gr
import pyttsx3
import os

def sintetizar(texto):
    engine = pyttsx3.init()
    engine.setProperty('rate', 150)
    output_path = "voz_kuchiuya.wav"
    engine.save_to_file(texto, output_path)
    engine.runAndWait()
    return output_path

demo = gr.Interface(
    fn=sintetizar,
    inputs=gr.Textbox(lines=2, placeholder="Escribe tu frase Kuchiuya aquí..."),
    outputs=gr.Audio(type="filepath"),
    title="Voz Kuchiuya",
    description="Convierte texto en una voz ritual cyberpunk del universo Ariplanteater."
)

demo.launch()
'''