# voice / src /train.py.save
# DRDELATV's picture
# Upload folder using huggingface_hub
# 70e5c60 verified
# πŸ“ Estructura del proyecto generado para 'voice'
# πŸ“¦ voice/
# β”œβ”€β”€ README.md
# β”œβ”€β”€ requirements.txt
# β”œβ”€β”€ src/
# β”‚ β”œβ”€β”€ model.py
# β”‚ β”œβ”€β”€ train.py
# β”‚ └── inference.py
# β”œβ”€β”€ data/
# β”‚ └── samples/
# └── app_gradio.py
# ========================================
# ✅ README.md (initial)
# Markdown text for the project README: a short description followed by the
# install, training, inference and web-demo commands. The text is user-facing
# Spanish and is kept as-is; only the mis-decoded UTF-8 characters (emoji and
# accented letters) were restored.
readme = '''
# 🐽 Kuchiuya Voice Synthesizer
Este proyecto permite entrenar e inferir voces sintéticas al estilo Kuchiuya.
## Requisitos
```bash
pip install -r requirements.txt
```
## Entrenamiento
```bash
python src/train.py
```
## Inferencia
```bash
python src/inference.py --text "El barro no miente."
```
## Demo Web
```bash
python app_gradio.py
```
'''
# ========================================
# ✅ requirements.txt
# One package name per line, unpinned. The resulting text starts and ends
# with a newline, exactly like the triple-quoted literal it replaces.
requirements = "\n" + "\n".join(
    (
        "torch",
        "torchaudio",
        "numpy",
        "scipy",
        "librosa",
        "soundfile",
        "gradio",
        "pyttsx3",
    )
) + "\n"
# ========================================
# ✅ model.py
# Source text of the model module: a SimpleTTSModel class wrapping a
# Linear -> ReLU -> Linear stack (defaults 256 -> 512 -> 80).
# The embedded code is indented here because, as text, it must itself be valid
# Python; with every line flush-left it fails with an IndentationError.
model_py = '''
import torch.nn as nn
class SimpleTTSModel(nn.Module):
    def __init__(self, input_dim=256, hidden_dim=512, output_dim=80):
        super(SimpleTTSModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )
    def forward(self, x):
        return self.model(x)
'''
# ========================================
# ✅ train.py
# Source text of the training script: fits SimpleTTSModel to random tensors
# for 10 epochs with MSE loss and Adam, then saves the state dict to
# "tts_model.pth".
# Fixes relative to the previous text:
#   * the loop body is indented, so the generated script actually parses;
#   * mis-decoded UTF-8 characters in the printed messages are restored;
#   * the model import falls back to `from model import ...`, because running
#     the script as `python src/train.py` puts src/ (not the project root) on
#     sys.path and `src.model` cannot be resolved.
train_py = '''
import torch
import torch.nn as nn
import torch.optim as optim
try:
    from src.model import SimpleTTSModel
except ModuleNotFoundError:
    # `python src/train.py` puts src/ (not the project root) on sys.path.
    from model import SimpleTTSModel
import numpy as np
print("🐷 Entrenando modelo Kuchiuya...")
# Datos simulados (para demostrar el flujo)
x = torch.randn((100, 256))
y = torch.randn((100, 80))
model = SimpleTTSModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(x)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/10, Pérdida: {loss.item():.4f}")
# Guardar modelo
torch.save(model.state_dict(), "tts_model.pth")
print("✅ Modelo guardado como 'tts_model.pth'")
'''
# ========================================
# ✅ inference.py
# Source text of the command-line inference script: takes a required --text
# argument and uses pyttsx3 (rate 150) to save the spoken text to
# 'output.wav'. The printed confirmation message had mis-decoded UTF-8
# characters (speaker emoji and arrow); they are restored here.
inference_py = '''
import argparse
import pyttsx3
parser = argparse.ArgumentParser()
parser.add_argument('--text', type=str, required=True, help='Texto a sintetizar')
args = parser.parse_args()
engine = pyttsx3.init()
engine.setProperty('rate', 150)
engine.save_to_file(args.text, 'output.wav')
engine.runAndWait()
print(f"🔊 Texto sintetizado: '{args.text}' → guardado como 'output.wav'")
'''
# ========================================
# ✅ app_gradio.py
# Source text of the Gradio demo: a text box feeds `sintetizar`, which uses
# pyttsx3 (rate 150) to write "voz_kuchiuya.wav" and returns that path to an
# audio output component.
# The function body and the gr.Interface arguments are indented so that the
# embedded code parses, and the mis-decoded accent in the placeholder text is
# restored.
app_gradio_py = '''
import gradio as gr
import pyttsx3
import os
def sintetizar(texto):
    engine = pyttsx3.init()
    engine.setProperty('rate', 150)
    output_path = "voz_kuchiuya.wav"
    engine.save_to_file(texto, output_path)
    engine.runAndWait()
    return output_path
demo = gr.Interface(
    fn=sintetizar,
    inputs=gr.Textbox(lines=2, placeholder="Escribe tu frase Kuchiuya aquí..."),
    outputs=gr.Audio(type="filepath"),
    title="Voz Kuchiuya",
    description="Convierte texto en una voz ritual cyberpunk del universo Ariplanteater."
)
demo.launch()
'''