|
import os |
|
import csv |
|
import difflib |
|
from pydub import AudioSegment |
|
from pydub.playback import play |
|
import unicodedata |
|
|
|
def limpiar_texto(texto):
    """Normalize *texto* so phrases can be compared loosely.

    The text is lowercased, decomposed with Unicode NFKD so that accented
    letters split into a base letter plus combining marks, and then every
    non-ASCII character (the combining marks, but also symbols such as
    ``¿`` or ``¡``) is discarded.

    Args:
        texto: The raw phrase, as typed by the user or read from the CSV.

    Returns:
        The lowercase, ASCII-only phrase without leading or trailing
        whitespace.
    """
    descompuesto = unicodedata.normalize("NFKD", texto.lower())
    solo_ascii = descompuesto.encode("ascii", "ignore").decode("ascii")
    # Trim last: dropping a leading/trailing non-ASCII symbol (e.g. the
    # "¡" in "¡ hola") can expose whitespace that an earlier strip()
    # would have missed.
    return solo_ascii.strip()
|
|
|
|
|
dataset = [] |
|
with open("dataset.csv", newline='', encoding='utf-8') as f: |
|
reader = csv.DictReader(f) |
|
for row in reader: |
|
dataset.append({ |
|
"path": row["path"], |
|
"text": limpiar_texto(row["text"]) |
|
}) |
|
|
|
|
|
print("📝 Escribe tu texto y buscaré el audio más parecido con tu voz grabada:") |
|
texto_input = input("👉 ").strip() |
|
texto_clean = limpiar_texto(texto_input) |
|
|
|
|
|
frases = [d["text"] for d in dataset] |
|
matches = difflib.get_close_matches(texto_clean, frases, n=3, cutoff=0.5) |
|
|
|
if not matches: |
|
print("❌ No encontré coincidencias suficientes. Intenta con otra frase.") |
|
exit() |
|
|
|
|
|
print("\n🎯 Frase(s) encontrada(s):") |
|
for i, frase in enumerate(matches): |
|
print(f"{i+1}. {frase}") |
|
|
|
|
|
match_texto = matches[0] |
|
|
|
|
|
ruta_audio = next(d["path"] for d in dataset if d["text"] == match_texto) |
|
|
|
|
|
print(f"\n🔊 Reproduciendo audio: {ruta_audio}") |
|
audio = AudioSegment.from_wav(ruta_audio) |
|
play(audio) |
|
|