# 2nd-Host-Ai / app.py
import gradio as gr
import os
import tempfile
import requests
import soundfile as sf
import json
import shutil
from pathlib import Path
import numpy as np
import re
from typing import Generator
# ===== NEUTTS IMPORTS =====
try:
    # Try multiple import approaches for NeuTTS
    try:
        # Approach 1: Direct import from the installed package
        from neutts import NeuTTSAir
    except ImportError:
        try:
            # Approach 2: Import after adding the site-packages path explicitly
            import sys
            sys.path.append('/usr/local/lib/python3.10/site-packages')
            from neutts import NeuTTSAir
        except ImportError:
            # Approach 3: Use the components directly
            from phonemizer.backend import EspeakBackend
            import perth
            from neucodec import NeuCodec
            from llama_cpp import Llama
            import torch  # needed for the tensor handling and type hints below

            # Define a minimal NeuTTSAir class manually
            class NeuTTSAir:
                def __init__(self, backbone_repo="neuphonic/neutts-air-q4-gguf", backbone_device="cpu", codec_repo="neuphonic/neucodec", codec_device="cpu"):
                    self.sample_rate = 24_000
                    self.max_context = 2048
                    self.hop_length = 480
                    print("🧠 Loading phonemizer...")
                    self.phonemizer = EspeakBackend(language="en-us", preserve_punctuation=True, with_stress=True)
                    self._load_backbone(backbone_repo, backbone_device)
                    self._load_codec(codec_repo, codec_device)
                    self.watermarker = perth.PerthImplicitWatermarker()
                    print("✅ NeuTTS-Air initialized!")

                def _load_backbone(self, backbone_repo, backbone_device):
                    print(f"🔧 Loading Q4 GGUF backbone: {backbone_repo}")
                    self.backbone = Llama.from_pretrained(
                        repo_id=backbone_repo,
                        filename="*.gguf",
                        n_ctx=self.max_context,
                        n_gpu_layers=0,
                        verbose=False,
                        use_mlock=False,
                        n_threads=2,
                        low_vram=True
                    )

                def _load_codec(self, codec_repo, codec_device):
                    print(f"🔧 Loading codec: {codec_repo}")
                    self.codec = NeuCodec.from_pretrained(codec_repo)
                    self.codec.eval().to(codec_device)

                def infer(self, text: str, ref_codes: np.ndarray | torch.Tensor, ref_text: str) -> np.ndarray:
                    # Generate speech tokens with the backbone, decode them, then watermark the audio
                    output_str = self._infer_gguf(ref_codes, ref_text, text)
                    wav = self._decode(output_str)
                    watermarked_wav = self.watermarker.apply_watermark(wav, sample_rate=24000)
                    return watermarked_wav

                def encode_reference(self, ref_audio_path: str | Path):
                    # Encode a reference clip into NeuCodec speech codes
                    import librosa
                    wav, _ = librosa.load(ref_audio_path, sr=16000, mono=True)
                    wav_tensor = torch.from_numpy(wav).float().unsqueeze(0).unsqueeze(0)
                    with torch.no_grad():
                        ref_codes = self.codec.encode_code(audio_or_path=wav_tensor).squeeze(0).squeeze(0)
                    return ref_codes.numpy() if isinstance(ref_codes, torch.Tensor) else ref_codes

                def _decode(self, codes: str):
                    # Turn the "<|speech_N|>" tokens emitted by the backbone back into a waveform
                    speech_ids = [int(num) for num in re.findall(r"<\|speech_(\d+)\|>", codes)]
                    if len(speech_ids) > 0:
                        with torch.no_grad():
                            codes_tensor = torch.tensor(speech_ids, dtype=torch.long)[None, None, :].to(self.codec.device)
                            recon = self.codec.decode_code(codes_tensor).cpu().numpy()
                        return recon[0, 0, :]
                    else:
                        raise ValueError("No speech tokens found")

                def _to_phones(self, text: str) -> str:
                    phones = self.phonemizer.phonemize([text])
                    return " ".join(phones[0].split())

                def _infer_gguf(self, ref_codes: list, ref_text: str, input_text: str) -> str:
                    ref_text_phones = self._to_phones(ref_text)
                    input_text_phones = self._to_phones(input_text)
                    if isinstance(ref_codes, (torch.Tensor, np.ndarray)):
                        ref_codes = ref_codes.tolist()
                    codes_str = "".join([f"<|speech_{idx}|>" for idx in ref_codes])
                    prompt = f"user: Convert the text to speech:<|TEXT_PROMPT_START|>{ref_text_phones} {input_text_phones}<|TEXT_PROMPT_END|>\nassistant:<|SPEECH_GENERATION_START|>{codes_str}"
                    output = self.backbone(
                        prompt,
                        max_tokens=self.max_context,
                        temperature=1.0,
                        top_k=50,
                        stop=["<|SPEECH_GENERATION_END|>"],
                        echo=False
                    )
                    return output["choices"][0]["text"]

    NEUTTS_AVAILABLE = True
    print("✅ NeuTTS-Air loaded successfully!")
except Exception as e:
    NEUTTS_AVAILABLE = False
    print(f"❌ NeuTTS-Air import failed: {e}")
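
# --- Illustrative usage sketch (not called anywhere in the app) ---
# Shows how the NeuTTSAir interface above is intended to be used for voice
# cloning. The reference clip path, transcript, and output location are
# assumptions for demonstration only.
def _example_voice_clone(text_to_speak="Hello from NeuTTS-Air.",
                         ref_audio="samples/dave.wav",
                         ref_text="Hey there, this is Dave speaking."):
    if not NEUTTS_AVAILABLE:
        raise RuntimeError("NeuTTS-Air is not available in this environment")
    tts = NeuTTSAir(backbone_repo="neuphonic/neutts-air-q4-gguf",
                    codec_repo="neuphonic/neucodec")
    ref_codes = tts.encode_reference(ref_audio)           # codec tokens for the reference voice
    wav = tts.infer(text_to_speak, ref_codes, ref_text)   # cloned speech as a numpy array
    out_path = os.path.join(tempfile.gettempdir(), "neutts_example.wav")
    sf.write(out_path, wav, 24000)                        # the model outputs 24 kHz audio
    return out_path
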
# ===== CONFIGURATION =====
CONFIG_FILE = "voice_profiles.json"
SAMPLE_DIR = "samples"
os.makedirs(SAMPLE_DIR, exist_ok=True)
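
# For reference, voice_profiles.json stores one entry per saved voice; its shape
# mirrors the defaults in VoiceProfileManager.load_profiles below, e.g.:
#
#   {
#     "dave": {
#       "audio_path": "samples/dave.wav",
#       "text": "Hey there, this is Dave speaking.",
#       "created_at": "default"
#     }
#   }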
# ===== VOICE PROFILE MANAGEMENT =====
class VoiceProfileManager:
    def __init__(self, config_file=CONFIG_FILE):
        self.config_file = config_file
        self.profiles = self.load_profiles()

    def load_profiles(self):
        if os.path.exists(self.config_file):
            with open(self.config_file, 'r') as f:
                return json.load(f)
        return {
            "dave": {
                "audio_path": "samples/dave.wav",
                "text": "Hey there, this is Dave speaking.",
                "created_at": "default"
            },
            "andrea": {
                "audio_path": "samples/andrea.wav",
                "text": "Hello, my name is Andrea.",
                "created_at": "default"
            }
        }

    def save_profiles(self):
        with open(self.config_file, 'w') as f:
            json.dump(self.profiles, f, indent=2)

    def add_profile(self, name, audio_path, text):
        self.profiles[name] = {
            "audio_path": audio_path,
            "text": text,
            "created_at": str(np.datetime64('now'))
        }
        self.save_profiles()
        return f"✅ Voice profile '{name}' saved!"

    def get_profile(self, name):
        return self.profiles.get(name)

    def list_profiles(self):
        return list(self.profiles.keys())
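
# Illustrative sketch (not called by the app): the intended flow for the profile
# manager above. The profile name, clip path, and transcript are assumptions for
# demonstration only.
def _example_manage_profiles():
    manager = VoiceProfileManager()
    # Register a cloned voice: a short reference clip plus its exact transcript
    manager.add_profile("alex", "samples/alex.wav", "Hi, I'm Alex and this is my voice sample.")
    print(manager.list_profiles())   # e.g. ['dave', 'andrea', 'alex']
    return manager.get_profile("alex")
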
# ===== SAMPLE MANAGEMENT =====
def download_default_samples():
    """Download the default sample voices."""
    samples = {
        "dave": {
            "audio": "https://github.com/neophonic/neutts-air/raw/main/samples/dave.wav",
            "text": "Hey there, this is Dave speaking."
        },
        "andrea": {
            "audio": "https://github.com/neophonic/neutts-air/raw/main/samples/andrea.wav",
            "text": "Hello, my name is Andrea."
        }
    }
    for name, urls in samples.items():
        audio_path = f"{SAMPLE_DIR}/{name}.wav"
        text_path = f"{SAMPLE_DIR}/{name}.txt"
        if not os.path.exists(audio_path):
            print(f"📥 Downloading {name} sample...")
            try:
                # Download the reference audio clip
                response = requests.get(urls["audio"], timeout=60)
                response.raise_for_status()  # Check for download errors
                with open(audio_path, 'wb') as f:
                    f.write(response.content)
                # Write the matching reference transcript
                with open(text_path, 'w') as f:
                    f.write(urls["text"])
                print(f"✅ Finished downloading {name}.")
            except requests.exceptions.RequestException as e:
                print(f"❌ Failed to download {name}: {e}")