Spaces:

Actual-Innocence
/

2nd-Host-Ai

Runtime error

App Files Files

xet

Community

2nd-Host-Ai / app.py

Actual-Innocence

Update app.py

aac2889 verified 4 days ago

raw

history blame

16.1 kB

	import gradio as gr
	import os
	import tempfile
	import requests
	import soundfile as sf
	import json
	import shutil
	from pathlib import Path
	import numpy as np

	# ===== NEUTTS IMPORTS =====
	from neuttsair.neutts import NeuTTSAir

	# ===== CONFIGURATION =====
	# JSON file in which VoiceProfileManager persists the saved voice profiles.
	CONFIG_FILE = "voice_profiles.json"
	# Directory that receives the downloaded default samples and cloned reference audio.
	SAMPLE_DIR = "samples"
	# Created at import time so later writes into SAMPLE_DIR do not fail on a missing directory.
	os.makedirs(SAMPLE_DIR, exist_ok=True)

	# ===== VOICE PROFILE MANAGEMENT =====
	class VoiceProfileManager:
	    """Keep named voice profiles in memory and persist them to a JSON file.

	    ``profiles`` maps a voice name to a dict with the keys ``"audio_path"``,
	    ``"text"`` and ``"created_at"``.
	    """

	    def __init__(self, config_file=CONFIG_FILE):
	        """Remember *config_file* and load whatever profiles it already holds."""
	        self.config_file = config_file
	        self.profiles = self.load_profiles()

	    def load_profiles(self):
	        """Return the profiles stored in ``self.config_file``.

	        A missing file yields an empty dict. An unreadable, malformed or
	        non-object JSON file also yields an empty dict (with a printed
	        warning) instead of raising, so a damaged profile file cannot stop
	        the application from starting.
	        """
	        try:
	            with open(self.config_file, 'r', encoding='utf-8') as f:
	                data = json.load(f)
	        except FileNotFoundError:
	            return {}
	        except (OSError, ValueError) as e:
	            # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
	            print(f"⚠️ Could not read voice profiles from {self.config_file}: {e}")
	            return {}

	        if not isinstance(data, dict):
	            print(f"⚠️ Ignoring {self.config_file}: expected a JSON object of profiles")
	            return {}
	        return data

	    def save_profiles(self):
	        """Write ``self.profiles`` to ``self.config_file`` as indented JSON.

	        The data is written to a sibling temporary file first and then moved
	        into place, so an interrupted write does not leave a truncated
	        profile file behind.
	        """
	        tmp_path = f"{self.config_file}.tmp"
	        with open(tmp_path, 'w', encoding='utf-8') as f:
	            json.dump(self.profiles, f, indent=2)
	        os.replace(tmp_path, self.config_file)

	    def add_profile(self, name, audio_path, text):
	        """Create or overwrite the profile *name*, persist it, and return a status message."""
	        self.profiles[name] = {
	            "audio_path": audio_path,
	            "text": text,
	            "created_at": str(np.datetime64('now')),
	        }
	        self.save_profiles()
	        return f"✅ Voice profile '{name}' saved!"

	    def get_profile(self, name):
	        """Return the profile dict stored under *name*, or ``None`` if there is none."""
	        return self.profiles.get(name)

	    def list_profiles(self):
	        """Return the profile names as a list, in insertion order."""
	        return list(self.profiles)

	# ===== SAMPLE MANAGEMENT =====
	def download_default_samples(timeout=30):
	    """Download the default sample voices into SAMPLE_DIR (best effort).

	    For every sample, the reference audio (``<name>.wav``) and its transcript
	    (``<name>.txt``) are fetched unless both files already exist. Failures
	    are printed and skipped so that a missing network does not abort start-up.

	    Args:
	        timeout: Seconds to wait for each HTTP request before giving up.
	    """
	    # The organisation is spelled "neuphonic", matching the model repositories
	    # this file loads for the backbone and the codec.
	    samples = {
	        "dave": {
	            "audio": "https://github.com/neuphonic/neutts-air/raw/main/samples/dave.wav",
	            "text": "https://raw.githubusercontent.com/neuphonic/neutts-air/main/samples/dave.txt",
	        },
	        "andrea": {
	            "audio": "https://github.com/neuphonic/neutts-air/raw/main/samples/andrea.wav",
	            "text": "https://raw.githubusercontent.com/neuphonic/neutts-air/main/samples/andrea.txt",
	        },
	    }

	    for name, urls in samples.items():
	        audio_path = f"{SAMPLE_DIR}/{name}.wav"
	        text_path = f"{SAMPLE_DIR}/{name}.txt"

	        # A sample is only usable with both files, so re-fetch if either is missing.
	        if os.path.exists(audio_path) and os.path.exists(text_path):
	            continue

	        try:
	            audio_response = requests.get(urls["audio"], timeout=timeout)
	            audio_response.raise_for_status()
	            text_response = requests.get(urls["text"], timeout=timeout)
	            text_response.raise_for_status()

	            # Write only after both requests succeeded, so an HTTP error page
	            # is never stored on disk as if it were audio or a transcript.
	            with open(audio_path, 'wb') as f:
	                f.write(audio_response.content)
	            with open(text_path, 'w', encoding='utf-8') as f:
	                f.write(text_response.text)

	            print(f"✅ Downloaded {name} sample")
	        except (requests.RequestException, OSError) as e:
	            print(f"❌ Failed to download {name}: {e}")

	# ===== TTS ENGINE =====
	class TTSEngine:
	    """Lazily constructed NeuTTSAir wrapper that speaks text in a saved voice.

	    NOTE(review): __init__ downloads the default samples but does not add
	    them to ``voice_manager``; only explicitly saved profiles can be used.
	    """

	    def __init__(self):
	        # The model is built on first use; see initialize_tts().
	        self.tts = None
	        self.voice_manager = VoiceProfileManager()
	        download_default_samples()

	    def initialize_tts(self):
	        """Return the NeuTTSAir instance, constructing it on the first call."""
	        if self.tts is not None:
	            return self.tts

	        print("🚀 Initializing NeuTTS Q4 GGUF...")
	        model_options = {
	            "backbone_repo": "neuphonic/neutts-air-q4-gguf",
	            "backbone_device": "cpu",
	            "codec_repo": "neuphonic/neucodec",
	            "codec_device": "cpu",
	        }
	        self.tts = NeuTTSAir(**model_options)
	        return self.tts

	    def generate_speech(self, text, voice_name):
	        """Synthesize *text* with the profile called *voice_name*.

	        Returns:
	            ``(wav, None)`` on success, where ``wav`` is whatever the model's
	            ``infer`` call returns, or ``(None, message)`` when the profile is
	            unknown or any step raises.
	        """
	        try:
	            engine = self.initialize_tts()
	            voice_profile = self.voice_manager.get_profile(voice_name)
	            if not voice_profile:
	                return None, f"❌ Voice profile '{voice_name}' not found"

	            reference_codes = engine.encode_reference(voice_profile["audio_path"])
	            waveform = engine.infer(text, reference_codes, voice_profile["text"])
	        except Exception as exc:
	            return None, f"❌ Generation error: {str(exc)}"
	        return waveform, None

	# ===== SCRIPT PARSING =====
	def parse_conversation_script(script_text):
	    """Parse a script into a list of ``{"speaker", "text"}`` dicts.

	    Each non-blank line is split at its first colon into a speaker label and
	    the dialogue. Lines without a colon, or with nothing before the colon,
	    are attributed to "Speaker A". Lines whose dialogue is empty (for example
	    a bare "Host:") are skipped, since there is nothing to speak.

	    Args:
	        script_text: The raw script; ``None`` is treated as an empty script.

	    Returns:
	        The parsed entries in script order (possibly empty).
	    """
	    default_speaker = "Speaker A"
	    entries = []

	    for raw_line in (script_text or "").strip().split('\n'):
	        line = raw_line.strip()
	        if not line:
	            continue

	        label, separator, dialogue = line.partition(':')
	        if separator:
	            speaker = label.strip() or default_speaker
	            text = dialogue.strip()
	        else:
	            # No label on this line: default to Speaker A.
	            speaker, text = default_speaker, line

	        if text:
	            entries.append({"speaker": speaker, "text": text})

	    return entries

	def generate_script_from_prompt(prompt, style="conversational"):
	    """Build a six-line podcast script about *prompt* from a fixed template.

	    *style* selects the "conversational", "interview" or "debate" template;
	    any other value falls back to "conversational". The prompt is substituted
	    into the opening line and the lines are joined with newlines.
	    """
	    conversational = (
	        "Host: Welcome to our podcast! Today we're discussing {prompt}",
	        "Co-host: That's right! It's a fascinating topic that affects many people.",
	        "Host: Let's start with the basics. What should our audience know about this?",
	        "Co-host: Well, first of all, it's important to understand the key concepts.",
	        "Host: And what about the practical applications? How can people use this in their daily lives?",
	        "Co-host: Great question! There are several ways to apply this knowledge effectively.",
	    )
	    interview = (
	        "Interviewer: Thanks for joining us today to talk about {prompt}",
	        "Guest: Happy to be here! It's a topic I'm very passionate about.",
	        "Interviewer: Could you share some background on how you got involved in this field?",
	        "Guest: Absolutely. It all started several years ago when I first discovered this area.",
	        "Interviewer: What are the most exciting developments you're seeing right now?",
	        "Guest: There are some incredible advancements happening that will change everything.",
	    )
	    debate = (
	        "Moderator: Welcome to our debate on {prompt}",
	        "Proponent: I believe this is one of the most important issues of our time.",
	        "Opponent: While I respect that view, I have some serious concerns about the approach.",
	        "Proponent: Let me address those concerns with some concrete evidence.",
	        "Opponent: The evidence is compelling, but we must consider the broader implications.",
	        "Moderator: Let's hear from both sides about potential solutions.",
	    )
	    scripts_by_style = {
	        "conversational": conversational,
	        "interview": interview,
	        "debate": debate,
	    }

	    if style in scripts_by_style:
	        chosen_lines = scripts_by_style[style]
	    else:
	        chosen_lines = conversational

	    # The only placeholder in any template is {prompt}, so the joined text
	    # can be formatted in one pass.
	    return "\n".join(chosen_lines).format(prompt=prompt)

	# ===== MAIN GENERATION FUNCTIONS =====
	# Shared engine used by clone_voice() and generate_podcast(). Constructing it
	# runs TTSEngine.__init__, which also calls download_default_samples().
	tts_engine = TTSEngine()

	def clone_voice(voice_name, upload_audio, reference_text):
	    """Register a new voice from an uploaded reference recording.

	    The upload is copied into SAMPLE_DIR and a profile pointing at the copy
	    is saved through the shared engine's voice manager.

	    Args:
	        voice_name: Name to store the profile under; surrounding whitespace
	            is ignored and a blank name is rejected.
	        upload_audio: Filesystem path of the uploaded reference audio.
	        reference_text: Transcript of what is spoken in the reference audio.

	    Returns:
	        A status message describing success or the failure reason.
	    """
	    voice_name = (voice_name or "").strip()
	    if not voice_name or not upload_audio:
	        return "❌ Please provide a voice name and audio file"

	    try:
	        # The name comes straight from the web form. Keep only characters that
	        # are safe in a file name so that input such as "../x" cannot place
	        # the copy outside SAMPLE_DIR.
	        safe_stem = "".join(
	            ch if ch.isalnum() or ch in "-_ " else "_" for ch in voice_name
	        )
	        audio_ext = Path(upload_audio).suffix
	        audio_path = f"{SAMPLE_DIR}/{safe_stem}{audio_ext}"
	        shutil.copy2(upload_audio, audio_path)

	        # The profile keeps the name as entered; only the file name is sanitized.
	        return tts_engine.voice_manager.add_profile(voice_name, audio_path, reference_text)
	    except Exception as e:
	        return f"❌ Error cloning voice: {str(e)}"

	def _match_voice_slot(speaker):
	    """Return "a" or "b" when the speaker label names a known role, else None."""
	    label = speaker.strip().lower()
	    # Second-speaker labels are tested first: "co-host" contains "host", so
	    # testing the host keywords first would send both roles to voice A.
	    if label in ("b", "speaker b") or any(key in label for key in ("co-host", "cohost", "guest")):
	        return "b"
	    if label in ("a", "speaker a") or any(key in label for key in ("host", "interviewer")):
	        return "a"
	    return None


	def generate_podcast(script_input, speaker_a, speaker_b, prompt_input, script_style):
	    """Generate a complete two-voice podcast from a script or a prompt.

	    When no script is given but a prompt is, a script is generated from the
	    prompt first. Every parsed line is synthesized with voice A or voice B,
	    the clips are joined with half a second of silence between them, and the
	    audio plus the script text are written to temporary files.

	    Args:
	        script_input: Script text in "Speaker: dialogue" form (may be empty).
	        speaker_a: Voice profile name for host/interviewer/"Speaker A" lines.
	        speaker_b: Voice profile name for co-host/guest/"Speaker B" lines.
	        prompt_input: Topic used to generate a script when none is supplied.
	        script_style: Template style passed to the script generator.

	    Returns:
	        Always a 3-tuple ``(audio_path, script_path, status_message)``; both
	        paths are ``None`` on failure. The arity matches the three outputs
	        the UI wires to this function.
	    """
	    try:
	        # Generate script if only a prompt is provided
	        if prompt_input and (not script_input or not script_input.strip()):
	            script_input = generate_script_from_prompt(prompt_input, script_style)

	        if not script_input or not script_input.strip():
	            return None, None, "❌ Please provide either a script or a prompt"

	        conversation = parse_conversation_script(script_input)
	        if not conversation:
	            return None, None, "❌ Could not parse script"

	        sample_rate = 24000
	        voices = {"a": speaker_a, "b": speaker_b}
	        fallback_slots = {}  # unrecognised speaker label -> "a" / "b"
	        segments = []

	        for line in conversation:
	            speaker = line["speaker"]
	            text = line["text"]
	            if not text:
	                # Nothing to synthesize for a label without dialogue.
	                continue

	            slot = _match_voice_slot(speaker)
	            if slot is None:
	                # Unrecognised labels alternate between the two voices in
	                # order of first appearance, and then keep that voice for
	                # the rest of the script.
	                key = speaker.strip().lower()
	                if key not in fallback_slots:
	                    fallback_slots[key] = "a" if len(fallback_slots) % 2 == 0 else "b"
	                slot = fallback_slots[key]
	            voice = voices[slot]

	            print(f"🎙️ {speaker} ({voice}): {text}")

	            wav, error = tts_engine.generate_speech(text, voice)
	            if error:
	                return None, None, error

	            wav = np.asarray(wav)
	            if segments:
	                # 0.5 second pause between consecutive clips
	                segments.append(np.zeros(int(0.5 * sample_rate), dtype=wav.dtype))
	            segments.append(wav)

	        if not segments:
	            return None, None, "❌ Script contains no dialogue to speak"

	        # One concatenation at the end instead of re-copying the growing
	        # buffer for every line.
	        combined_audio = np.concatenate(segments)

	        # Reserve a temp path, then write after the handle is closed.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
	            audio_file = f.name
	        sf.write(audio_file, combined_audio, sample_rate)

	        # Save the script next to the audio file
	        script_file = os.path.splitext(audio_file)[0] + "_script.txt"
	        with open(script_file, 'w', encoding='utf-8') as f:
	            f.write(script_input)

	        return audio_file, script_file, "✅ Podcast generated successfully!"

	    except Exception as e:
	        return None, None, f"❌ Error: {str(e)}"

	# ===== GRADIO UI =====
	# Custom stylesheet handed to gr.Blocks(css=...) when the UI is built.
	css = """
	.container { max-width: 1400px; margin: 0 auto; }
	.header { background: linear-gradient(135deg, #32CD32 0%, #1E90FF 100%); color: white; padding: 30px; border-radius: 12px; margin-bottom: 25px; text-align: center; border: 3px solid #1E90FF; }
	.section { border: 2px solid #32CD32; border-radius: 10px; padding: 20px; margin-bottom: 20px; background: white; }
	.output-section { background: linear-gradient(135deg, #F0FFF0 0%, #F0F8FF 100%); border: 2px dashed #1E90FF; border-radius: 10px; padding: 20px; margin-top: 20px; }
	.btn-primary { background: linear-gradient(135deg, #32CD32 0%, #1E90FF 100%) !important; border: 2px solid #1E90FF !important; color: white !important; font-weight: bold !important; }
	.btn-secondary { background: linear-gradient(135deg, #FFA500 0%, #FF6347 100%) !important; border: 2px solid #FF6347 !important; color: white !important; }
	.tab { background: #f0f8ff; padding: 15px; border-radius: 8px; margin: 10px 0; }
	"""

	# Build the three-tab UI (voice cloning, podcast studio, output) and wire the
	# buttons to the module-level generation functions.
	with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
	    gr.HTML("""
	<div class="header">
	<h1>🎙️ 2nd-Host AI - Complete Podcast Studio</h1>
	<h3>Voice Cloning • 2-Speaker Podcasts • Script Generation • Export</h3>
	</div>
	""")

	    def _default_voice_pair(voices):
	        """Return the default (speaker A, speaker B) selections for *voices*."""
	        first = voices[0] if voices else None
	        second = voices[1] if len(voices) > 1 else first
	        return first, second

	    # Voices known when the page is built; the dropdowns are refreshed later
	    # by refresh_voices().
	    voice_manager = VoiceProfileManager()
	    available_voices = voice_manager.list_profiles()
	    default_a, default_b = _default_voice_pair(available_voices)

	    with gr.Tab("🎭 Voice Cloning"):
	        gr.Markdown("### Clone New Voices")
	        with gr.Row():
	            with gr.Column():
	                voice_name = gr.Textbox(label="Voice Name", placeholder="e.g., 'David', 'Sarah', 'Expert'")
	                upload_audio = gr.Audio(label="Reference Audio", type="filepath")
	                reference_text = gr.Textbox(
	                    label="Reference Text",
	                    value="Hey there, this is my voice for cloning.",
	                    placeholder="Text spoken in the reference audio"
	                )
	                clone_btn = gr.Button("🎯 Clone Voice", variant="primary")

	            with gr.Column():
	                clone_status = gr.Textbox(label="Cloning Status", interactive=False)
	                available_voices_display = gr.Dropdown(
	                    label="Available Voices",
	                    choices=available_voices,
	                    value=default_a
	                )
	                refresh_btn = gr.Button("🔄 Refresh Voices")

	    with gr.Tab("🎬 Podcast Studio"):
	        gr.Markdown("### Create 2-Speaker Podcast")

	        with gr.Row():
	            with gr.Column():
	                # Script input
	                script_input = gr.Textbox(
	                    label="Podcast Script",
	                    lines=6,
	                    placeholder="""Format: Speaker: Dialogue
	Example:
	Host: Welcome to our show!
	Co-host: Thanks for having me!
	Host: Let's discuss AI voice technology...
	Co-host: It's revolutionizing content creation!""",
	                    value=""
	                )

	                # Script generation
	                prompt_input = gr.Textbox(
	                    label="Or Generate from Prompt",
	                    placeholder="e.g., 'The future of AI in education'"
	                )
	                script_style = gr.Radio(
	                    choices=["conversational", "interview", "debate"],
	                    label="Script Style",
	                    value="conversational"
	                )
	                generate_script_btn = gr.Button("📝 Generate Script", variant="secondary")

	            with gr.Column():
	                # Speaker selection
	                speaker_a = gr.Dropdown(
	                    choices=available_voices,
	                    label="🎤 Speaker A (Host)",
	                    value=default_a
	                )
	                speaker_b = gr.Dropdown(
	                    choices=available_voices,
	                    label="🎤 Speaker B (Co-host/Guest)",
	                    value=default_b
	                )

	        generate_btn = gr.Button("🚀 Generate Podcast", variant="primary", size="lg")

	    with gr.Tab("📤 Output"):
	        gr.Markdown("### Generated Podcast")
	        with gr.Row():
	            with gr.Column():
	                audio_output = gr.Audio(label="🎧 Podcast Audio", type="filepath")
	                script_output = gr.File(label="📄 Script File", file_types=[".txt"])

	            with gr.Column():
	                generation_status = gr.Textbox(label="Generation Status", lines=3)
	                # NOTE(review): no click handler is attached to this button below.
	                download_btn = gr.Button("💾 Download All", variant="primary")

	    # ===== EVENT HANDLERS =====
	    def refresh_voices():
	        """Re-read the saved profiles and return updates for all three voice dropdowns.

	        The order matches the outputs wired below: the "Available Voices"
	        list first, then speaker A, then speaker B.
	        """
	        voices = VoiceProfileManager().list_profiles()
	        first, second = _default_voice_pair(voices)
	        return (
	            gr.Dropdown(choices=voices, value=first),
	            gr.Dropdown(choices=voices, value=first),
	            gr.Dropdown(choices=voices, value=second),
	        )

	    def handle_clone_voice(voice_name, audio_path, text):
	        """Clone the voice, then refresh every dropdown so the new voice is selectable."""
	        result = clone_voice(voice_name, audio_path, text)
	        return result, *refresh_voices()

	    def handle_generate_script(prompt, style):
	        """Return a templated script for *prompt*, or an error message when it is empty.

	        NOTE(review): the error message is written into the script box itself.
	        """
	        if not prompt:
	            return "❌ Please enter a prompt"
	        return generate_script_from_prompt(prompt, style)

	    def handle_generate_podcast(script, speaker_a, speaker_b, prompt, style):
	        """Forward the form values to generate_podcast()."""
	        return generate_podcast(script, speaker_a, speaker_b, prompt, style)

	    # Connect events
	    clone_btn.click(
	        handle_clone_voice,
	        inputs=[voice_name, upload_audio, reference_text],
	        outputs=[clone_status, available_voices_display, speaker_a, speaker_b]
	    )

	    # The refresh button sits beside the "Available Voices" list, so that list
	    # is updated together with the two speaker pickers.
	    refresh_btn.click(
	        refresh_voices,
	        outputs=[available_voices_display, speaker_a, speaker_b]
	    )

	    generate_script_btn.click(
	        handle_generate_script,
	        inputs=[prompt_input, script_style],
	        outputs=[script_input]
	    )

	    generate_btn.click(
	        handle_generate_podcast,
	        inputs=[script_input, speaker_a, speaker_b, prompt_input, script_style],
	        outputs=[audio_output, script_output, generation_status]
	    )

	if __name__ == "__main__":
	    demo.launch(share=True)