Spaces:

phxdev
/

podcaster

Runtime error

File size: 9,648 Bytes

import asyncio
import os
import time
from dataclasses import dataclass
from typing import List, Optional, AsyncGenerator
import gradio as gr
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from rich.console import Console
from rich.panel import Panel
from rich.text import Text
from logger import setup_logger, log_execution_time, log_async_execution_time

from browser_use import Agent, Browser
from browser_use.browser.browser import BrowserContext
from api_clients import OpenRouterClient, ElevenLabsClient

# Load variables from a local .env file (if one exists) into the process
# environment before any client that reads API keys is constructed.
load_dotenv()

# Shared rich console; parse_agent_history prints its panels through it.
console = Console()
# Module-wide logger produced by the project's setup_logger helper under the
# name "interface"; also handed to the execution-time decorators below.
logger = setup_logger("interface")

@dataclass
class ActionResult:
    """Plain record describing the outcome of one agent action.

    All four fields are required and carry no defaults, so instances must
    be built with every value supplied (positionally in declaration order,
    or by keyword).

    NOTE(review): field meanings are inferred from their names only; this
    class is not referenced elsewhere in the visible file - confirm against
    the browser_use result type it appears to mirror.
    """

    # Completion flag for the action/task.
    is_done: bool
    # Text captured by the action, or None when nothing was extracted.
    extracted_content: Optional[str]
    # Error description, or None when the action succeeded.
    error: Optional[str]
    # Whether the result should be kept in the agent's memory.
    include_in_memory: bool


@dataclass
class AgentHistoryList:
    """Container pairing a run's action results with its raw model outputs.

    Both fields are required lists; the class adds no behaviour beyond what
    ``@dataclass`` generates.
    """

    # One ActionResult per recorded action.
    all_results: List[ActionResult]
    # Model outputs as plain dictionaries (schema not defined in this file).
    all_model_outputs: List[dict]


def parse_agent_history(history_str: str) -> None:
    """Print the extracted content of every step in a stringified history.

    The input is split on the literal ``ActionResult(``; for each resulting
    section the value following ``extracted_content=`` is shown in a blue
    rich panel titled ``Step <n>``. Sections without content print nothing,
    but still consume a step number.

    The value is read as a complete quoted string literal (single or double
    quotes, backslash escapes allowed), so content containing commas is no
    longer cut off at the first comma. A bare ``None`` value is skipped
    instead of being rendered as a panel reading "None".

    Args:
        history_str: Text containing zero or more ``ActionResult(...)``
            entries, e.g. the ``str()`` of an agent run result.
    """
    import re  # local import keeps this block self-contained

    # A quoted string literal at the very start of the value, in either
    # quote style; escaped characters (including escaped quotes) are allowed.
    single_quoted = r"'((?:[^'\\]|\\.)*)'"
    double_quoted = r'"((?:[^"\\]|\\.)*)"'
    quoted_value = re.compile(f"(?:{single_quoted}|{double_quoted})", re.DOTALL)

    # Everything before the first 'ActionResult(' is preamble, not a step.
    sections = history_str.split('ActionResult(')

    for step, section in enumerate(sections[1:], 1):
        _, marker, remainder = section.partition('extracted_content=')
        if not marker:
            continue

        quoted = quoted_value.match(remainder)
        if quoted:
            content = quoted.group(1) if quoted.group(1) is not None else quoted.group(2)
        elif remainder.startswith('None'):
            # No content was extracted for this step.
            continue
        else:
            # Unquoted value: fall back to the text up to the next comma.
            content = remainder.split(',')[0].strip("'")

        if content:
            header = Text(f'Step {step}', style='bold blue')
            console.print(Panel(content, title=header, border_style='blue'))
            console.print()


async def run_browser_task(
    task: str,
    api_key: str,
    provider: str = 'openai',
    model: str = 'gpt-4-vision',
    headless: bool = True,
) -> str:
    """Run a browser agent for ``task`` using the selected LLM provider.

    The API key is exported through the provider's environment variable
    before the chat model is constructed. Every failure - including one
    raised while building the model - is reported as an ``'Error: ...'``
    string rather than propagated, so callers always get a displayable value.

    NOTE(review): ``ChatAnthropic`` and ``ChatGoogleGenerativeAI`` are not
    imported in the visible part of this file; until they are, the
    'anthropic' and fallback branches end in an ``'Error: ...'`` result.

    Args:
        task: Natural-language instruction for the agent.
        api_key: Provider API key; missing or blank keys are rejected.
        provider: 'openai' or 'anthropic'; any other value is treated as
            Google.
        model: Model identifier passed to the chat model constructor.
        headless: Forwarded to the browser context (previously ignored in
            favour of a hard-coded ``True``).

    Returns:
        Whatever ``agent.run()`` returns on success, otherwise a message
        string describing the problem.
    """
    # Guard against None as well as empty/whitespace-only keys.
    if not api_key or not api_key.strip():
        return 'Please provide an API key'

    try:
        if provider == 'openai':
            os.environ['OPENAI_API_KEY'] = api_key
            llm = ChatOpenAI(model=model)
        elif provider == 'anthropic':
            os.environ['ANTHROPIC_API_KEY'] = api_key
            llm = ChatAnthropic(model=model)
        else:  # google
            os.environ['GOOGLE_API_KEY'] = api_key
            llm = ChatGoogleGenerativeAI(model=model)

        agent = Agent(
            task=task,
            llm=llm,
            browser=Browser(BrowserContext(headless=headless)),
        )
        result = await agent.run()
        # TODO: The result could be parsed better
        return result
    except Exception as e:
        return f'Error: {str(e)}'


@log_async_execution_time(logger)
async def scrape_content(url: str) -> str:
    """
    Fetch a page through a browser agent and return its summary.

    Flow:
    1. Reject URLs that do not start with ``http://`` or ``https://``
    2. Build a GPT-4 chat model and a headless browser agent
    3. Run the agent with a "visit and summarize" task and return its result

    NOTE(review): ``ChatOpenAI`` is created without an explicit key, so it
    presumably relies on ``OPENAI_API_KEY`` being set in the environment -
    confirm.

    Args:
        url: Page to visit; must carry an http(s) scheme prefix

    Returns:
        The value produced by ``agent.run()``

    Raises:
        ValueError: If ``url`` lacks an http:// or https:// prefix
        Exception: Any failure during extraction is logged with its
            traceback and re-raised unchanged
    """
    logger.info(f"Starting content scrape for URL: {url}")

    # Guard clause: only http(s) URLs are accepted
    has_web_scheme = url.startswith(('http://', 'https://'))
    if not has_web_scheme:
        logger.error(f"Invalid URL format: {url}")
        raise ValueError("URL must start with http:// or https://")

    try:
        logger.debug("Initializing LLM and browser agent")
        chat_model = ChatOpenAI(model="gpt-4")
        instruction = (
            f"Visit this URL: {url} and extract the main content. "
            "Summarize it in a clear and concise way."
        )
        headless_browser = Browser(BrowserContext(headless=True))
        agent = Agent(task=instruction, llm=chat_model, browser=headless_browser)

        logger.info("Executing content extraction")
        summary = await agent.run()

        logger.debug(f"Content extraction successful. Length: {len(summary)} chars")
        logger.debug(f"Content preview: {summary[:200]}...")
    except Exception:
        # Record the traceback here, then let the caller decide what to do
        logger.error(f"Content extraction failed for {url}", exc_info=True)
        raise

    return summary

@log_async_execution_time(logger)
async def create_podcast(
    url: str,
    prompt: str,
    elevenlabs_key: str,
    voice_id: str,
    openrouter_key: str,
    model_id: str,
) -> AsyncGenerator[tuple[Optional[str], str], None]:
    """
    Create a podcast through a multi-step process:
    1. Content extraction from URL
    2. Script generation using AI
    3. Voice synthesis

    Progress updates are yielded at each step for UI feedback. Each item is
    an ``(audio_path, status)`` pair; ``audio_path`` stays ``None`` until
    the final success item. Any exception is logged and turned into a final
    ``(None, "Error: ...")`` item instead of propagating.

    The audio synthesis call is synchronous, so it is run in a worker thread
    to keep the event loop free while it completes.

    NOTE(review): this is an async generator - confirm that
    ``log_async_execution_time`` supports async-generator functions; a
    wrapper that awaits the call would break ``async for`` iteration.

    Args:
        url: Page to scrape; must start with http:// or https://
        prompt: Instructions/topic passed to script generation
        elevenlabs_key: API key for the ElevenLabs client
        voice_id: Voice passed to audio generation
        openrouter_key: API key for the OpenRouter client
        model_id: Model passed to script generation
    """
    logger.info(f"Starting podcast creation for URL: {url}")
    logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
    logger.debug(f"Prompt length: {len(prompt)} chars")

    try:
        # Initialize clients with validation
        logger.debug("Initializing API clients")
        openrouter = OpenRouterClient(openrouter_key)
        elevenlabs = ElevenLabsClient(elevenlabs_key)

        # Phase 1: Content scraping
        logger.info("Phase 1/3: Content scraping")
        yield None, "Scraping website content..."
        content = await scrape_content(url)
        logger.debug(f"Scraped content length: {len(content)} chars")

        # Phase 2: Script generation
        logger.info("Phase 2/3: Script generation")
        yield None, "Generating podcast script..."
        script = await openrouter.generate_script(content, prompt, model_id)
        logger.debug(f"Generated script length: {len(script)} chars")

        # Phase 3: Audio synthesis
        logger.info("Phase 3/3: Audio generation")
        yield None, "Converting to audio..."
        # generate_audio is a blocking call; offload it so other requests
        # and UI updates are not stalled while the audio is produced.
        audio = await asyncio.to_thread(elevenlabs.generate_audio, script, voice_id)
        logger.debug(f"Generated audio size: {len(audio)} bytes")

        # Save output (name is based on the current Unix time in seconds)
        audio_path = f"podcast_{int(time.time())}.mp3"
        logger.debug(f"Saving audio to: {audio_path}")
        with open(audio_path, "wb") as f:
            f.write(audio)

        logger.info("Podcast creation completed successfully")
        yield audio_path, "Podcast created successfully!"

    except Exception as e:
        logger.error("Podcast creation failed", exc_info=True)
        yield None, f"Error: {str(e)}"

def create_ui():
    """Build and return the Gradio Blocks interface for the podcast creator.

    Layout: a wide left column with the source URL, topic prompt, the two
    API-key fields (each paired with a dropdown) and the submit button; a
    narrow right column with the audio player and a read-only status box.

    Wiring:
    - changing the ElevenLabs key reloads the voice dropdown
    - changing the OpenRouter key reloads the model dropdown
    - the submit button streams ``create_podcast`` results into the audio
      and status outputs

    Gradio reads tuple choices as ``(display name, value)``, so the
    placeholder and error entries put the human-readable text first and an
    empty value second.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    logger.info("Initializing Gradio interface")

    # Placeholder choices shown until a key is entered: (display name, value)
    default_voices = [("Enter API key to load voices", "")]
    default_models = [("Enter API key to load models", "")]

    with gr.Blocks(title='PodcastCreator', theme=gr.themes.Soft()) as interface:
        with gr.Row():
            with gr.Column(scale=2):
                url_input = gr.Textbox(label='Source URL', placeholder='Enter the URL...')
                prompt = gr.Textbox(label='Podcast Topic', lines=3)

                with gr.Row():
                    with gr.Column():
                        elevenlabs_key = gr.Textbox(
                            label='ElevenLabs API Key',
                            type='password',
                            placeholder='Enter key...'
                        )
                        voice = gr.Dropdown(
                            label='Voice',
                            choices=default_voices,
                            value=None,
                            allow_custom_value=True
                        )

                    with gr.Column():
                        openrouter_key = gr.Textbox(
                            label='OpenRouter API Key',
                            type='password',
                            placeholder='Enter key...'
                        )
                        model = gr.Dropdown(
                            label='AI Model',
                            choices=default_models,
                            value=None,
                            allow_custom_value=True
                        )

                submit_btn = gr.Button('Create Podcast', variant='primary')

            with gr.Column(scale=1):
                audio_output = gr.Audio(label="Generated Podcast")
                status = gr.Textbox(label='Status', interactive=False)

        # Event handlers
        def update_voices(key):
            """Refresh the voice dropdown for the given ElevenLabs key."""
            if not key:
                # Key cleared: fall back to the placeholder entry.
                return gr.Dropdown(choices=default_voices, value=default_voices[0][1])
            try:
                client = ElevenLabsClient(key)
                voices = client.get_voices()
                # NOTE(review): the first element of the first entry is used
                # as the selected value; confirm get_voices() returns entries
                # in the order Gradio expects, (display name, value).
                return gr.Dropdown(choices=voices, value=voices[0][0] if voices else None)
            except Exception as e:
                logger.error(f"Failed to load voices: {e}")
                # Surface the failure as the only (non-selected) choice.
                return gr.Dropdown(choices=[(f"Error: {str(e)}", "")], value=None)

        async def update_models(key):
            """Refresh the model dropdown for the given OpenRouter key."""
            if not key:
                # Key cleared: fall back to the placeholder entry.
                return gr.Dropdown(choices=default_models, value=default_models[0][1])
            try:
                client = OpenRouterClient(key)
                models = await client.get_models()
                # NOTE(review): same tuple-order assumption as for voices;
                # confirm against get_models().
                return gr.Dropdown(choices=models, value=models[0][0] if models else None)
            except Exception as e:
                logger.error(f"Failed to load models: {e}")
                # Surface the failure as the only (non-selected) choice.
                return gr.Dropdown(choices=[(f"Error: {str(e)}", "")], value=None)

        # Registration problems are logged here and then re-raised so that
        # interface construction still fails loudly.
        try:
            elevenlabs_key.change(fn=update_voices, inputs=elevenlabs_key, outputs=voice)
            openrouter_key.change(fn=update_models, inputs=openrouter_key, outputs=model)

            submit_btn.click(
                fn=create_podcast,
                inputs=[url_input, prompt, elevenlabs_key, voice, openrouter_key, model],
                outputs=[audio_output, status]
            )
        except Exception as e:
            logger.error(f"Failed to set up event handlers: {e}")
            raise

    logger.info("Gradio interface initialized successfully")
    return interface

# Script entry point: build the interface and launch it only when this file
# is executed directly, not when it is imported.
if __name__ == '__main__':
    demo = create_ui()
    demo.launch()