# podcaster / interface.py
# marks
# Fixes
# c405952
import asyncio
import os
import time
from dataclasses import dataclass
from typing import List, Optional, AsyncGenerator
import gradio as gr
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from rich.console import Console
from rich.panel import Panel
from rich.text import Text
from logger import setup_logger, log_execution_time, log_async_execution_time
from browser_use import Agent, Browser
from browser_use.browser.browser import BrowserContext
from api_clients import OpenRouterClient, ElevenLabsClient
# Load settings from a .env file (python-dotenv) before the rest of the module runs.
load_dotenv()
# Shared rich console; parse_agent_history() prints its panels through it.
console = Console()
# Module-wide logger, registered under the name "interface".
logger = setup_logger("interface")
@dataclass
class ActionResult:
    """Record describing the outcome of one agent action.

    The field order below is also the positional order of the generated
    constructor, so it must not be rearranged.
    """

    # Presumably True once the agent considers the task finished -- confirm.
    is_done: bool
    # Text captured by the action, or None when nothing was extracted.
    extracted_content: Optional[str]
    # Error message for the action, or None when there was no error.
    error: Optional[str]
    # Presumably controls whether the result is kept in agent memory -- confirm.
    include_in_memory: bool
@dataclass
class AgentHistoryList:
    """Container pairing a run's action results with its model outputs.

    The field order below is also the positional order of the generated
    constructor, so it must not be rearranged.
    """

    # One ActionResult per recorded action.
    all_results: List[ActionResult]
    # Model outputs as plain dictionaries; their schema is not defined in this file.
    all_model_outputs: List[dict]
def _extract_content(section: str) -> str:
    """Return the ``extracted_content`` value found in one ``ActionResult(...)`` section.

    A quoted value (the ``repr()`` of a string, e.g.
    ``extracted_content='Hello, world'``) is decoded as a Python string
    literal, so commas, escaped quotes and escape sequences inside the text
    are preserved. A missing field or a literal ``None`` yields ``''``.
    """
    import ast
    import re

    _, marker, tail = section.partition('extracted_content=')
    if not marker:
        return ''

    # Single- or double-quoted literal, allowing backslash escapes inside it.
    quoted = re.match(r'''(?:'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*")''', tail, re.DOTALL)
    if quoted:
        literal = quoted.group(0)
        try:
            return str(ast.literal_eval(literal))
        except (ValueError, SyntaxError):
            # Not a valid literal after all (e.g. a raw newline between the
            # quotes): fall back to the text between the quotes, undecoded.
            return literal[1:-1]

    # Unquoted value: keep the previous "up to the first comma" behaviour,
    # but do not report a literal None as if it were content.
    value = tail.split(',')[0].strip("'")
    if re.match(r'None\b', value):
        return ''
    return value


def parse_agent_history(history_str: str) -> None:
    """Print the extracted content of each ``ActionResult`` in *history_str*.

    The string is split on ``'ActionResult('``; every resulting section that
    carries non-empty ``extracted_content`` is printed to the module console
    as a blue panel titled ``Step <n>``, followed by a blank line. ``n`` is
    the 1-based position of the section, so steps without content leave gaps
    in the numbering.

    Args:
        history_str: Textual form of an agent history.
    """
    sections = history_str.split('ActionResult(')
    # sections[0] is whatever precedes the first ActionResult; skip it.
    for step, section in enumerate(sections[1:], 1):
        content = _extract_content(section)
        if not content:
            continue
        header = Text(f'Step {step}', style='bold blue')
        console.print(Panel(content, title=header, border_style='blue'))
        console.print()
async def run_browser_task(
    task: str,
    api_key: str,
    provider: str = 'openai',
    model: str = 'gpt-4-vision',
    headless: bool = True,
) -> str:
    """Run a browser-automation task with the selected LLM provider.

    Args:
        task: Natural-language description of what the agent should do.
        api_key: API key for ``provider``; surrounding whitespace is ignored.
        provider: ``'openai'`` or ``'anthropic'``; any other value is treated
            as Google.
        model: Model name passed to the provider's chat class.
        headless: Forwarded to ``BrowserContext`` to choose headless mode.

    Returns:
        The value produced by ``agent.run()`` on success,
        ``'Please provide an API key'`` when the key is blank, or
        ``'Error: <message>'`` when model setup or the run raises.
    """
    key = api_key.strip()
    if not key:
        return 'Please provide an API key'

    try:
        # Model construction happens inside the try so that a failure here is
        # reported through the same 'Error: ...' string as a failed run.
        if provider == 'openai':
            os.environ['OPENAI_API_KEY'] = key
            llm = ChatOpenAI(model=model)
        elif provider == 'anthropic':
            # NOTE(review): ChatAnthropic is not imported in the visible part
            # of this file; until it is, this branch ends in an error string.
            os.environ['ANTHROPIC_API_KEY'] = key
            llm = ChatAnthropic(model=model)
        else:  # google
            # NOTE(review): ChatGoogleGenerativeAI is not imported in the
            # visible part of this file either.
            os.environ['GOOGLE_API_KEY'] = key
            llm = ChatGoogleGenerativeAI(model=model)

        agent = Agent(
            task=task,
            llm=llm,
            # Honour the caller's choice instead of always forcing headless.
            browser=Browser(BrowserContext(headless=headless)),
        )
        result = await agent.run()
        # TODO: The result could be parsed better
        return result
    except Exception as e:
        return f'Error: {str(e)}'
@log_async_execution_time(logger)
async def scrape_content(url: str) -> str:
    """
    Visit a URL with a browser agent and return a summary of its main content.

    Steps:
    1. Check that the URL uses an http(s) scheme
    2. Build the LLM-backed browser agent
    3. Run the agent and return what it produces

    Args:
        url: Target URL to scrape

    Returns:
        The result of the agent run for the summarisation task

    Raises:
        ValueError: If the URL does not start with http:// or https://
        Exception: Any error raised during extraction is logged with its
            traceback and re-raised unchanged
    """
    logger.info(f"Starting content scrape for URL: {url}")

    # Guard clause: reject anything that is not an http(s) URL up front.
    has_http_scheme = url.startswith(('http://', 'https://'))
    if not has_http_scheme:
        logger.error(f"Invalid URL format: {url}")
        raise ValueError("URL must start with http:// or https://")

    try:
        logger.debug("Initializing LLM and browser agent")
        summarizer = Agent(
            task=f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way.",
            llm=ChatOpenAI(model="gpt-4"),
            browser=Browser(BrowserContext(headless=True)),
        )

        logger.info("Executing content extraction")
        summary = await summarizer.run()

        logger.debug(f"Content extraction successful. Length: {len(summary)} chars")
        logger.debug(f"Content preview: {summary[:200]}...")
    except Exception:
        # Log with traceback, then let the caller see the original error.
        logger.error(f"Content extraction failed for {url}", exc_info=True)
        raise

    return summary
@log_async_execution_time(logger)
async def create_podcast(
    url: str,
    prompt: str,
    elevenlabs_key: str,
    voice_id: str,
    openrouter_key: str,
    model_id: str,
) -> AsyncGenerator[tuple[Optional[str], str], None]:
    """
    Build a podcast from a web page, yielding progress as it goes.

    Pipeline:
    1. Scrape and summarise the page at ``url``
    2. Generate a script from that summary and ``prompt``
    3. Synthesize the script to audio and save it as an MP3

    Yields:
        ``(audio_path, status)`` pairs. ``audio_path`` is None for progress
        and error updates and is the saved file name on the final, successful
        update. Failures are reported as an ``"Error: ..."`` status instead of
        being raised.
    """
    logger.info(f"Starting podcast creation for URL: {url}")
    logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
    logger.debug(f"Prompt length: {len(prompt)} chars")

    try:
        # Both clients are created before any work starts.
        logger.debug("Initializing API clients")
        script_client = OpenRouterClient(openrouter_key)
        voice_client = ElevenLabsClient(elevenlabs_key)

        # --- Phase 1: scrape the source page ---
        logger.info("Phase 1/3: Content scraping")
        yield None, "Scraping website content..."
        page_summary = await scrape_content(url)
        logger.debug(f"Scraped content length: {len(page_summary)} chars")

        # --- Phase 2: turn the summary into a script ---
        logger.info("Phase 2/3: Script generation")
        yield None, "Generating podcast script..."
        podcast_script = await script_client.generate_script(page_summary, prompt, model_id)
        logger.debug(f"Generated script length: {len(podcast_script)} chars")

        # --- Phase 3: synthesize audio ---
        logger.info("Phase 3/3: Audio generation")
        yield None, "Converting to audio..."
        audio_bytes = voice_client.generate_audio(podcast_script, voice_id)
        logger.debug(f"Generated audio size: {len(audio_bytes)} bytes")

        # The file name carries the current Unix time in whole seconds.
        output_name = f"podcast_{int(time.time())}.mp3"
        logger.debug(f"Saving audio to: {output_name}")
        with open(output_name, "wb") as mp3_file:
            mp3_file.write(audio_bytes)

        logger.info("Podcast creation completed successfully")
        yield output_name, "Podcast created successfully!"
    except Exception as exc:
        # Report the failure to the UI as a status message rather than raising.
        logger.error("Podcast creation failed", exc_info=True)
        yield None, f"Error: {str(exc)}"
def create_ui() -> gr.Blocks:
    """Build the Gradio interface for the podcast creator.

    Layout: a wide column with the source URL, topic prompt, the two API-key
    boxes with their dependent dropdowns (voice / model) and the submit
    button, next to a narrow column with the audio player and status box.

    Changing an API-key box reloads the matching dropdown; the submit button
    streams ``create_podcast`` updates into the audio and status outputs.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).

    Raises:
        Exception: Any error raised while wiring event handlers is logged and
            re-raised.
    """
    logger.info("Initializing Gradio interface")

    # Placeholder choices shown until a key is entered. Gradio reads choice
    # tuples as (label, value): the hint is the label, the value stays empty
    # so the hint text can never be submitted as a voice or model id.
    default_voices = [("Enter API key to load voices", "")]
    default_models = [("Enter API key to load models", "")]

    with gr.Blocks(title='PodcastCreator', theme=gr.themes.Soft()) as interface:
        with gr.Row():
            with gr.Column(scale=2):
                url_input = gr.Textbox(label='Source URL', placeholder='Enter the URL...')
                prompt = gr.Textbox(label='Podcast Topic', lines=3)
                with gr.Row():
                    with gr.Column():
                        elevenlabs_key = gr.Textbox(
                            label='ElevenLabs API Key',
                            type='password',
                            placeholder='Enter key...'
                        )
                        voice = gr.Dropdown(
                            label='Voice',
                            choices=default_voices,
                            value=None,
                            allow_custom_value=True
                        )
                    with gr.Column():
                        openrouter_key = gr.Textbox(
                            label='OpenRouter API Key',
                            type='password',
                            placeholder='Enter key...'
                        )
                        model = gr.Dropdown(
                            label='AI Model',
                            choices=default_models,
                            value=None,
                            allow_custom_value=True
                        )
                submit_btn = gr.Button('Create Podcast', variant='primary')
            with gr.Column(scale=1):
                audio_output = gr.Audio(label="Generated Podcast")
                status = gr.Textbox(label='Status', interactive=False)

        # Event handlers
        def update_voices(key):
            """Reload the voice dropdown for the given ElevenLabs key."""
            if not key:
                return gr.Dropdown(choices=default_voices, value=default_voices[0][1])
            try:
                client = ElevenLabsClient(key)
                voices = client.get_voices()
                # NOTE(review): Gradio treats tuples as (label, value). If
                # get_voices() returns (label, id) pairs, the preselected
                # value should be voices[0][1] -- confirm against api_clients.
                return gr.Dropdown(choices=voices, value=voices[0][0] if voices else None)
            except Exception as e:
                logger.error(f"Failed to load voices: {e}")
                # Show the error as the label; keep the value empty.
                return gr.Dropdown(choices=[(f"Error: {str(e)}", "")], value=None)

        async def update_models(key):
            """Reload the model dropdown for the given OpenRouter key."""
            if not key:
                return gr.Dropdown(choices=default_models, value=default_models[0][1])
            try:
                client = OpenRouterClient(key)
                models = await client.get_models()
                # NOTE(review): same (label, value) question as for voices --
                # confirm the tuple order returned by get_models().
                return gr.Dropdown(choices=models, value=models[0][0] if models else None)
            except Exception as e:
                logger.error(f"Failed to load models: {e}")
                # Show the error as the label; keep the value empty.
                return gr.Dropdown(choices=[(f"Error: {str(e)}", "")], value=None)

        # Wire the handlers; a failure here leaves the UI unusable, so log
        # it and let it propagate.
        try:
            elevenlabs_key.change(fn=update_voices, inputs=elevenlabs_key, outputs=voice)
            openrouter_key.change(fn=update_models, inputs=openrouter_key, outputs=model)
            submit_btn.click(
                fn=create_podcast,
                inputs=[url_input, prompt, elevenlabs_key, voice, openrouter_key, model],
                outputs=[audio_output, status]
            )
        except Exception as e:
            logger.error(f"Failed to set up event handlers: {e}")
            raise

    logger.info("Gradio interface initialized successfully")
    return interface
if __name__ == '__main__':
    # Script entry point: build the interface, then launch it.
    demo = create_ui()
    demo.launch()