# app5.py – Interactive Bedtime Stories with Poster 🖼️ and TTS 🔊 # ------------------------------------------------------------------ # After the final scene (Scene 3) we automatically generate a DALL·E-3 # poster summarising the whole story, and you can listen to any scene # via OpenAI TTS (streaming, synchronous – no asyncio required). # ------------------------------------------------------------------ from __future__ import annotations import os import re import textwrap import tempfile from pathlib import Path from typing import List import base64 import hashlib import openai import gradio as gr from openai import OpenAI, OpenAIError # ---------- CONFIG --------------------------------------------------------- openai.api_key = os.getenv("OPENAI_API_KEY") MODEL = "gpt-4o-mini" TTS_MODEL = "tts-1" # correct OpenAI TTS model TEMPERATURE = 0.4 # Voice options for narration VOICE_OPTIONS = { "fable": "👨 Dad (Default)", "shimmer": "👩 Mom", "nova": "👧 Sister", "onyx": "👴 Grandad" } DEFAULT_VOICE = "fable" # TTS narration instructions for bedtime story atmosphere TTS_INSTRUCTIONS = ( "Speak slowly and softly, like a bedtime storyteller. " "Put a tiny pause after each sentence. " "Smile in your voice; sound friendly and reassuring. " "Keep overall volume low so it won't startle a sleepy child." ) # Seven core genres CATEGORIES = [ "Animal Adventures", "Fantasy & Magic", "Friendship & Emotional Growth", "Mystery & Problem-Solving", "Humor & Silly Situations", "Science & Space Exploration", "Values & Morals (Fables)", ] DEFAULT_CATEGORY = CATEGORIES[0] # ---------- PROMPT TEMPLATES ---------------------------------------------- SCENE_TEMPLATE = ''' You are a children's storyteller. Write **SCENE {scene_no}/3** of an age-5-to-10 bedtime story (≈ 150 words). **Category:** {category} **Child's idea:** "{idea}" 👉 *Work the idea into the first two sentences.* ### Story-Arc Requirements - **Scene 1** – introduce the main character and their WANT/PROBLEM. - **Scene 2** – raise the stakes; a challenge appears. - **Scene 3** – climax and satisfying resolution. No numbered choices. ### Style Rules 1. Use vivid language and **relevant emojis** (😀🐉🍪🌟🚀 …). 2. Keep sentences short and clear. 3. Leave a blank line between paragraphs. 4. **Scenes 1 & 2:** end with *exactly two* **bold** numbered choices ("1." & "2."). 5. **Scene 3:** wrap up the tale (no choices). Do **not** write "The end." before Scene 3. 6. Each scene should clearly advance the arc. Story so far: """{story_so_far}""" `last_choice` = "{last_choice}" If `last_choice` == "N/A" this is the opening scene, otherwise nod to the child's choice in one friendly sentence before continuing. ''' REVISION_TEMPLATE = ''' You previously wrote SCENE {scene_no}/3 … Rewrite the scene so it satisfies the feedback below. **Change at least two sentences visibly** and keep to the style rules (including **bold** choice text). Feedback: "{feedback}" Original scene: """{original_scene}""" ''' # ---------- LLM CORE ------------------------------------------------------- def _chat(prompt: str) -> str: resp = openai.chat.completions.create( model=MODEL, messages=[{"role": "user", "content": prompt}], temperature=TEMPERATURE, max_tokens=600, ) return resp.choices[0].message.content.strip() # ---------- TTS (TEXT-TO-SPEECH) ------------------------------------------ _client = OpenAI() # uses OPENAI_API_KEY env-var _audio_cache: dict[str, str] = {} # md5(clean_text) ➜ data-URL def _clean_for_tts(raw: str) -> str: """Remove markdown markers and numbered options; truncate at 4096 chars.""" no_md = re.sub(r"[*_`#🌟]", "", raw) no_opts = "\n".join( ln for ln in no_md.splitlines() if not ln.strip().startswith(("1.", "2.")) ) return no_opts[:4096] def _generate_audio(text: str) -> str: """Return a base-64 data-URL (audio/mp3) for Gradio's