# Hugging Face Space: nihalaninihal/storygen (status: Sleeping)
# File size: 7,528 bytes; revision 07d440e



import os
import gradio as gr
import google.generativeai as genai
from gtts import gTTS
import io
from PIL import Image
import httpx
import base64

# Configure Google AI API
def configure_google_ai():
    """Configure the ``google.generativeai`` client with an API key.

    The key is read from the ``GOOGLE_API_KEY`` environment variable so that
    no credential is stored in the source file.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset or contains only whitespace.
    """
    # SECURITY: a literal API key used to be embedded at this point. Any key
    # that was ever committed should be treated as leaked and revoked/rotated.
    api_key = os.environ.get("GOOGLE_API_KEY", "").strip()

    if not api_key:
        raise ValueError("No API key found. Please provide a valid Google AI API key.")

    genai.configure(api_key=api_key)

# Image-to-text function using Gemini
def img2txt(image):
    """Describe an uploaded image using the ``gemini-exp-1121`` model.

    The input is coerced to a PIL image when necessary, PNG-encoded in memory
    and sent to the model together with a fixed descriptive prompt.

    Args:
        image: A PIL image, or any object accepted by ``Image.fromarray``.

    Returns:
        The model's response text with surrounding whitespace stripped, or a
        fixed error message when any step inside the ``try`` block fails (the
        exception itself is printed).
    """
    configure_google_ai()

    try:
        vision_model = genai.GenerativeModel('gemini-exp-1121')

        # Anything that is not already a PIL image is converted with fromarray.
        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

        # Serialize the image as PNG into an in-memory buffer.
        png_buffer = io.BytesIO()
        pil_image.save(png_buffer, format='PNG')

        instruction = "Provide a detailed, creative description of this image. Capture the key elements, mood, and potential narrative elements."
        image_part = {
            'mime_type': 'image/png',
            'data': png_buffer.getvalue(),
        }
        sampling = genai.types.GenerationConfig(
            max_output_tokens=200,
            temperature=0.7,
        )

        # The image part is listed before the text instruction.
        result = vision_model.generate_content(
            contents=[image_part, instruction],
            generation_config=sampling,
        )
        return result.text.strip()

    except Exception as e:
        print(f"Error in image description: {e}")
        return "Error processing image. Please try again."

# Text-to-story generation function using Gemini
def txt2story(prompt, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Generate a story with the ``gemini-1.5-flash`` model.

    The story options and the free-text prompt are interpolated into a fixed
    instruction template, which is sent to the model as a single text request.

    Args:
        prompt: Free-text story details (the caller passes the image description).
        genre: Story genre.
        setting: Story setting; rendered as "<setting> in <continent>".
        continent: Continent in which the setting is placed.
        tone: Narrative tone.
        theme: Central theme (referenced twice in the template).
        conflict: Primary conflict.
        twist: Narrative twist.
        ending: Ending style; rendered as "<ending> ending".

    Returns:
        The model's response text with surrounding whitespace stripped, or a
        fixed error message when any step inside the ``try`` block fails (the
        exception itself is printed).
    """
    configure_google_ai()

    try:
        story_model = genai.GenerativeModel('gemini-1.5-flash')

        # Instruction template; the text is kept flush-left because it is sent verbatim.
        story_request = f"""You are an experienced and masterful storyteller tasked with crafting an immersive, complex narrative. Create an elaborate and compelling story based on the following detailed specifications:

Story Framework:
- Genre: {genre}
- Setting: {setting} in {continent}
- Narrative Tone: {tone}
- Central Theme: {theme}
- Primary Conflict: {conflict}
- Narrative Twist: {twist}
- Story Ending: {ending} ending

Comprehensive Storytelling Guidelines:
1. Story Structure:
   - Develop a multi-layered narrative with rich character development
   - Create intricate plot progression with meaningful character arcs
   - Integrate deep psychological and emotional dimensions
   - Explore nuanced motivations and complex interpersonal dynamics

2. Narrative Depth:
   - Provide comprehensive background context
   - Develop multiple plot layers and subplots
   - Include detailed character histories and motivations
   - Demonstrate cause-and-effect relationships between events

3. Thematic Exploration:
   - Deeply explore the chosen theme of {theme}
   - Use symbolism and metaphorical elements
   - Connect character experiences to broader philosophical or existential questions
   - Demonstrate subtle and profound insights into human nature

4. Stylistic Elements:
   - Use vivid, evocative language
   - Create immersive sensory descriptions
   - Balance dialogue, internal monologue, and narrative exposition
   - Maintain consistent narrative voice

5. Emotional Complexity:
   - Portray nuanced emotional landscapes
   - Show character growth and transformation
   - Create moments of genuine emotional resonance
   - Balance tension, conflict, and moments of reflection

Story Prompt Details: {prompt}

Additional Context: Craft a narrative that transcends typical genre constraints. Aim for a story that is not just entertaining, but thought-provoking and emotionally impactful. The story should feel like a complete, self-contained narrative journey with depth, complexity, and meaningful resolution.

Expected Outcome:
- Comprehensive narrative arc
- Fully developed characters
- Profound thematic exploration
- Engaging and immersive storytelling
- Meaningful resolution that resonates with the story's core themes
"""

        # Sampling settings for the story request.
        sampling = genai.types.GenerationConfig(
            max_output_tokens=1000,
            temperature=1.7,
            top_p=0.9,
            top_k=40,
        )

        result = story_model.generate_content(
            contents=story_request,
            generation_config=sampling,
        )
        return result.text.strip()

    except Exception as e:
        print(f"Error generating story: {e}")
        return "Error generating story. Please try again."

# Text-to-speech function
def txt2speech(text):
    """Convert text to English speech and return the path of the MP3 file.

    Each call writes to its own temporary ``.mp3`` file instead of one shared,
    fixed filename, so overlapping requests cannot overwrite each other's audio.

    Args:
        text: The text to synthesize.

    Returns:
        The path of the generated MP3 file, or ``None`` when ``text`` is
        ``None``, empty or only whitespace (there is nothing to speak).

    Raises:
        Exception: Whatever ``gTTS`` raises while synthesizing or saving; the
            temporary file is removed before the error is re-raised.
    """
    import tempfile  # local import: only this function needs it

    # Nothing to synthesize; skip the TTS call entirely.
    if not text or not text.strip():
        return None

    tts = gTTS(text=text, lang='en')

    # delete=False: the file must outlive this function so the caller can serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name

    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty placeholder file behind when synthesis fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise

    return audio_path

# Main generation function
def generate_story(image, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Run the full pipeline: image -> description -> story -> audio.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story options forwarded unchanged to ``txt2story``.

    Returns:
        A tuple ``(image_description, story, audio)``. When no image is given
        the result is ``("", "", None)``. ``audio`` is ``None`` when speech
        synthesis fails, so the generated text is still returned.
    """
    # Nothing to do without an image.
    if image is None:
        return "", "", None

    image_description = img2txt(image)

    # NOTE(review): img2txt and txt2story return an error-message string on
    # failure instead of raising, so such a message flows into the next stage
    # as if it were real content. Consider short-circuiting on failure.
    story = txt2story(
        prompt=image_description,
        genre=genre,
        setting=setting,
        continent=continent,
        tone=tone,
        theme=theme,
        conflict=conflict,
        twist=twist,
        ending=ending,
    )

    # A speech-synthesis failure must not discard the description and story
    # that were already produced; log it like the other stages do.
    try:
        audio = txt2speech(story)
    except Exception as e:
        print(f"Error generating audio: {e}")
        audio = None

    return image_description, story, audio

# Gradio interface setup
def create_gradio_app():
    """Build the Gradio interface for the image-to-story generator.

    The interface takes one image plus eight dropdown selections and passes
    them, in order, to ``generate_story``; the three outputs show the image
    description, the story text and the story audio.

    Returns:
        The constructed ``gr.Interface`` (not yet launched).
    """
    # (label, choices) for each dropdown, in the order generate_story expects.
    dropdown_specs = [
        ("Genre", ["Science Fiction", "Fantasy", "Mystery", "Romance"]),
        ("Setting", ["Future", "Medieval times", "Modern day", "Alternate reality"]),
        ("Continent", ["North America", "Europe", "Asia", "Africa", "Australia"]),
        ("Tone", ["Serious", "Light-hearted", "Humorous", "Dark"]),
        ("Theme", ["Self-discovery", "Redemption", "Love", "Justice"]),
        ("Conflict Type", ["Person vs. Society", "Internal struggle", "Person vs. Nature", "Person vs. Person"]),
        ("Mystery/Twist", ["Plot twist", "Hidden identity", "Unexpected ally/enemy", "Time paradox"]),
        ("Ending", ["Happy", "Bittersweet", "Open-ended", "Tragic"]),
    ]

    # The image input comes first, followed by one dropdown per spec.
    input_components = [gr.Image(type="pil", label="Upload Image")]
    input_components.extend(
        gr.Dropdown(choices, label=caption) for caption, choices in dropdown_specs
    )

    output_components = [
        gr.Textbox(label="Image Description"),
        gr.Textbox(label="Generated Story"),
        gr.Audio(label="Story Audio"),
    ]

    return gr.Interface(
        fn=generate_story,
        inputs=input_components,
        outputs=output_components,
        title="🎨 Image to Story Generator 📖",
        description="Upload an image and generate a unique story!",
    )

# Launch the app
if __name__ == "__main__":
    # Runs only when this file is executed as a script, not when it is imported.
    demo = create_gradio_app()
    # Launch the interface; debug=True is passed through to Gradio's launch().
    demo.launch(debug=True)