# storygen / app.py
# nihalaninihal's picture
# Create app.py
# 07d440e verified
import os
import gradio as gr
import google.generativeai as genai
from gtts import gTTS
import io
from PIL import Image
import httpx
import base64
# Configure Google AI API
def configure_google_ai():
    """Configure the Google Generative AI client with an API key.

    The key is read from the ``GOOGLE_API_KEY`` environment variable so that
    no secret has to live in the source file.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset or contains only whitespace.
    """
    # A key embedded in the source is readable by anyone who can see the
    # file, so the secret must be supplied by the runtime environment instead.
    api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
    if not api_key:
        raise ValueError(
            "No API key found. Please provide a valid Google AI API key "
            "via the GOOGLE_API_KEY environment variable."
        )
    genai.configure(api_key=api_key)
# Image-to-text function using Gemini
def img2txt(image):
    """Describe an uploaded image using the ``gemini-exp-1121`` model.

    The image is converted to a PIL image when it is not one already,
    encoded as PNG bytes, and sent to the model together with a fixed
    description prompt.

    Args:
        image: A ``PIL.Image.Image``, or any object ``Image.fromarray`` accepts.

    Returns:
        The model's description with surrounding whitespace removed, or the
        fixed message "Error processing image. Please try again." when any
        step after client configuration raises.
    """
    # Configuration happens outside the try block, so a configuration error
    # propagates to the caller instead of being turned into the error message.
    configure_google_ai()
    try:
        vision_model = genai.GenerativeModel('gemini-exp-1121')

        # Normalise the input to a PIL image.
        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

        # Encode as PNG in memory; getvalue() returns the whole buffer
        # regardless of the current stream position.
        png_buffer = io.BytesIO()
        pil_image.save(png_buffer, format='PNG')

        description_prompt = "Provide a detailed, creative description of this image. Capture the key elements, mood, and potential narrative elements."
        image_part = {
            'mime_type': 'image/png',
            'data': png_buffer.getvalue(),
        }
        generation_settings = genai.types.GenerationConfig(
            max_output_tokens=200,
            temperature=0.7,
        )

        result = vision_model.generate_content(
            contents=[image_part, description_prompt],
            generation_config=generation_settings,
        )
        return result.text.strip()
    except Exception as err:
        print(f"Error in image description: {err}")
        return "Error processing image. Please try again."
# Text-to-story generation function using Gemini
def txt2story(prompt, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Generate a story with the ``gemini-1.5-flash`` model.

    The story options and the free-text ``prompt`` are interpolated into one
    long instruction prompt which is sent to the model.

    Args:
        prompt: Free-text story seed (the caller passes an image description).
        genre: Story genre.
        setting: Time/place setting, combined with ``continent``.
        continent: Continent the setting is located in.
        tone: Narrative tone.
        theme: Central theme (used twice in the prompt).
        conflict: Primary conflict.
        twist: Narrative twist.
        ending: Kind of ending.

    Returns:
        The generated story with surrounding whitespace removed, or the fixed
        message "Error generating story. Please try again." when any step
        after client configuration raises.
    """
    # Configuration errors are intentionally not caught here.
    configure_google_ai()
    try:
        story_model = genai.GenerativeModel('gemini-1.5-flash')

        # The prompt text is kept at column 0 so that the string sent to the
        # model contains no indentation whitespace.
        story_prompt = f"""You are an experienced and masterful storyteller tasked with crafting an immersive, complex narrative. Create an elaborate and compelling story based on the following detailed specifications:
Story Framework:
- Genre: {genre}
- Setting: {setting} in {continent}
- Narrative Tone: {tone}
- Central Theme: {theme}
- Primary Conflict: {conflict}
- Narrative Twist: {twist}
- Story Ending: {ending} ending
Comprehensive Storytelling Guidelines:
1. Story Structure:
- Develop a multi-layered narrative with rich character development
- Create intricate plot progression with meaningful character arcs
- Integrate deep psychological and emotional dimensions
- Explore nuanced motivations and complex interpersonal dynamics
2. Narrative Depth:
- Provide comprehensive background context
- Develop multiple plot layers and subplots
- Include detailed character histories and motivations
- Demonstrate cause-and-effect relationships between events
3. Thematic Exploration:
- Deeply explore the chosen theme of {theme}
- Use symbolism and metaphorical elements
- Connect character experiences to broader philosophical or existential questions
- Demonstrate subtle and profound insights into human nature
4. Stylistic Elements:
- Use vivid, evocative language
- Create immersive sensory descriptions
- Balance dialogue, internal monologue, and narrative exposition
- Maintain consistent narrative voice
5. Emotional Complexity:
- Portray nuanced emotional landscapes
- Show character growth and transformation
- Create moments of genuine emotional resonance
- Balance tension, conflict, and moments of reflection
Story Prompt Details: {prompt}
Additional Context: Craft a narrative that transcends typical genre constraints. Aim for a story that is not just entertaining, but thought-provoking and emotionally impactful. The story should feel like a complete, self-contained narrative journey with depth, complexity, and meaningful resolution.
Expected Outcome:
- Comprehensive narrative arc
- Fully developed characters
- Profound thematic exploration
- Engaging and immersive storytelling
- Meaningful resolution that resonates with the story's core themes
"""

        sampling_settings = genai.types.GenerationConfig(
            max_output_tokens=1000,
            temperature=1.7,
            top_p=0.9,
            top_k=40,
        )
        result = story_model.generate_content(
            contents=story_prompt,
            generation_config=sampling_settings,
        )
        return result.text.strip()
    except Exception as err:
        print(f"Error generating story: {err}")
        return "Error generating story. Please try again."
# Text-to-speech function
def txt2speech(text):
    """Convert text to English speech and save it to a unique MP3 file.

    Args:
        text: The text to synthesise.

    Returns:
        The path of the generated MP3 file, or ``None`` when ``text`` is
        empty or whitespace-only (there is nothing to speak).
    """
    # Imported locally so this function carries its own dependency.
    import tempfile

    # Skip the TTS call entirely when there is no speakable content.
    if not text or not text.strip():
        return None

    tts = gTTS(text=text, lang='en')

    # A single fixed file name would be shared by every request, so two
    # users generating stories at the same time would overwrite each other's
    # audio. Reserve a unique path instead; delete=False keeps the file on
    # disk after the handle is closed so it can be served to the caller.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name

    tts.save(audio_path)
    return audio_path
# Main generation function
def generate_story(image, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Run the full pipeline: image -> description -> story -> audio.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story options forwarded unchanged to ``txt2story``.

    Returns:
        A ``(image_description, story, audio)`` tuple. When ``image`` is
        ``None`` the tuple is ``("", "", None)`` and nothing is generated.
    """
    # Guard clause: without an image there is nothing to describe.
    if image is None:
        return "", "", None

    # Step 1: describe the image.
    image_description = img2txt(image)

    # Step 2: turn the description plus the chosen options into a story.
    story_options = {
        "genre": genre,
        "setting": setting,
        "continent": continent,
        "tone": tone,
        "theme": theme,
        "conflict": conflict,
        "twist": twist,
        "ending": ending,
    }
    story = txt2story(prompt=image_description, **story_options)

    # Step 3: narrate the story.
    audio = txt2speech(story)

    return image_description, story, audio
# Gradio interface setup
def create_gradio_app():
    """Build the Gradio interface for the image-to-story generator.

    Returns:
        A ``gr.Interface`` wired to ``generate_story`` with one image input,
        eight dropdown inputs, and three outputs (description text, story
        text, story audio).
    """
    # (label, choices) for every dropdown, listed in the same order as the
    # option parameters of generate_story.
    dropdown_specs = [
        ("Genre", ["Science Fiction", "Fantasy", "Mystery", "Romance"]),
        ("Setting", ["Future", "Medieval times", "Modern day", "Alternate reality"]),
        ("Continent", ["North America", "Europe", "Asia", "Africa", "Australia"]),
        ("Tone", ["Serious", "Light-hearted", "Humorous", "Dark"]),
        ("Theme", ["Self-discovery", "Redemption", "Love", "Justice"]),
        ("Conflict Type", ["Person vs. Society", "Internal struggle", "Person vs. Nature", "Person vs. Person"]),
        ("Mystery/Twist", ["Plot twist", "Hidden identity", "Unexpected ally/enemy", "Time paradox"]),
        ("Ending", ["Happy", "Bittersweet", "Open-ended", "Tragic"]),
    ]

    # The image comes first, followed by one dropdown per story option.
    input_components = [gr.Image(type="pil", label="Upload Image")]
    input_components.extend(
        gr.Dropdown(choices, label=label) for label, choices in dropdown_specs
    )

    output_components = [
        gr.Textbox(label="Image Description"),
        gr.Textbox(label="Generated Story"),
        gr.Audio(label="Story Audio"),
    ]

    return gr.Interface(
        fn=generate_story,
        inputs=input_components,
        outputs=output_components,
        title="🎨 Image to Story Generator 📖",
        description="Upload an image and generate a unique story!",
    )
# Launch the app
if __name__ == "__main__":
    # Only start the server when this file is executed as a script;
    # importing the module just defines the functions above.
    demo = create_gradio_app()
    demo.launch(debug=True)