# Source: Hugging Face Space (status when captured: Sleeping)
# File size: 7,528 bytes; commit 07d440e
import os
import gradio as gr
import google.generativeai as genai
from gtts import gTTS
import io
from PIL import Image
import httpx
import base64
# Configure Google AI API
def configure_google_ai():
    """Configure the ``google.generativeai`` client with an API key.

    The key is read from the ``GOOGLE_API_KEY`` environment variable instead
    of being embedded in the source, so the secret is never committed to
    version control.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset or blank.
    """
    # NOTE(review): a literal key was previously committed in this function;
    # treat that key as leaked and revoke it.
    api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
    if not api_key:
        raise ValueError("No API key found. Please provide a valid Google AI API key.")
    genai.configure(api_key=api_key)
# Image-to-text function using Gemini
def img2txt(image):
    """Describe an uploaded image using the 'gemini-exp-1121' Gemini model.

    The image is serialised to PNG bytes and sent to the model together with
    a fixed description prompt.

    Args:
        image: A PIL ``Image.Image``; any other value is passed through
            ``Image.fromarray`` first (presumably a NumPy array — confirm
            against the caller).

    Returns:
        The model's response text with surrounding whitespace stripped, or a
        fixed error message if any step inside the request pipeline raises.
    """
    configure_google_ai()
    try:
        vision_model = genai.GenerativeModel('gemini-exp-1121')

        # Anything that is not already a PIL image gets converted.
        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

        # Encode the picture as PNG in memory; no file is written.
        png_buffer = io.BytesIO()
        pil_image.save(png_buffer, format='PNG')
        image_part = {
            'mime_type': 'image/png',
            'data': png_buffer.getvalue(),
        }

        prompt = "Provide a detailed, creative description of this image. Capture the key elements, mood, and potential narrative elements."
        settings = genai.types.GenerationConfig(
            max_output_tokens=200,
            temperature=0.7,
        )
        response = vision_model.generate_content(
            contents=[image_part, prompt],
            generation_config=settings,
        )
        return response.text.strip()
    except Exception as e:
        # Report the failure on stdout and hand the UI a fixed message.
        print(f"Error in image description: {e}")
        return "Error processing image. Please try again."
# Text-to-story generation function using Gemini
def txt2story(prompt, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Generate a story with the 'gemini-1.5-flash' model.

    The story options are interpolated into one long instruction prompt,
    with ``prompt`` inserted as the "Story Prompt Details".

    Args:
        prompt: Seed text for the story.
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Values interpolated into the "Story Framework" section of the
            instruction prompt.

    Returns:
        The model's response text with surrounding whitespace stripped, or a
        fixed error message if building the request or calling the model
        raises.
    """
    configure_google_ai()
    try:
        story_model = genai.GenerativeModel('gemini-1.5-flash')

        # The prompt text is kept flush-left so the string sent to the model
        # contains no leading indentation.
        story_prompt = f"""You are an experienced and masterful storyteller tasked with crafting an immersive, complex narrative. Create an elaborate and compelling story based on the following detailed specifications:
Story Framework:
- Genre: {genre}
- Setting: {setting} in {continent}
- Narrative Tone: {tone}
- Central Theme: {theme}
- Primary Conflict: {conflict}
- Narrative Twist: {twist}
- Story Ending: {ending} ending
Comprehensive Storytelling Guidelines:
1. Story Structure:
- Develop a multi-layered narrative with rich character development
- Create intricate plot progression with meaningful character arcs
- Integrate deep psychological and emotional dimensions
- Explore nuanced motivations and complex interpersonal dynamics
2. Narrative Depth:
- Provide comprehensive background context
- Develop multiple plot layers and subplots
- Include detailed character histories and motivations
- Demonstrate cause-and-effect relationships between events
3. Thematic Exploration:
- Deeply explore the chosen theme of {theme}
- Use symbolism and metaphorical elements
- Connect character experiences to broader philosophical or existential questions
- Demonstrate subtle and profound insights into human nature
4. Stylistic Elements:
- Use vivid, evocative language
- Create immersive sensory descriptions
- Balance dialogue, internal monologue, and narrative exposition
- Maintain consistent narrative voice
5. Emotional Complexity:
- Portray nuanced emotional landscapes
- Show character growth and transformation
- Create moments of genuine emotional resonance
- Balance tension, conflict, and moments of reflection
Story Prompt Details: {prompt}
Additional Context: Craft a narrative that transcends typical genre constraints. Aim for a story that is not just entertaining, but thought-provoking and emotionally impactful. The story should feel like a complete, self-contained narrative journey with depth, complexity, and meaningful resolution.
Expected Outcome:
- Comprehensive narrative arc
- Fully developed characters
- Profound thematic exploration
- Engaging and immersive storytelling
- Meaningful resolution that resonates with the story's core themes
"""
        # Sampling settings passed through to the model unchanged.
        sampling = genai.types.GenerationConfig(
            max_output_tokens=1000,
            temperature=1.7,
            top_p=0.9,
            top_k=40,
        )
        response = story_model.generate_content(
            contents=story_prompt,
            generation_config=sampling,
        )
        return response.text.strip()
    except Exception as e:
        # Report the failure on stdout and hand the UI a fixed message.
        print(f"Error generating story: {e}")
        return "Error generating story. Please try again."
# Text-to-speech function
def txt2speech(text):
    """Convert text to spoken English audio and return the MP3 file path.

    Args:
        text: The text to synthesise.

    Returns:
        Path of a newly created MP3 file, or ``None`` when ``text`` is empty
        or whitespace only (there is nothing to speak, and gTTS would raise).
    """
    if not text or not text.strip():
        return None

    # Imported locally so this function brings its own dependency into scope.
    import tempfile

    tts = gTTS(text=text, lang='en')

    # Reserve a unique file name per call: a single fixed path would be
    # overwritten when several requests are served at the same time.
    with tempfile.NamedTemporaryFile(
        prefix="story_audio_", suffix=".mp3", delete=False
    ) as handle:
        audio_path = handle.name

    # The handle is closed before gTTS writes to the path.
    tts.save(audio_path)
    return audio_path
# Main generation function
def generate_story(image, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Run the image -> description -> story -> audio pipeline.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story options forwarded unchanged to ``txt2story``.

    Returns:
        A ``(image_description, story, audio)`` tuple. When ``image`` is
        ``None`` the tuple is ``("", "", None)`` and no other work is done.
    """
    # Guard clause: without an image there is nothing to describe.
    if image is None:
        return "", "", None

    description = img2txt(image)

    # The image description serves as the story's seed prompt.
    narrative = txt2story(
        description, genre, setting, continent, tone, theme, conflict, twist, ending
    )

    narration = txt2speech(narrative)
    return description, narrative, narration
# Gradio interface setup
def create_gradio_app():
    """Build the Gradio interface for the story generator.

    The interface takes one image upload plus eight dropdowns, in the same
    order as the parameters of ``generate_story``, and shows the image
    description, the generated story and the story audio.

    Returns:
        The constructed ``gr.Interface``; it is not launched here.
    """
    # Dropdown label -> choices. Insertion order must match the parameter
    # order of generate_story after the image argument.
    dropdown_choices = {
        "Genre": ["Science Fiction", "Fantasy", "Mystery", "Romance"],
        "Setting": ["Future", "Medieval times", "Modern day", "Alternate reality"],
        "Continent": ["North America", "Europe", "Asia", "Africa", "Australia"],
        "Tone": ["Serious", "Light-hearted", "Humorous", "Dark"],
        "Theme": ["Self-discovery", "Redemption", "Love", "Justice"],
        "Conflict Type": ["Person vs. Society", "Internal struggle", "Person vs. Nature", "Person vs. Person"],
        "Mystery/Twist": ["Plot twist", "Hidden identity", "Unexpected ally/enemy", "Time paradox"],
        "Ending": ["Happy", "Bittersweet", "Open-ended", "Tragic"],
    }

    input_components = [gr.Image(type="pil", label="Upload Image")]
    input_components.extend(
        gr.Dropdown(choices, label=label)
        for label, choices in dropdown_choices.items()
    )

    output_components = [
        gr.Textbox(label="Image Description"),
        gr.Textbox(label="Generated Story"),
        gr.Audio(label="Story Audio"),
    ]

    return gr.Interface(
        fn=generate_story,
        inputs=input_components,
        outputs=output_components,
        title="🎨 Image to Story Generator 📖",
        description="Upload an image and generate a unique story!",
    )
# Launch the app
if __name__ == "__main__":
    # Script entry point: build the interface and start the Gradio server.
    # Nothing is launched when this module is merely imported.
    demo = create_gradio_app()
    demo.launch(debug=True)