# File size: 7,528 Bytes
# Revision: 07d440e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217


import os
import gradio as gr
import google.generativeai as genai
from gtts import gTTS
import io
from PIL import Image
import httpx
import base64

# Configure Google AI API
def configure_google_ai():
    """Configure the Google Generative AI client from the environment.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable so
    that no secret ever lives in source control. Surrounding whitespace is
    stripped before the key is used.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset or blank.
    """
    # Secrets must come from the runtime environment, never from the code.
    api_key = os.environ.get("GOOGLE_API_KEY", "").strip()

    if not api_key:
        raise ValueError(
            "No API key found. Please set the GOOGLE_API_KEY environment "
            "variable to a valid Google AI API key."
        )

    genai.configure(api_key=api_key)

# Image-to-text function using Gemini
def img2txt(image):
    """Describe an uploaded image with a Gemini vision model.

    The image is serialised to PNG in memory and sent, together with a
    fixed description prompt, to the ``gemini-exp-1121`` model.

    Args:
        image: A ``PIL.Image.Image``; any other value is first passed
            through ``Image.fromarray``.

    Returns:
        The model's description with surrounding whitespace stripped, or a
        fixed error message if anything inside the ``try`` block raises.
    """
    configure_google_ai()

    try:
        vision_model = genai.GenerativeModel('gemini-exp-1121')

        # Anything that is not already a PIL image goes through fromarray.
        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

        # Serialise to an in-memory PNG for upload.
        png_buffer = io.BytesIO()
        pil_image.save(png_buffer, format='PNG')
        png_buffer.seek(0)

        instruction = "Provide a detailed, creative description of this image. Capture the key elements, mood, and potential narrative elements."
        image_part = {
            'mime_type': 'image/png',
            'data': png_buffer.getvalue(),
        }
        sampling = genai.types.GenerationConfig(
            max_output_tokens=200,
            temperature=0.7,
        )

        reply = vision_model.generate_content(
            contents=[image_part, instruction],
            generation_config=sampling,
        )
        return reply.text.strip()

    except Exception as err:
        # Failures are logged to stdout and reported as a fixed message.
        print(f"Error in image description: {err}")
        return "Error processing image. Please try again."

# Text-to-story generation function using Gemini
def txt2story(prompt, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Write a story with the ``gemini-1.5-flash`` model.

    All arguments are interpolated into a single instruction prompt.

    Args:
        prompt: Free-text seed for the story (here, the image description).
        genre: Story genre.
        setting: Time/place setting, combined with ``continent``.
        continent: Continent the setting is located in.
        tone: Narrative tone.
        theme: Central theme (mentioned twice in the instructions).
        conflict: Primary conflict type.
        twist: Narrative twist.
        ending: Kind of ending.

    Returns:
        The generated story with surrounding whitespace stripped, or a
        fixed error message if anything inside the ``try`` block raises.
    """
    configure_google_ai()

    try:
        story_model = genai.GenerativeModel('gemini-1.5-flash')

        # Full instruction text sent to the model.
        story_prompt = f"""You are an experienced and masterful storyteller tasked with crafting an immersive, complex narrative. Create an elaborate and compelling story based on the following detailed specifications:

Story Framework:
- Genre: {genre}
- Setting: {setting} in {continent}
- Narrative Tone: {tone}
- Central Theme: {theme}
- Primary Conflict: {conflict}
- Narrative Twist: {twist}
- Story Ending: {ending} ending

Comprehensive Storytelling Guidelines:
1. Story Structure:
   - Develop a multi-layered narrative with rich character development
   - Create intricate plot progression with meaningful character arcs
   - Integrate deep psychological and emotional dimensions
   - Explore nuanced motivations and complex interpersonal dynamics

2. Narrative Depth:
   - Provide comprehensive background context
   - Develop multiple plot layers and subplots
   - Include detailed character histories and motivations
   - Demonstrate cause-and-effect relationships between events

3. Thematic Exploration:
   - Deeply explore the chosen theme of {theme}
   - Use symbolism and metaphorical elements
   - Connect character experiences to broader philosophical or existential questions
   - Demonstrate subtle and profound insights into human nature

4. Stylistic Elements:
   - Use vivid, evocative language
   - Create immersive sensory descriptions
   - Balance dialogue, internal monologue, and narrative exposition
   - Maintain consistent narrative voice

5. Emotional Complexity:
   - Portray nuanced emotional landscapes
   - Show character growth and transformation
   - Create moments of genuine emotional resonance
   - Balance tension, conflict, and moments of reflection

Story Prompt Details: {prompt}

Additional Context: Craft a narrative that transcends typical genre constraints. Aim for a story that is not just entertaining, but thought-provoking and emotionally impactful. The story should feel like a complete, self-contained narrative journey with depth, complexity, and meaningful resolution.

Expected Outcome:
- Comprehensive narrative arc
- Fully developed characters
- Profound thematic exploration
- Engaging and immersive storytelling
- Meaningful resolution that resonates with the story's core themes
"""

        # High temperature plus top-p/top-k sampling for varied output.
        sampling = genai.types.GenerationConfig(
            max_output_tokens=1000,
            temperature=1.7,
            top_p=0.9,
            top_k=40,
        )

        reply = story_model.generate_content(
            contents=story_prompt,
            generation_config=sampling,
        )
        return reply.text.strip()

    except Exception as err:
        # Failures are logged to stdout and reported as a fixed message.
        print(f"Error generating story: {err}")
        return "Error generating story. Please try again."

# Text-to-speech function
def txt2speech(text):
    """Convert text to spoken English audio and save it as an MP3 file.

    Each call writes to its own temporary file, so concurrent requests
    never overwrite one another's audio (a single fixed filename would be
    shared by every request served by the app).

    Args:
        text: The text to narrate.

    Returns:
        Path to the generated ``.mp3`` file, or ``None`` when ``text`` is
        empty or whitespace-only and there is nothing to narrate.
    """
    import tempfile  # function-scope import keeps this block self-contained

    # Nothing to speak: skip the TTS call instead of sending blank text.
    if not text or not text.strip():
        return None

    tts = gTTS(text=text, lang='en')

    # Reserve a unique path, then close the handle before gTTS writes to it
    # (an open NamedTemporaryFile cannot be reopened on every platform).
    # delete=False: the file must outlive this function so the UI can read it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        audio_path = handle.name

    tts.save(audio_path)
    return audio_path

# Main generation function
def generate_story(image, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Run the full pipeline: image -> description -> story -> audio.

    Args:
        image: The uploaded image, or ``None`` if nothing was uploaded.
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story options forwarded unchanged to ``txt2story``.

    Returns:
        A ``(image_description, story, audio)`` tuple. When ``image`` is
        ``None`` the result is ``("", "", None)``. ``audio`` is whatever
        ``txt2speech`` returns, or ``None`` if narration failed.
    """
    # Without an image there is nothing to describe or narrate.
    if image is None:
        return "", "", None

    image_description = img2txt(image)

    story = txt2story(
        prompt=image_description,
        genre=genre,
        setting=setting,
        continent=continent,
        tone=tone,
        theme=theme,
        conflict=conflict,
        twist=twist,
        ending=ending
    )

    # Narration is optional: a text-to-speech failure must not discard the
    # description and story that were already produced. Mirrors the
    # print-and-fallback handling used by the other pipeline steps.
    try:
        audio = txt2speech(story)
    except Exception as e:
        print(f"Error generating audio: {e}")
        audio = None

    return image_description, story, audio

# Gradio interface setup
def create_gradio_app():
    """Build the Gradio interface for the image-to-story generator.

    Inputs are one image upload followed by eight dropdowns, in the same
    order as the parameters of ``generate_story``; outputs are the image
    description, the story text and the story audio.

    Returns:
        The constructed ``gr.Interface`` (not yet launched).
    """
    # (label, choices) for each dropdown, in generate_story parameter order.
    dropdown_specs = (
        ("Genre", ["Science Fiction", "Fantasy", "Mystery", "Romance"]),
        ("Setting", ["Future", "Medieval times", "Modern day", "Alternate reality"]),
        ("Continent", ["North America", "Europe", "Asia", "Africa", "Australia"]),
        ("Tone", ["Serious", "Light-hearted", "Humorous", "Dark"]),
        ("Theme", ["Self-discovery", "Redemption", "Love", "Justice"]),
        ("Conflict Type", ["Person vs. Society", "Internal struggle", "Person vs. Nature", "Person vs. Person"]),
        ("Mystery/Twist", ["Plot twist", "Hidden identity", "Unexpected ally/enemy", "Time paradox"]),
        ("Ending", ["Happy", "Bittersweet", "Open-ended", "Tragic"]),
    )

    # The image comes first, then one dropdown per story option.
    input_components = [gr.Image(type="pil", label="Upload Image")]
    for label, choices in dropdown_specs:
        input_components.append(gr.Dropdown(choices, label=label))

    output_components = [
        gr.Textbox(label="Image Description"),
        gr.Textbox(label="Generated Story"),
        gr.Audio(label="Story Audio"),
    ]

    return gr.Interface(
        fn=generate_story,
        inputs=input_components,
        outputs=output_components,
        title="🎨 Image to Story Generator 📖",
        description="Upload an image and generate a unique story!",
    )

# Launch the app
if __name__ == "__main__":
    # Script entry point: build the interface and serve it in debug mode.
    create_gradio_app().launch(debug=True)