"""Gradio app that turns an uploaded image into a narrated short story.

Pipeline: image -> Gemini image description -> Gemini story -> gTTS audio.
"""

import base64
import io
import os
import tempfile
from typing import Optional, Tuple

import google.generativeai as genai
import gradio as gr
import httpx
from gtts import gTTS
from PIL import Image

# Name of the environment variable that must hold the Google AI API key.
API_KEY_ENV_VAR = "GOOGLE_API_KEY"

# Messages returned (instead of raising) when a generation step fails.
# generate_story() compares against them so a failure is never fed into the
# next pipeline stage as if it were real content.
IMAGE_ERROR_MESSAGE = "Error processing image. Please try again."
STORY_ERROR_MESSAGE = "Error generating story. Please try again."


# Configure Google AI API
def configure_google_ai():
    """Configure the google.generativeai client from the environment.

    The key is read from the ``GOOGLE_API_KEY`` environment variable so that
    no secret is stored in source control.

    Raises:
        ValueError: If the environment variable is unset or blank.
    """
    api_key = os.environ.get(API_KEY_ENV_VAR, "").strip()
    if not api_key:
        raise ValueError(
            "No API key found. Please provide a valid Google AI API key "
            f"via the {API_KEY_ENV_VAR} environment variable."
        )
    genai.configure(api_key=api_key)


# Image-to-text function using Gemini
def img2txt(image) -> str:
    """Generate a description for the uploaded image using Gemini.

    Uses the ``gemini-exp-1121`` model.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.

    Returns:
        The stripped description text, or ``IMAGE_ERROR_MESSAGE`` if anything
        goes wrong while encoding the image or calling the model.

    Raises:
        ValueError: If no API key is configured (raised before the request).
    """
    configure_google_ai()
    try:
        # Choose Gemini model
        model = genai.GenerativeModel('gemini-exp-1121')

        # Ensure image is in PIL format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Convert image to PNG bytes for the inline-data part of the request
        image_bytes = io.BytesIO()
        image.save(image_bytes, format='PNG')

        # Create prompt and generate content
        prompt = "Provide a detailed, creative description of this image. Capture the key elements, mood, and potential narrative elements."
        response = model.generate_content(
            contents=[
                {
                    'mime_type': 'image/png',
                    'data': image_bytes.getvalue(),
                },
                prompt,
            ],
            generation_config=genai.types.GenerationConfig(
                max_output_tokens=200,
                temperature=0.7,
            ),
        )
        return response.text.strip()
    except Exception as e:
        # Boundary handler: the UI shows a friendly message, details go to stdout.
        print(f"Error in image description: {e}")
        return IMAGE_ERROR_MESSAGE


# Text-to-story generation function using Gemini
def txt2story(prompt, genre, setting, continent, tone, theme, conflict, twist, ending) -> str:
    """Generate a story using Gemini 1.5 Flash.

    Args:
        prompt: Free-text seed for the story (here, the image description).
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story parameters interpolated verbatim into the model prompt.

    Returns:
        The stripped story text, or ``STORY_ERROR_MESSAGE`` if the model call
        fails.

    Raises:
        ValueError: If no API key is configured (raised before the request).
    """
    configure_google_ai()
    try:
        model = genai.GenerativeModel('gemini-1.5-flash')

        # Enhanced story generation prompt
        full_prompt = f"""You are an experienced and masterful storyteller tasked with crafting an immersive, complex narrative.
Create an elaborate and compelling story based on the following detailed specifications:

Story Framework:
- Genre: {genre}
- Setting: {setting} in {continent}
- Narrative Tone: {tone}
- Central Theme: {theme}
- Primary Conflict: {conflict}
- Narrative Twist: {twist}
- Story Ending: {ending} ending

Comprehensive Storytelling Guidelines:
1. Story Structure:
- Develop a multi-layered narrative with rich character development
- Create intricate plot progression with meaningful character arcs
- Integrate deep psychological and emotional dimensions
- Explore nuanced motivations and complex interpersonal dynamics

2. Narrative Depth:
- Provide comprehensive background context
- Develop multiple plot layers and subplots
- Include detailed character histories and motivations
- Demonstrate cause-and-effect relationships between events

3. Thematic Exploration:
- Deeply explore the chosen theme of {theme}
- Use symbolism and metaphorical elements
- Connect character experiences to broader philosophical or existential questions
- Demonstrate subtle and profound insights into human nature

4. Stylistic Elements:
- Use vivid, evocative language
- Create immersive sensory descriptions
- Balance dialogue, internal monologue, and narrative exposition
- Maintain consistent narrative voice

5. Emotional Complexity:
- Portray nuanced emotional landscapes
- Show character growth and transformation
- Create moments of genuine emotional resonance
- Balance tension, conflict, and moments of reflection

Story Prompt Details: {prompt}

Additional Context: Craft a narrative that transcends typical genre constraints. Aim for a story that is not just entertaining, but thought-provoking and emotionally impactful. The story should feel like a complete, self-contained narrative journey with depth, complexity, and meaningful resolution.

Expected Outcome:
- Comprehensive narrative arc
- Fully developed characters
- Profound thematic exploration
- Engaging and immersive storytelling
- Meaningful resolution that resonates with the story's core themes
"""

        response = model.generate_content(
            contents=full_prompt,
            generation_config=genai.types.GenerationConfig(
                max_output_tokens=1000,
                temperature=1.7,
                top_p=0.9,  # More diverse word selection
                top_k=40,  # Broader vocabulary range
            ),
        )
        return response.text.strip()
    except Exception as e:
        # Boundary handler: the UI shows a friendly message, details go to stdout.
        print(f"Error generating story: {e}")
        return STORY_ERROR_MESSAGE


# Text-to-speech function
def txt2speech(text) -> Optional[str]:
    """Convert text to speech and save it to a uniquely named MP3 file.

    Each call writes to its own temporary file so that simultaneous requests
    cannot overwrite one another's audio.

    Args:
        text: The English text to speak.

    Returns:
        Path of the generated MP3 file, or ``None`` when ``text`` is empty or
        speech synthesis fails (so the caller can still show the story).
    """
    if not text or not text.strip():
        # Nothing to speak; skip synthesis rather than fail on empty input.
        return None

    # Reserve a unique path, then close the handle so the file can be
    # reopened by name for writing on every platform.
    fd, audio_path = tempfile.mkstemp(prefix="story_audio_", suffix=".mp3")
    os.close(fd)
    try:
        gTTS(text=text, lang='en').save(audio_path)
    except Exception as e:
        print(f"Error generating audio: {e}")
        # Do not leave an empty placeholder file behind.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        return None
    return audio_path


# Main generation function
def generate_story(
    image, genre, setting, continent, tone, theme, conflict, twist, ending
) -> Tuple[str, str, Optional[str]]:
    """Generate an image description, a story and its narration.

    Args:
        image: The uploaded image, or ``None`` if nothing was uploaded.
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story parameters forwarded to ``txt2story``.

    Returns:
        ``(image_description, story, audio_path)``. If no image is given the
        result is ``("", "", None)``. If a stage fails, its error message is
        returned in place and the later stages are skipped.
    """
    # Ensure image is provided
    if image is None:
        return "", "", None

    # Generate image description
    image_description = img2txt(image)
    if image_description == IMAGE_ERROR_MESSAGE:
        # Do not build a story out of an error message.
        return image_description, "", None

    # Generate story
    story = txt2story(
        prompt=image_description,
        genre=genre,
        setting=setting,
        continent=continent,
        tone=tone,
        theme=theme,
        conflict=conflict,
        twist=twist,
        ending=ending,
    )
    if story == STORY_ERROR_MESSAGE:
        # Do not narrate an error message.
        return image_description, story, None

    # Generate audio
    audio = txt2speech(story)

    return image_description, story, audio


# Gradio interface setup
def create_gradio_app():
    """Build and return the Gradio interface for the story generator.

    Every dropdown defaults to its first option so that an untouched control
    never sends ``None`` into the story prompt.
    """
    # Dropdown options
    genre_opts = ["Science Fiction", "Fantasy", "Mystery", "Romance"]
    setting_opts = ["Future", "Medieval times", "Modern day", "Alternate reality"]
    continent_opts = ["North America", "Europe", "Asia", "Africa", "Australia"]
    tone_opts = ["Serious", "Light-hearted", "Humorous", "Dark"]
    theme_opts = ["Self-discovery", "Redemption", "Love", "Justice"]
    conflict_opts = ["Person vs. Society", "Internal struggle", "Person vs. Nature", "Person vs. Person"]
    twist_opts = ["Plot twist", "Hidden identity", "Unexpected ally/enemy", "Time paradox"]
    ending_opts = ["Happy", "Bittersweet", "Open-ended", "Tragic"]

    # Create Gradio interface
    demo = gr.Interface(
        fn=generate_story,
        inputs=[
            gr.Image(type="pil", label="Upload Image"),
            gr.Dropdown(genre_opts, value=genre_opts[0], label="Genre"),
            gr.Dropdown(setting_opts, value=setting_opts[0], label="Setting"),
            gr.Dropdown(continent_opts, value=continent_opts[0], label="Continent"),
            gr.Dropdown(tone_opts, value=tone_opts[0], label="Tone"),
            gr.Dropdown(theme_opts, value=theme_opts[0], label="Theme"),
            gr.Dropdown(conflict_opts, value=conflict_opts[0], label="Conflict Type"),
            gr.Dropdown(twist_opts, value=twist_opts[0], label="Mystery/Twist"),
            gr.Dropdown(ending_opts, value=ending_opts[0], label="Ending"),
        ],
        outputs=[
            gr.Textbox(label="Image Description"),
            gr.Textbox(label="Generated Story"),
            gr.Audio(label="Story Audio"),
        ],
        title="🎨 Image to Story Generator 📖",
        description="Upload an image and generate a unique story!",
    )
    return demo


# Launch the app
if __name__ == "__main__":
    demo = create_gradio_app()
    demo.launch(debug=True)