# Spaces: Sleeping
import base64
import io
import os
import tempfile

import google.generativeai as genai
import gradio as gr
import httpx
from gtts import gTTS
from PIL import Image
# Configure Google AI API
def configure_google_ai():
    """Configure the ``google.generativeai`` client with an API key.

    The key is read from the ``GOOGLE_API_KEY`` environment variable instead
    of being embedded in the source, so the secret is never committed or
    exposed to anyone who can read this file.

    NOTE: any key that was previously hardcoded in this file should be
    treated as leaked and revoked.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is unset or contains only whitespace.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
    if not api_key:
        raise ValueError(
            "No API key found. Please provide a valid Google AI API key "
            "via the GOOGLE_API_KEY environment variable."
        )
    genai.configure(api_key=api_key)
# Image-to-text function using Gemini
def img2txt(image):
    """Describe an image using the Gemini ``gemini-exp-1121`` model.

    The image is PNG-encoded in memory and sent together with a fixed
    text instruction asking for a detailed, creative description.

    Args:
        image: A PIL image, or any object ``Image.fromarray`` accepts.

    Returns:
        The model's description with surrounding whitespace stripped, or a
        fixed error message if anything inside the request path raises.
        Errors raised by ``configure_google_ai`` are not caught here.
    """
    configure_google_ai()
    try:
        vision_model = genai.GenerativeModel('gemini-exp-1121')

        # Normalise the input to a PIL image before encoding it.
        pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)

        # Serialise the image to PNG bytes held in memory.
        png_buffer = io.BytesIO()
        pil_image.save(png_buffer, format='PNG')
        png_buffer.seek(0)

        instruction = "Provide a detailed, creative description of this image. Capture the key elements, mood, and potential narrative elements."
        image_part = {
            'mime_type': 'image/png',
            'data': png_buffer.getvalue(),
        }
        sampling = genai.types.GenerationConfig(
            max_output_tokens=200,
            temperature=0.7,
        )
        result = vision_model.generate_content(
            contents=[image_part, instruction],
            generation_config=sampling,
        )
        return result.text.strip()
    except Exception as e:
        # Best-effort: report the failure on stdout and hand the UI a message.
        print(f"Error in image description: {e}")
        return "Error processing image. Please try again."
# Text-to-story generation function using Gemini
def txt2story(prompt, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Write a story with the Gemini ``gemini-1.5-flash`` model.

    The story options are interpolated into one long instruction prompt,
    with ``prompt`` (the image description) supplied as the story details.

    Args:
        prompt: Free-text story details, inserted under "Story Prompt Details".
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story options interpolated into the framework section.

    Returns:
        The generated story with surrounding whitespace stripped, or a fixed
        error message if anything inside the request path raises. Errors
        raised by ``configure_google_ai`` are not caught here.
    """
    configure_google_ai()
    try:
        story_model = genai.GenerativeModel('gemini-1.5-flash')
        # Enhanced story generation prompt
        story_request = f"""You are an experienced and masterful storyteller tasked with crafting an immersive, complex narrative. Create an elaborate and compelling story based on the following detailed specifications:
Story Framework:
- Genre: {genre}
- Setting: {setting} in {continent}
- Narrative Tone: {tone}
- Central Theme: {theme}
- Primary Conflict: {conflict}
- Narrative Twist: {twist}
- Story Ending: {ending} ending
Comprehensive Storytelling Guidelines:
1. Story Structure:
- Develop a multi-layered narrative with rich character development
- Create intricate plot progression with meaningful character arcs
- Integrate deep psychological and emotional dimensions
- Explore nuanced motivations and complex interpersonal dynamics
2. Narrative Depth:
- Provide comprehensive background context
- Develop multiple plot layers and subplots
- Include detailed character histories and motivations
- Demonstrate cause-and-effect relationships between events
3. Thematic Exploration:
- Deeply explore the chosen theme of {theme}
- Use symbolism and metaphorical elements
- Connect character experiences to broader philosophical or existential questions
- Demonstrate subtle and profound insights into human nature
4. Stylistic Elements:
- Use vivid, evocative language
- Create immersive sensory descriptions
- Balance dialogue, internal monologue, and narrative exposition
- Maintain consistent narrative voice
5. Emotional Complexity:
- Portray nuanced emotional landscapes
- Show character growth and transformation
- Create moments of genuine emotional resonance
- Balance tension, conflict, and moments of reflection
Story Prompt Details: {prompt}
Additional Context: Craft a narrative that transcends typical genre constraints. Aim for a story that is not just entertaining, but thought-provoking and emotionally impactful. The story should feel like a complete, self-contained narrative journey with depth, complexity, and meaningful resolution.
Expected Outcome:
- Comprehensive narrative arc
- Fully developed characters
- Profound thematic exploration
- Engaging and immersive storytelling
- Meaningful resolution that resonates with the story's core themes
"""
        sampling = genai.types.GenerationConfig(
            max_output_tokens=1000,
            temperature=1.7,
            top_p=0.9,  # More diverse word selection
            top_k=40,  # Broader vocabulary range
        )
        result = story_model.generate_content(
            contents=story_request,
            generation_config=sampling,
        )
        return result.text.strip()
    except Exception as e:
        # Best-effort: report the failure on stdout and hand the UI a message.
        print(f"Error generating story: {e}")
        return "Error generating story. Please try again."
# Text-to-speech function
def txt2speech(text):
    """Convert text to spoken English and save it as an MP3 file.

    Each call writes to its own uniquely named temporary file. A single
    fixed file name would be shared by every request, so two users
    generating stories at the same time would overwrite each other's audio.

    Args:
        text: The text to synthesise.

    Returns:
        Path to the generated MP3 file, or ``None`` when ``text`` is empty
        or whitespace-only (there is nothing to speak, and gTTS rejects
        empty input).
    """
    if not text or not text.strip():
        return None

    # Build the synthesiser first so a bad argument leaves no stray file.
    tts = gTTS(text=text, lang='en')

    # delete=False: the file must outlive this function so the caller (and
    # the UI serving the audio) can read it by path afterwards.
    with tempfile.NamedTemporaryFile(
        prefix="story_audio_", suffix=".mp3", delete=False
    ) as handle:
        audio_path = handle.name

    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty placeholder behind when synthesis fails.
        os.remove(audio_path)
        raise
    return audio_path
# Main generation function
def generate_story(image, genre, setting, continent, tone, theme, conflict, twist, ending):
    """Run the full pipeline: image -> description -> story -> audio.

    Args:
        image: The uploaded image, or ``None`` if nothing was uploaded.
        genre, setting, continent, tone, theme, conflict, twist, ending:
            Story options forwarded unchanged to ``txt2story``.

    Returns:
        A ``(description, story, audio)`` tuple. When no image is given the
        result is ``("", "", None)`` and no downstream step is run.
    """
    # Nothing to describe without an image; return blank outputs.
    if image is None:
        return "", "", None

    description = img2txt(image)

    story_options = dict(
        genre=genre,
        setting=setting,
        continent=continent,
        tone=tone,
        theme=theme,
        conflict=conflict,
        twist=twist,
        ending=ending,
    )
    narrative = txt2story(prompt=description, **story_options)

    narration = txt2speech(narrative)
    return description, narrative, narration
# Gradio interface setup
def create_gradio_app():
    """Build the Gradio interface for the image-to-story generator.

    The interface takes one image plus eight story-option dropdowns, calls
    ``generate_story``, and shows the image description, the story text and
    the narrated audio.

    Every dropdown is given an explicit default (its first choice). On
    Gradio versions where a Dropdown starts with nothing selected, the
    callback would otherwise receive ``None`` and the story prompt would
    contain text such as "Genre: None".

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    # (label, choices) pairs, in the same order as generate_story's
    # parameters after ``image``.
    dropdown_specs = [
        ("Genre", ["Science Fiction", "Fantasy", "Mystery", "Romance"]),
        ("Setting", ["Future", "Medieval times", "Modern day", "Alternate reality"]),
        ("Continent", ["North America", "Europe", "Asia", "Africa", "Australia"]),
        ("Tone", ["Serious", "Light-hearted", "Humorous", "Dark"]),
        ("Theme", ["Self-discovery", "Redemption", "Love", "Justice"]),
        ("Conflict Type", ["Person vs. Society", "Internal struggle", "Person vs. Nature", "Person vs. Person"]),
        ("Mystery/Twist", ["Plot twist", "Hidden identity", "Unexpected ally/enemy", "Time paradox"]),
        ("Ending", ["Happy", "Bittersweet", "Open-ended", "Tragic"]),
    ]
    story_controls = [
        gr.Dropdown(choices=choices, value=choices[0], label=label)
        for label, choices in dropdown_specs
    ]

    # Create Gradio interface
    demo = gr.Interface(
        fn=generate_story,
        inputs=[gr.Image(type="pil", label="Upload Image"), *story_controls],
        outputs=[
            gr.Textbox(label="Image Description"),
            gr.Textbox(label="Generated Story"),
            gr.Audio(label="Story Audio"),
        ],
        title="🎨 Image to Story Generator 📖",
        description="Upload an image and generate a unique story!",
    )
    return demo
# Launch the app
if __name__ == "__main__":
    # Only start the server when this file is run as a script, so importing
    # the module does not launch anything.
    demo = create_gradio_app()
    demo.launch(debug=True)