# core/gemini_handler.py
import google.generativeai as genai
import json
import re
import logging  # Added logging

logger = logging.getLogger(__name__)  # Added logger


class GeminiHandler:
    def __init__(self, api_key):
        genai.configure(api_key=api_key)
        # For complex JSON and instruction following, 'gemini-1.0-pro' or 'gemini-1.5-pro-latest' might be more robust.
        # 'gemini-1.5-flash-latest' is faster and cheaper but might sometimes struggle with very complex formats.
        self.model_name = 'gemini-1.5-flash-latest'  # or 'gemini-1.0-pro' or 'gemini-1.5-pro-latest'
        self.model = genai.GenerativeModel(self.model_name)
        logger.info(f"GeminiHandler initialized with model: {self.model_name}")

    def _clean_json_response(self, text_response):
        """
        Attempts to extract a valid JSON string from Gemini's text response.
        Prioritizes content within ```json ... ``` blocks.
        """
        if not text_response:
            return ""

        # Attempt 1: Find JSON within markdown code blocks
        match = re.search(r"```json\s*([\s\S]*?)\s*```", text_response, re.DOTALL)
        if match:
            json_str = match.group(1).strip()
            logger.debug("Found JSON in markdown code block.")
            return json_str  # Assume this is the intended JSON

        # Attempt 2: If no markdown block, try to find the start of a JSON list or object directly.
        # This is more heuristic and might pick up non-JSON if the model doesn't use code blocks.
        json_str = text_response.strip()
        first_char = next((char for char in json_str if char in ['[', '{']), None)

        if first_char == '[':
            # Find the first '[' and try to match until the last ']'
            start_index = json_str.find('[')
            end_index = json_str.rfind(']')
            if start_index != -1 and end_index != -1 and end_index > start_index:
                json_str = json_str[start_index:end_index + 1]
                logger.debug("Extracted potential JSON list directly.")
                return json_str.strip()
        elif first_char == '{':
            # Find the first '{' and try to match until the last '}'
            start_index = json_str.find('{')
            end_index = json_str.rfind('}')
            if start_index != -1 and end_index != -1 and end_index > start_index:
                json_str = json_str[start_index:end_index + 1]
                logger.debug("Extracted potential JSON object directly.")
                return json_str.strip()

        logger.warning("Could not clearly identify JSON structure in the response. Returning raw attempt.")
        return text_response  # Return original if no clear JSON structure found by heuristics

    def _execute_gemini_call(self, prompt_text, expect_json=False):
        raw_text_content = ""  # Initialize to ensure it's defined for logging
        cleaned_json_attempt = ""  # Initialize
        try:
            logger.info(f"Executing Gemini call (expect_json={expect_json}). Prompt starts with: {prompt_text[:150]}...")

            # Safety settings can be adjusted if needed, though defaults are usually fine.
            # generation_config = genai.types.GenerationConfig(
            #     # temperature=0.7,  # Example: Adjust creativity
            # )

            response = self.model.generate_content(
                prompt_text,
                # generation_config=generation_config
            )

            # Check for safety ratings or blocks first (if applicable to your SDK version and use case)
            # if response.prompt_feedback and response.prompt_feedback.block_reason:
            #     logger.error(f"Gemini call blocked. Reason: {response.prompt_feedback.block_reason_message}")
            #     raise Exception(f"Gemini call blocked: {response.prompt_feedback.block_reason_message}")
            # if not response.candidates:
            #     logger.error("Gemini call returned no candidates. Check prompt or safety settings.")
            #     raise Exception("Gemini call returned no candidates.")

            raw_text_content = response.text  # Assuming .text gives the full string
            logger.debug(f"Gemini raw response text (first 300 chars): {raw_text_content[:300]}")

            if expect_json:
                cleaned_json_attempt = self._clean_json_response(raw_text_content)
                if not cleaned_json_attempt:  # If cleaning returned empty
                    logger.error("JSON cleaning resulted in an empty string.")
                    raise json.JSONDecodeError("Cleaned JSON string is empty", "", 0)

                logger.debug(f"Attempting to parse cleaned JSON (first 300 chars): {cleaned_json_attempt[:300]}")
                parsed_json = json.loads(cleaned_json_attempt)
                logger.info("Gemini call successful, JSON parsed.")
                return parsed_json
            else:
                logger.info("Gemini call successful, returning text.")
                return raw_text_content.strip()

        except json.JSONDecodeError as e:
            logger.error(f"JSONDecodeError: {e}. Failed to parse JSON from Gemini response.")
            logger.error(f"--- Problematic Gemini Raw Response ---\n{raw_text_content}\n--- End Raw Response ---")
            logger.error(f"--- Cleaned JSON Attempt ---\n{cleaned_json_attempt}\n--- End Cleaned Attempt ---")
            raise  # Re-raise for the caller to handle (e.g., show error in UI)
        except AttributeError as ae:
            # Handles cases where `response.text` might not exist if the call failed early
            logger.error(f"AttributeError during Gemini call processing: {ae}. Likely an issue with the response object structure.", exc_info=True)
            raise Exception(f"Gemini API response structure error: {ae}")
        except Exception as e:
            # This catches other errors from genai.GenerativeModel.generate_content(),
            # e.g., google.api_core.exceptions.PermissionDenied, google.api_core.exceptions.ResourceExhausted
            logger.error(f"General error during Gemini API call: {type(e).__name__} - {e}", exc_info=True)
            logger.error(f"--- Problematic Gemini Raw Response (if available) ---\n{raw_text_content}\n--- End Raw Response ---")
            raise  # Re-raise

    def generate_story_breakdown(self, prompt_text):
        """
        Generates the full cinematic treatment (list of scene JSON objects).
        """
        return self._execute_gemini_call(prompt_text, expect_json=True)

    def generate_image_prompt(self, prompt_text):
        """
        Generates or refines a DALL-E prompt string (expects text, not JSON).
        Used by `create_visual_regeneration_prompt` from prompt_engineering.
        """
        return self._execute_gemini_call(prompt_text, expect_json=False)

    def regenerate_scene_script_details(self, prompt_text):
        """
        Regenerates the JSON object for a single scene based on feedback.
        """
        return self._execute_gemini_call(prompt_text, expect_json=True)

    # Renamed for clarity, as it refines a DALL-E prompt string based on feedback.
    def refine_image_prompt_from_feedback(self, prompt_text):
        """
        Refines an existing DALL-E prompt string based on user feedback.
        Expects Gemini to return a new string (the refined prompt).
        This method is called by app.py, which uses create_visual_regeneration_prompt.
        """
        return self._execute_gemini_call(prompt_text, expect_json=False)

    # You might add a new method here if you want Gemini to help construct
    # the text-to-video prompt specifically, though your current `construct_text_to_video_prompt`
    # in prompt_engineering.py does this without a Gemini call.
    # If you did want Gemini to craft it:
    # def generate_text_to_video_prompt_string(self, prompt_text_for_gemini):
    #     """
    #     Asks Gemini to craft a detailed text-to-video prompt string.
    #     """
    #     return self._execute_gemini_call(prompt_text_for_gemini, expect_json=False)
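

# --- Illustrative usage sketch (assumptions flagged, not the app's real wiring) ---
# A minimal example of how GeminiHandler might be exercised end to end: configure
# logging, build the handler, request a story breakdown, and let the exceptions
# re-raised by _execute_gemini_call surface to the caller. The GEMINI_API_KEY
# environment variable and the placeholder prompt string are assumptions for this
# sketch; the real app builds prompts via prompt_engineering.py and handles errors
# in its UI layer (app.py).
if __name__ == "__main__":
    import os

    logging.basicConfig(level=logging.INFO)

    handler = GeminiHandler(api_key=os.environ["GEMINI_API_KEY"])  # assumed env var name
    try:
        # expect_json=True path: a parsed Python list/dict is returned on success.
        scenes = handler.generate_story_breakdown(
            "Break the following story into scenes and return a JSON list of scene objects: ..."
        )
        logger.info(f"Received breakdown with {len(scenes)} top-level entries.")
    except json.JSONDecodeError:
        logger.error("Gemini returned text that could not be parsed as JSON.")
    except Exception as exc:
        # Covers API-level failures (quota, permissions, blocked prompts) re-raised above.
        logger.error(f"Story breakdown failed: {exc}")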