File size: 8,112 Bytes
db99ae5
 
 
4afbb07
3539a49
 
 
db99ae5
 
 
 
3539a49
 
 
 
 
db99ae5
 
3539a49
 
 
 
 
 
 
 
4afbb07
db99ae5
4afbb07
3539a49
 
4afbb07
3539a49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db99ae5
4afbb07
3539a49
 
db99ae5
3539a49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4afbb07
3539a49
 
 
 
 
 
 
 
 
 
 
 
 
4afbb07
3539a49
 
 
 
 
 
 
db99ae5
3539a49
 
 
 
4afbb07
 
 
3539a49
 
 
4afbb07
 
3539a49
 
 
 
 
4afbb07
 
3539a49
 
 
 
4afbb07
 
3539a49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# core/gemini_handler.py
import google.generativeai as genai
import json
import re
import logging # Added logging

logger = logging.getLogger(__name__) # Added logger

class GeminiHandler:
    """Thin wrapper around the google-generativeai SDK.

    Sends prompts to a Gemini model and returns either plain text or
    parsed JSON, with heuristics for extracting JSON from the model's
    free-form (often markdown-fenced) responses.
    """

    def __init__(self, api_key):
        """Configure the SDK and instantiate the generative model.

        Args:
            api_key: Google Generative AI API key.
        """
        genai.configure(api_key=api_key)
        # 'gemini-1.5-flash-latest' is fast and cheap; switch to
        # 'gemini-1.0-pro' or 'gemini-1.5-pro-latest' if complex JSON or
        # strict instruction-following proves unreliable.
        self.model_name = 'gemini-1.5-flash-latest'
        self.model = genai.GenerativeModel(self.model_name)
        logger.info("GeminiHandler initialized with model: %s", self.model_name)

    def _clean_json_response(self, text_response):
        """Extract the most plausible JSON string from a raw model response.

        Strategy, in priority order:
          1. Content inside a fenced code block. Both ```json ... ``` and a
             bare ``` ... ``` fence are accepted, since the model does not
             always tag the fence with a language.
          2. A heuristic slice from the first '[' / '{' to the last ']' /
             '}' — may pick up non-JSON if the model skipped the fence.

        Returns the extracted substring, or the original text unchanged
        when no JSON-like structure is found (the caller's json.loads will
        then raise and surface the problem).
        """
        if not text_response:
            return ""

        # Attempt 1: fenced code block. '(?:json)?' also accepts untagged
        # fences; '[\s\S]' already matches newlines, so no DOTALL needed,
        # and the lazy quantifier stops at the first closing fence.
        match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text_response)
        if match:
            logger.debug("Found JSON in markdown code block.")
            return match.group(1).strip()

        # Attempt 2: slice between the outermost brackets.
        stripped = text_response.strip()
        opener = next((ch for ch in stripped if ch in "[{"), None)
        if opener is not None:
            closer = "]" if opener == "[" else "}"
            start = stripped.find(opener)
            end = stripped.rfind(closer)
            if start != -1 and end > start:
                if opener == "[":
                    logger.debug("Extracted potential JSON list directly.")
                else:
                    logger.debug("Extracted potential JSON object directly.")
                return stripped[start:end + 1].strip()

        logger.warning("Could not clearly identify JSON structure in the response. Returning raw attempt.")
        return text_response

    def _execute_gemini_call(self, prompt_text, expect_json=False):
        """Send *prompt_text* to the model and return its response.

        Args:
            prompt_text: Fully rendered prompt to send.
            expect_json: When True, clean the response text and parse it
                with json.loads before returning.

        Returns:
            Parsed JSON (list/dict) when ``expect_json`` is True, otherwise
            the stripped response text.

        Raises:
            json.JSONDecodeError: The response could not be parsed as JSON.
            Exception: The underlying API call failed, or the response
                object lacked the expected structure.
        """
        # Pre-bind so the except handlers can always log them, even when
        # the failure happens before the response arrives.
        raw_text_content = ""
        cleaned_json_attempt = ""
        try:
            logger.info(
                "Executing Gemini call (expect_json=%s). Prompt starts with: %s...",
                expect_json, prompt_text[:150],
            )
            # NOTE: default safety settings and generation config are used;
            # pass a genai.types.GenerationConfig to tune temperature etc.
            response = self.model.generate_content(prompt_text)

            # response.text is absent/raises on blocked or empty responses;
            # that path is surfaced by the AttributeError handler below.
            raw_text_content = response.text
            logger.debug("Gemini raw response text (first 300 chars): %s",
                         raw_text_content[:300])

            if expect_json:
                cleaned_json_attempt = self._clean_json_response(raw_text_content)
                if not cleaned_json_attempt:
                    logger.error("JSON cleaning resulted in an empty string.")
                    raise json.JSONDecodeError("Cleaned JSON string is empty", "", 0)

                logger.debug("Attempting to parse cleaned JSON (first 300 chars): %s",
                             cleaned_json_attempt[:300])
                parsed_json = json.loads(cleaned_json_attempt)
                logger.info("Gemini call successful, JSON parsed.")
                return parsed_json

            logger.info("Gemini call successful, returning text.")
            return raw_text_content.strip()

        except json.JSONDecodeError as e:
            logger.error("JSONDecodeError: %s. Failed to parse JSON from Gemini response.", e)
            logger.error("--- Problematic Gemini Raw Response ---\n%s\n--- End Raw Response ---",
                         raw_text_content)
            logger.error("--- Cleaned JSON Attempt ---\n%s\n--- End Cleaned Attempt ---",
                         cleaned_json_attempt)
            raise  # Let the caller decide how to surface the failure (e.g. UI).
        except AttributeError as ae:
            # Typically: response.text missing because the call failed early
            # or the prompt was blocked.
            logger.error(
                "AttributeError during Gemini call processing: %s. "
                "Likely an issue with the response object structure.",
                ae, exc_info=True,
            )
            # Chain the cause so the original traceback is preserved.
            raise Exception(f"Gemini API response structure error: {ae}") from ae
        except Exception as e:
            # Other SDK/transport errors, e.g. google.api_core.exceptions
            # PermissionDenied or ResourceExhausted.
            logger.error("General error during Gemini API call: %s - %s",
                         type(e).__name__, e, exc_info=True)
            logger.error("--- Problematic Gemini Raw Response (if available) ---\n%s\n--- End Raw Response ---",
                         raw_text_content)
            raise

    def generate_story_breakdown(self, prompt_text):
        """Generate the full cinematic treatment (list of scene JSON objects)."""
        return self._execute_gemini_call(prompt_text, expect_json=True)

    def generate_image_prompt(self, prompt_text):
        """Generate or refine a DALL-E prompt string (returns text, not JSON).

        Used by `create_visual_regeneration_prompt` from prompt_engineering.
        """
        return self._execute_gemini_call(prompt_text, expect_json=False)

    def regenerate_scene_script_details(self, prompt_text):
        """Regenerate the JSON object for a single scene based on feedback."""
        return self._execute_gemini_call(prompt_text, expect_json=True)

    def refine_image_prompt_from_feedback(self, prompt_text):
        """Refine an existing DALL-E prompt string based on user feedback.

        Expects Gemini to return a new string (the refined prompt). Called
        by app.py, which builds the prompt via
        create_visual_regeneration_prompt.
        """
        return self._execute_gemini_call(prompt_text, expect_json=False)