Spaces:

mgbam
/

CingenAI

Running

App Files Files Community

CingenAI / core /gemini_handler.py

mgbam

Update core/gemini_handler.py

3539a49 verified 6 days ago

raw

history blame contribute delete

8.11 kB

	# core/gemini_handler.py
	import google.generativeai as genai
	import json
	import re
	import logging # Added logging

	logger = logging.getLogger(__name__) # Added logger

	class GeminiHandler:
	    """Wrapper around a Gemini generative model for text and JSON requests.

	    The handler configures the ``google.generativeai`` SDK with an API key,
	    instantiates one model, and exposes small task-named methods that all
	    funnel into :meth:`_execute_gemini_call`.
	    """

	    # Captures the contents of a ```json fenced block. The inner group is
	    # non-greedy so the first closing fence ends the match.
	    _JSON_FENCE_RE = re.compile(r"```json\s*([\s\S]*?)\s*```", re.IGNORECASE)

	    # Opening bracket -> matching closing bracket for the fallback heuristic.
	    _JSON_BRACKETS = {'[': ']', '{': '}'}

	    def __init__(self, api_key, model_name='gemini-1.5-flash-latest'):
	        """Configure the SDK and create the model.

	        Args:
	            api_key: API key passed to ``genai.configure``.
	            model_name: Name of the Gemini model to instantiate. Defaults to
	                ``'gemini-1.5-flash-latest'``; per the original author's note,
	                ``'gemini-1.0-pro'`` or ``'gemini-1.5-pro-latest'`` might be
	                more robust for complex JSON / instruction following.
	        """
	        genai.configure(api_key=api_key)
	        self.model_name = model_name
	        self.model = genai.GenerativeModel(self.model_name)
	        logger.info("GeminiHandler initialized with model: %s", self.model_name)

	    def _clean_json_response(self, text_response):
	        """Extract the most likely JSON string from a model text response.

	        Strategy, in priority order:

	        1. The contents of the first ```json ... ``` fenced block.
	        2. The span from the first ``[`` or ``{`` (whichever appears first)
	           to the last matching closing bracket of the same kind.
	        3. The original text, unchanged, if neither heuristic applies.

	        Args:
	            text_response: Raw text returned by the model (may be empty/None).

	        Returns:
	            The extracted string. It is NOT validated as JSON here; an empty
	            string is returned for empty input or an empty fenced block.
	        """
	        if not text_response:
	            return ""

	        # Attempt 1: an explicit ```json fenced block is the strongest signal.
	        fenced = self._JSON_FENCE_RE.search(text_response)
	        if fenced:
	            logger.debug("Found JSON in markdown code block.")
	            return fenced.group(1).strip()

	        # Attempt 2: heuristic bracket matching. This may pick up non-JSON
	        # text when the model does not use code blocks.
	        candidate = text_response.strip()
	        start = next(
	            (i for i, ch in enumerate(candidate) if ch in self._JSON_BRACKETS),
	            -1,
	        )
	        if start != -1:
	            opener = candidate[start]
	            end = candidate.rfind(self._JSON_BRACKETS[opener])
	            if end > start:
	                logger.debug(
	                    "Extracted potential JSON %s directly.",
	                    "list" if opener == '[' else "object",
	                )
	                return candidate[start:end + 1]

	        logger.warning("Could not clearly identify JSON structure in the response. Returning raw attempt.")
	        return text_response

	    def _execute_gemini_call(self, prompt_text, expect_json=False):
	        """Send a prompt to the model and return text or parsed JSON.

	        Args:
	            prompt_text: Prompt string passed to ``generate_content``.
	            expect_json: When true, the response text is cleaned with
	                :meth:`_clean_json_response` and parsed with ``json.loads``.

	        Returns:
	            The parsed JSON value when ``expect_json`` is true, otherwise the
	            stripped response text.

	        Raises:
	            json.JSONDecodeError: The cleaned response is empty or not valid
	                JSON.
	            Exception: An ``AttributeError`` occurred while processing the
	                response (re-raised as a plain ``Exception`` chained to the
	                original), or any other error raised by the call, which is
	                logged and re-raised unchanged.
	        """
	        # Pre-initialised so the except blocks can always log them.
	        raw_text_content = ""
	        cleaned_json_attempt = ""
	        try:
	            logger.info(
	                "Executing Gemini call (expect_json=%s). Prompt starts with: %s...",
	                expect_json,
	                prompt_text[:150],
	            )
	            response = self.model.generate_content(prompt_text)

	            raw_text_content = response.text
	            logger.debug("Gemini raw response text (first 300 chars): %s", raw_text_content[:300])

	            if not expect_json:
	                logger.info("Gemini call successful, returning text.")
	                return raw_text_content.strip()

	            cleaned_json_attempt = self._clean_json_response(raw_text_content)
	            if not cleaned_json_attempt:
	                logger.error("JSON cleaning resulted in an empty string.")
	                raise json.JSONDecodeError("Cleaned JSON string is empty", "", 0)

	            logger.debug(
	                "Attempting to parse cleaned JSON (first 300 chars): %s",
	                cleaned_json_attempt[:300],
	            )
	            parsed_json = json.loads(cleaned_json_attempt)
	            logger.info("Gemini call successful, JSON parsed.")
	            return parsed_json

	        except json.JSONDecodeError as e:
	            logger.error("JSONDecodeError: %s. Failed to parse JSON from Gemini response.", e)
	            logger.error("--- Problematic Gemini Raw Response ---\n%s\n--- End Raw Response ---", raw_text_content)
	            logger.error("--- Cleaned JSON Attempt ---\n%s\n--- End Cleaned Attempt ---", cleaned_json_attempt)
	            raise  # Let the caller decide how to surface the failure.
	        except AttributeError as ae:
	            # e.g. the response object has no usable `.text` attribute.
	            logger.error(
	                "AttributeError during Gemini call processing: %s. Likely an issue with the response object structure.",
	                ae,
	                exc_info=True,
	            )
	            # Chain the cause so the original traceback is not lost.
	            raise Exception(f"Gemini API response structure error: {ae}") from ae
	        except Exception as e:
	            # Any other failure raised while generating or handling the response.
	            logger.error("General error during Gemini API call: %s - %s", type(e).__name__, e, exc_info=True)
	            logger.error(
	                "--- Problematic Gemini Raw Response (if available) ---\n%s\n--- End Raw Response ---",
	                raw_text_content,
	            )
	            raise

	    def generate_story_breakdown(self, prompt_text):
	        """Generate the full cinematic treatment; returns parsed JSON."""
	        return self._execute_gemini_call(prompt_text, expect_json=True)

	    def generate_image_prompt(self, prompt_text):
	        """Generate or refine an image prompt; returns plain text, not JSON."""
	        return self._execute_gemini_call(prompt_text, expect_json=False)

	    def regenerate_scene_script_details(self, prompt_text):
	        """Regenerate the JSON for a single scene; returns parsed JSON."""
	        return self._execute_gemini_call(prompt_text, expect_json=True)

	    def refine_image_prompt_from_feedback(self, prompt_text):
	        """Refine an existing image prompt from feedback; returns plain text."""
	        return self._execute_gemini_call(prompt_text, expect_json=False)

	# You might add a new method here if you want Gemini to help construct
	# the text-to-video prompt specifically, though your current `construct_text_to_video_prompt`
	# in prompt_engineering.py does this without a Gemini call.
	# If you did want Gemini to craft it:
	# def generate_text_to_video_prompt_string(self, prompt_text_for_gemini):
	# """
	# Asks Gemini to craft a detailed text-to-video prompt string.
	# """
	# return self._execute_gemini_call(prompt_text_for_gemini, expect_json=False)