Spaces:

promptAId
/

Promptaid-VIsion

Running

App Files Files Community

Promptaid-VIsion / py_backend /app /services /gemini_service.py

SCGR

multi upload

351d460 3 days ago

raw

history blame contribute delete

5.21 kB

	from .vlm_service import VLMService, ModelType
	from typing import Dict, Any, List
	import asyncio
	import time
	import re
	import json

	import google.generativeai as genai


	class GeminiService(VLMService):
	    """Google Gemini Vision service implementation.

	    Sends a text instruction plus one or more images to a Gemini model and
	    turns the model's reply into the caption dictionary returned by
	    ``generate_caption`` / ``generate_multi_image_caption``.
	    """

	    # EPSG codes accepted in the reply's metadata; any other value is
	    # replaced with "OTHER".
	    _ALLOWED_EPSG = ("4326", "3857", "32617", "32633", "32634", "OTHER")

	    def __init__(self, api_key: str, model: str = "gemini-1.5-flash"):
	        """Configure the Gemini client and create the generative model.

	        Args:
	            api_key: API key passed to ``genai.configure``.
	            model: Gemini model identifier used for every request.
	        """
	        super().__init__("Gemini", ModelType.GEMINI_PRO_VISION)
	        # NOTE(review): model_name stays "GEMINI15" whatever ``model`` is
	        # passed; presumably a registry key -- confirm against callers.
	        self.model_name = "GEMINI15"
	        genai.configure(api_key=api_key)
	        self.model_id = model
	        self.model = genai.GenerativeModel(self.model_id)

	    @staticmethod
	    def _image_part(image_bytes: bytes) -> Dict[str, Any]:
	        """Wrap raw image bytes in the inline-data dict sent to the model."""
	        # NOTE(review): the MIME type is hard-coded to JPEG; confirm callers
	        # always pass JPEG-encoded bytes.
	        return {
	            "mime_type": "image/jpeg",
	            "data": image_bytes,
	        }

	    @staticmethod
	    def _strip_code_fence(text: str) -> str:
	        """Remove a Markdown code fence wrapped around the model reply."""
	        cleaned = text.strip()
	        if cleaned.startswith("```"):
	            # Accept both "```json" and a bare "```" opening fence.
	            cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.IGNORECASE)
	            cleaned = re.sub(r"\s*```$", "", cleaned)
	        return cleaned

	    @classmethod
	    def _parse_response(cls, content: str) -> Dict[str, Any]:
	        """Split the model reply into caption fields and metadata.

	        The reply is expected to be a JSON object with ``description``,
	        ``analysis``, ``recommended_actions`` and ``metadata`` keys. When it
	        is not valid JSON, or is valid JSON but not an object, the raw text
	        is returned as both the caption and the analysis.
	        """
	        try:
	            parsed = json.loads(cls._strip_code_fence(content))
	        except json.JSONDecodeError:
	            parsed = None

	        if not isinstance(parsed, dict):
	            return {
	                "caption": content,
	                "metadata": {},
	                "description": "",
	                "analysis": content,
	                "recommended_actions": "",
	            }

	        description = parsed.get("description", "")
	        analysis = parsed.get("analysis", "")
	        recommended_actions = parsed.get("recommended_actions", "")

	        # The model may emit ``"metadata": null`` or another non-object.
	        metadata = parsed.get("metadata", {})
	        if not isinstance(metadata, dict):
	            metadata = {}

	        epsg_value = metadata.get("epsg")
	        if epsg_value:
	            # Compare as text so a numeric code such as 4326 is recognised
	            # instead of being collapsed to "OTHER".
	            epsg_text = str(epsg_value).strip()
	            metadata["epsg"] = epsg_text if epsg_text in cls._ALLOWED_EPSG else "OTHER"

	        # Combine all three parts for backward compatibility
	        caption_text = (
	            f"Description: {description}\n\n"
	            f"Analysis: {analysis}\n\n"
	            f"Recommended Actions: {recommended_actions}"
	        )

	        return {
	            "caption": caption_text,
	            "metadata": metadata,
	            "description": description,
	            "analysis": analysis,
	            "recommended_actions": recommended_actions,
	        }

	    async def _request_caption(self, request_parts: List[Any], raw_response: Dict[str, Any]) -> Dict[str, Any]:
	        """Send ``request_parts`` to the model and build the result dict."""
	        # perf_counter is monotonic, so the measured duration cannot be
	        # skewed by wall-clock adjustments.
	        start = time.perf_counter()
	        # generate_content is a blocking call; run it off the event loop.
	        response = await asyncio.to_thread(self.model.generate_content, request_parts)
	        elapsed = time.perf_counter() - start

	        # NOTE(review): in google-generativeai, ``response.text`` may raise
	        # ValueError when the reply carries no text part (e.g. a blocked
	        # prompt); that exception propagates to the caller as before --
	        # confirm callers handle it.
	        content = getattr(response, "text", None) or ""
	        fields = self._parse_response(content)

	        return {
	            "caption": fields["caption"],
	            "metadata": fields["metadata"],
	            "confidence": None,
	            "processing_time": elapsed,
	            "raw_response": raw_response,
	            "description": fields["description"],
	            "analysis": fields["analysis"],
	            "recommended_actions": fields["recommended_actions"],
	        }

	    async def generate_caption(self, image_bytes: bytes, prompt: str, metadata_instructions: str = "") -> Dict[str, Any]:
	        """Generate caption using Google Gemini Vision.

	        Args:
	            image_bytes: Encoded image sent inline to the model.
	            prompt: Main instruction text.
	            metadata_instructions: Extra instructions appended after the
	                prompt, separated by a blank line.

	        Returns:
	            Dict with keys ``caption``, ``metadata``, ``confidence`` (always
	            ``None``), ``processing_time`` (seconds spent in the model call),
	            ``raw_response``, ``description``, ``analysis`` and
	            ``recommended_actions``.
	        """
	        instruction = prompt + "\n\n" + metadata_instructions
	        request_parts = [instruction, self._image_part(image_bytes)]
	        raw_response: Dict[str, Any] = {"model": self.model_id}
	        return await self._request_caption(request_parts, raw_response)

	    async def generate_multi_image_caption(self, image_bytes_list: List[bytes], prompt: str, metadata_instructions: str = "") -> Dict[str, Any]:
	        """Generate caption for multiple images using Google Gemini Vision.

	        Args:
	            image_bytes_list: Encoded images, sent inline in the given order
	                after the instruction text.
	            prompt: Main instruction text.
	            metadata_instructions: Extra instructions appended after the
	                prompt, separated by a blank line.

	        Returns:
	            Same dict shape as ``generate_caption``; ``raw_response``
	            additionally carries ``image_count``.
	        """
	        instruction = prompt + "\n\n" + metadata_instructions

	        # Instruction first, then one inline part per image.
	        request_parts: List[Any] = [instruction]
	        request_parts.extend(self._image_part(image_bytes) for image_bytes in image_bytes_list)

	        raw_response: Dict[str, Any] = {
	            "model": self.model_id,
	            "image_count": len(image_bytes_list),
	        }
	        return await self._request_caption(request_parts, raw_response)