Spaces:
Running
Running
File size: 10,487 Bytes
3cf9fa0 401b092 09ecaf7 1686de5 3cf9fa0 09ecaf7 d7291ef ba5edb0 ab3b988 d7291ef 09ecaf7 d7291ef f503159 d7291ef 3cf9fa0 1686de5 d7291ef d25db6b 5778774 fe5d98f d25db6b 5778774 3cf9fa0 4f6cbcc 3cf9fa0 4f6cbcc 3cf9fa0 5778774 fe5d98f 3cf9fa0 5778774 3cf9fa0 d25db6b 351d460 f359373 779c5c3 5778774 3cf9fa0 5778774 d7291ef 5778774 fe5d98f d7291ef ba5edb0 d7291ef 5778774 351d460 5778774 351d460 3cf9fa0 5778774 3cf9fa0 5778774 3cf9fa0 5778774 d25db6b 5778774 f503159 d7291ef fe5d98f d7291ef 65933cd f503159 1686de5 f503159 65933cd 1686de5 f503159 401b092 1686de5 f503159 65933cd c57d64b 3cf9fa0 f503159 65933cd 1686de5 f503159 1686de5 f503159 1686de5 f503159 65933cd f503159 65933cd f503159 65933cd 23d1df7 f503159 d7291ef f503159 d7291ef f503159 d7291ef f503159 1686de5 23d1df7 65933cd f503159 3cf9fa0 f503159 1686de5 f503159 1686de5 f503159 1686de5 f503159 1686de5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 |
# py_backend/app/routers/caption.py
from fastapi import APIRouter, HTTPException, Depends, Form, Request
from sqlalchemy.orm import Session
from typing import List
from .. import crud, database, schemas, storage
from ..services.vlm_service import vlm_manager
from ..services.schema_validator import schema_validator
from ..config import settings
# Router on which the caption endpoints in this module are registered.
router = APIRouter()
def get_db():
    """Yield a database session and close it once the consumer is done.

    Used by the endpoints in this module through ``Depends(get_db)``.
    """
    session = database.SessionLocal()
    try:
        yield session
    finally:
        # Runs on normal completion and on error alike, so the session
        # is always closed.
        session.close()
def _as_dict_payload(raw):
    """Return ``raw`` unchanged if it is a dict, else wrap it in one.

    Lets the caller attach diagnostic keys without raising ``TypeError``
    when the VLM service returned a non-dict raw response.
    """
    if isinstance(raw, dict):
        return raw
    return {"raw_response": raw}


def _load_image_bytes(img, timeout=10.0):
    """Return the raw bytes of the stored file behind ``img``.

    Tries a direct read first (S3 when ``storage`` exposes ``s3`` and the
    provider is not ``"local"``, otherwise the local storage directory).
    If that fails for any reason, retries over HTTP using the object's URL.

    Args:
        img: Image record; only its ``file_key`` attribute is read.
        timeout: Seconds to wait for the HTTP fallback request.

    Raises:
        HTTPException: 500 when both the direct read and the URL fallback fail.
    """
    try:
        if hasattr(storage, 's3') and settings.STORAGE_PROVIDER != "local":
            response = storage.s3.get_object(
                Bucket=settings.S3_BUCKET,
                Key=img.file_key,
            )
            return response["Body"].read()

        import os
        file_path = os.path.join(settings.STORAGE_DIR, img.file_key)
        with open(file_path, 'rb') as f:
            return f.read()
    except Exception as e:
        print(f"Error reading image file: {e}")
        # fallback: try presigned/public URL
        try:
            url = storage.get_object_url(img.file_key)
            if url.startswith('/') and settings.STORAGE_PROVIDER == "local":
                url = f"http://localhost:8000{url}"
            import requests
            # requests applies no timeout by default; without one a stalled
            # server would keep this request hanging indefinitely.
            resp = requests.get(url, timeout=timeout)
            resp.raise_for_status()
            return resp.content
        except Exception as fallback_error:
            print(f"Fallback also failed: {fallback_error}")
            raise HTTPException(500, f"Could not read image file: {e}") from fallback_error


@router.post(
    "/images/{image_id}/caption",
    response_model=schemas.CaptionOut,
)
async def create_caption(
    image_id: str,
    title: str = Form(...),
    prompt: str | None = Form(None),  # optional; will use active prompts if not provided
    model_name: str | None = Form(None),
    db: Session = Depends(get_db),
):
    """Generate a caption for an image with the VLM service and store it.

    The prompt is looked up by code, then by label, when ``prompt`` is given;
    otherwise the active prompt for the image's type is used. If the VLM call
    or result handling fails, a stub caption is stored instead of returning
    an error.

    Raises:
        HTTPException: 404 if the image does not exist, 400 if no prompt can
            be resolved, 500 if the image file cannot be read.
    """
    print(f"DEBUG: Received request - image_id: {image_id}, title: {title}, prompt: {prompt}, model_name: {model_name}")
    img = crud.get_image(db, image_id)
    if not img:
        raise HTTPException(404, "image not found")

    # Get the prompt (explicit by code/label, or active for image type)
    if prompt:
        print(f"Looking for prompt: '{prompt}' (type: {type(prompt)})")
        prompt_obj = crud.get_prompt(db, prompt) or crud.get_prompt_by_label(db, prompt)
    else:
        print(f"Looking for active prompt for image type: {img.image_type}")
        prompt_obj = crud.get_active_prompt_by_image_type(db, img.image_type)
    print(f"Prompt lookup result: {prompt_obj}")
    if not prompt_obj:
        raise HTTPException(400, f"No prompt found (requested: '{prompt}' or active for type '{img.image_type}')")

    prompt_text = prompt_obj.label
    metadata_instructions = prompt_obj.metadata_instructions or ""
    print(f"Using prompt text: '{prompt_text}'")
    print(f"Using metadata instructions: '{metadata_instructions[:100]}...'")

    # Load image bytes (S3 or local, with URL fallback).
    # NOTE(review): this is blocking I/O inside an async handler.
    img_bytes = _load_image_bytes(img)

    metadata = {}
    try:
        print(f"DEBUG: About to call VLM service with model_name: {model_name}")
        result = await vlm_manager.generate_caption(
            image_bytes=img_bytes,
            prompt=prompt_text,
            metadata_instructions=metadata_instructions,
            model_name=model_name,
            db_session=db,
        )
        print(f"DEBUG: VLM service result: {result}")
        print(f"DEBUG: Result model field: {result.get('model', 'NOT_FOUND')}")
        raw = result.get("raw_response", {})

        # Validate and clean the data using schema validation
        image_type = img.image_type
        print(f"DEBUG: Validating data for image type: {image_type}")
        print(f"DEBUG: Raw data structure: {list(raw.keys()) if isinstance(raw, dict) else 'Not a dict'}")
        cleaned_data, is_valid, validation_error = schema_validator.clean_and_validate_data(raw, image_type)
        if is_valid:
            print(f"✓ Schema validation passed for {image_type}")
            text = cleaned_data.get("analysis", "")
            metadata = cleaned_data.get("metadata", {})
        else:
            print(f"⚠ Schema validation failed for {image_type}: {validation_error}")
            text = result.get("caption", "This is a fallback caption due to schema validation error.")
            metadata = result.get("metadata", {})
            # A non-dict raw response must not turn a usable caption into
            # the generic error fallback just because it cannot be annotated.
            raw = _as_dict_payload(raw)
            raw["validation_error"] = validation_error
            raw["validation_failed"] = True

        used_model = result.get("model", model_name) or "STUB_MODEL"
        if used_model == "random":
            print("WARNING: VLM service returned 'random' as model name, using STUB_MODEL fallback")
            used_model = "STUB_MODEL"

        # Fallback info (if any)
        if result.get("fallback_used"):
            raw = _as_dict_payload(raw)
            raw["fallback_info"] = {
                "original_model": result.get("original_model"),
                "fallback_model": used_model,
                "reason": result.get("fallback_reason"),
            }
    except Exception as e:
        # Deliberate best-effort: store a stub caption rather than fail.
        print(f"VLM error, using fallback: {e}")
        text = "This is a fallback caption due to VLM service error."
        used_model = "STUB_MODEL"
        raw = {"error": str(e), "fallback": True}
        metadata = {}

    caption = crud.create_caption(
        db,
        image_id=image_id,
        title=title,
        prompt=prompt_obj.p_code,
        model_code=used_model,
        raw_json=raw,
        text=text,
        metadata=metadata,
    )
    db.refresh(caption)
    print(f"DEBUG: Caption created, caption object: {caption}")
    print(f"DEBUG: caption_id: {caption.caption_id}")
    return schemas.CaptionOut.from_orm(caption)
@router.get(
    "/captions/legacy",
    response_model=List[schemas.ImageOut],
)
def get_all_captions_legacy_format(
    request: Request,
    db: Session = Depends(get_db),
):
    """Get all images with captions in the old format for backward compatibility.

    Produces one ``schemas.ImageOut`` per (caption, image) pair: the image's
    dict form with the caption's fields overlaid on top.
    """
    # Kept function-local as in the original (presumably to avoid a circular
    # import with the upload router -- confirm), but done once, not per image.
    from .upload import convert_image_to_dict

    print("DEBUG: Fetching all captions in legacy format...")
    captions = crud.get_all_captions_with_images(db)
    print(f"DEBUG: Found {len(captions)} captions")

    # Does not change between iterations, so compute it once.
    base_url = str(request.base_url).rstrip('/')

    result = []
    for caption in captions:
        db.refresh(caption)
        if not caption.images:
            continue

        # Overlay caption fields (legacy shape); identical for every image
        # of this caption, so build it once per caption.
        caption_fields = {
            "title": caption.title,
            "prompt": caption.prompt,
            "model": caption.model,
            "schema_id": caption.schema_id,
            "raw_json": caption.raw_json,
            "generated": caption.generated,
            "edited": caption.edited,
            "accuracy": caption.accuracy,
            "context": caption.context,
            "usability": caption.usability,
            "starred": caption.starred,
            "created_at": caption.created_at,
            "updated_at": caption.updated_at,
        }
        for image in caption.images:
            url = f"{base_url}/api/images/{image.image_id}/file"
            print(f"DEBUG: Generated image URL: {url}")
            img_dict = convert_image_to_dict(image, url)
            img_dict.update(caption_fields)
            result.append(schemas.ImageOut(**img_dict))

    print(f"DEBUG: Returning {len(result)} legacy format results")
    return result
@router.get(
    "/captions",
    response_model=List[schemas.CaptionOut],
)
def get_all_captions_with_images(db: Session = Depends(get_db)):
    """Return every stored caption, refreshed from the database."""
    print("DEBUG: Fetching all captions...")
    rows = crud.get_all_captions_with_images(db)
    print(f"DEBUG: Found {len(rows)} captions")

    def _serialize(row):
        # Log, refresh, then convert -- in that order for each caption.
        print(f"DEBUG: Processing caption {row.caption_id}, title: {row.title}, generated: {row.generated}, model: {row.model}")
        db.refresh(row)
        return schemas.CaptionOut.from_orm(row)

    serialized = [_serialize(row) for row in rows]
    print(f"DEBUG: Returning {len(serialized)} formatted results")
    return serialized
@router.get(
    "/images/{image_id}/captions",
    response_model=List[schemas.CaptionOut],
)
def get_captions_by_image(image_id: str, db: Session = Depends(get_db)):
    """Return all captions attached to the image with the given id."""

    def _serialize(row):
        # Refresh before converting so the output reflects current DB state.
        db.refresh(row)
        return schemas.CaptionOut.from_orm(row)

    return [_serialize(row) for row in crud.get_captions_by_image(db, image_id)]
@router.get(
    "/captions/{caption_id}",
    response_model=schemas.CaptionOut,
)
def get_caption(caption_id: str, db: Session = Depends(get_db)):
    """Return a single caption by id, or respond 404 if it does not exist."""
    found = crud.get_caption(db, caption_id)
    if found:
        db.refresh(found)
        return schemas.CaptionOut.from_orm(found)
    raise HTTPException(404, "caption not found")
@router.put(
    "/captions/{caption_id}",
    response_model=schemas.CaptionOut,
)
def update_caption(
    caption_id: str,
    update: schemas.CaptionUpdate,
    db: Session = Depends(get_db),
):
    """Apply ``update`` to the caption with the given id.

    Responds 404 when ``crud.update_caption`` returns nothing.
    """
    updated = crud.update_caption(db, caption_id, update)
    if updated:
        db.refresh(updated)
        return schemas.CaptionOut.from_orm(updated)
    raise HTTPException(404, "caption not found")
@router.put(
    "/images/{image_id}/caption",
    response_model=schemas.CaptionOut,
)
def update_caption_by_image(
    image_id: str,
    update: schemas.CaptionUpdate,
    db: Session = Depends(get_db),
):
    """Update the first caption of an image (kept for backward compatibility).

    Responds 404 when the image is missing, has no captions, or the update
    returns nothing.
    """
    image = crud.get_image(db, image_id)
    if not image:
        raise HTTPException(404, "image not found")

    existing = image.captions
    if not existing:
        raise HTTPException(404, "no captions found for this image")

    # Only the first caption in the image's list is updated.
    first_caption_id = str(existing[0].caption_id)
    updated = crud.update_caption(db, first_caption_id, update)
    if not updated:
        raise HTTPException(404, "caption not found")

    db.refresh(updated)
    return schemas.CaptionOut.from_orm(updated)
@router.delete(
    "/captions/{caption_id}",
)
def delete_caption(caption_id: str, db: Session = Depends(get_db)):
    """Delete the caption with the given id; respond 404 if nothing was deleted."""
    if crud.delete_caption(db, caption_id):
        return {"message": "Caption deleted successfully"}
    raise HTTPException(404, "caption not found")
|