Spaces:

promptAId
/

Promptaid-VIsion

Running

File size: 10,487 Bytes

# py_backend/app/routers/caption.py
from fastapi import APIRouter, HTTPException, Depends, Form, Request
from sqlalchemy.orm import Session
from typing import List

from .. import crud, database, schemas, storage
from ..services.vlm_service import vlm_manager
from ..services.schema_validator import schema_validator
from ..config import settings

router = APIRouter()

def get_db():
    """Yield a database session and close it once the consumer is done.

    Used by the routes in this module as a FastAPI dependency
    (``Depends(get_db)``).
    """
    session = database.SessionLocal()
    try:
        yield session
    finally:
        # Runs whether the consumer finished normally or raised.
        session.close()

# Upper bound (seconds) for the HTTP fallback fetch, so a stalled download
# cannot hang the request forever.
_IMAGE_FETCH_TIMEOUT_SECONDS = 30


def _as_annotatable_dict(raw):
    """Return ``raw`` unchanged if it is a dict, else wrap it so keys can be added."""
    return raw if isinstance(raw, dict) else {"raw_response": raw}


def _load_image_bytes(img):
    """Return the raw bytes of the file stored for ``img``.

    Reads from S3 when ``storage`` exposes an ``s3`` client and the configured
    storage provider is not ``"local"``; otherwise reads the file from
    ``settings.STORAGE_DIR``. If that read raises, falls back to downloading
    the object through the URL returned by ``storage.get_object_url``.

    Raises:
        HTTPException: 500 when both the direct read and the URL fallback fail.
    """
    try:
        if hasattr(storage, 's3') and settings.STORAGE_PROVIDER != "local":
            response = storage.s3.get_object(
                Bucket=settings.S3_BUCKET,
                Key=img.file_key,
            )
            return response["Body"].read()

        import os
        file_path = os.path.join(settings.STORAGE_DIR, img.file_key)
        with open(file_path, 'rb') as f:
            return f.read()
    except Exception as e:
        print(f"Error reading image file: {e}")
        # fallback: try presigned/public URL
        try:
            url = storage.get_object_url(img.file_key)
            if url.startswith('/') and settings.STORAGE_PROVIDER == "local":
                url = f"http://localhost:8000{url}"
            import requests
            # requests never times out by default; without a bound, a stalled
            # fetch would block this handler indefinitely.
            resp = requests.get(url, timeout=_IMAGE_FETCH_TIMEOUT_SECONDS)
            resp.raise_for_status()
            return resp.content
        except Exception as fallback_error:
            print(f"Fallback also failed: {fallback_error}")
            raise HTTPException(500, f"Could not read image file: {e}") from fallback_error


@router.post(
    "/images/{image_id}/caption",
    response_model=schemas.CaptionOut,
)
async def create_caption(
    image_id: str,
    title: str = Form(...),
    prompt: str | None = Form(None),  # optional; will use active prompts if not provided
    model_name: str | None = Form(None),
    db: Session = Depends(get_db),
):
    """Generate a caption for an image with the VLM service and store it.

    The prompt is looked up by code, then by label, when ``prompt`` is given;
    otherwise the active prompt for the image's type is used. The VLM output
    is passed through schema validation. If validation fails, the caption and
    metadata reported by the VLM result are used and the failure is recorded
    in the stored raw JSON. If the VLM call itself raises, a stub caption is
    stored instead of failing the request.

    Raises:
        HTTPException: 404 if the image does not exist, 400 if no prompt can
            be resolved, 500 if the image bytes cannot be read.
    """
    print(f"DEBUG: Received request - image_id: {image_id}, title: {title}, prompt: {prompt}, model_name: {model_name}")

    img = crud.get_image(db, image_id)
    if not img:
        raise HTTPException(404, "image not found")

    # Get the prompt (explicit by code/label, or active for image type)
    if prompt:
        print(f"Looking for prompt: '{prompt}' (type: {type(prompt)})")
        prompt_obj = crud.get_prompt(db, prompt) or crud.get_prompt_by_label(db, prompt)
    else:
        print(f"Looking for active prompt for image type: {img.image_type}")
        prompt_obj = crud.get_active_prompt_by_image_type(db, img.image_type)

    print(f"Prompt lookup result: {prompt_obj}")
    if not prompt_obj:
        raise HTTPException(400, f"No prompt found (requested: '{prompt}' or active for type '{img.image_type}')")

    prompt_text = prompt_obj.label
    metadata_instructions = prompt_obj.metadata_instructions or ""
    print(f"Using prompt text: '{prompt_text}'")
    print(f"Using metadata instructions: '{metadata_instructions[:100]}...'")

    # Load image bytes (S3 or local, with URL fallback)
    img_bytes = _load_image_bytes(img)

    metadata = {}
    print(f"DEBUG: About to call VLM service with model_name: {model_name}")
    try:
        result = await vlm_manager.generate_caption(
            image_bytes=img_bytes,
            prompt=prompt_text,
            metadata_instructions=metadata_instructions,
            model_name=model_name,
            db_session=db,
        )

        print(f"DEBUG: VLM service result: {result}")
        print(f"DEBUG: Result model field: {result.get('model', 'NOT_FOUND')}")

        raw = result.get("raw_response", {})

        # Validate and clean the data using schema validation
        image_type = img.image_type
        print(f"DEBUG: Validating data for image type: {image_type}")
        print(f"DEBUG: Raw data structure: {list(raw.keys()) if isinstance(raw, dict) else 'Not a dict'}")

        cleaned_data, is_valid, validation_error = schema_validator.clean_and_validate_data(raw, image_type)

        if is_valid:
            print(f"✓ Schema validation passed for {image_type}")
            text = cleaned_data.get("analysis", "")
            metadata = cleaned_data.get("metadata", {})
        else:
            print(f"⚠ Schema validation failed for {image_type}: {validation_error}")
            text = result.get("caption", "This is a fallback caption due to schema validation error.")
            metadata = result.get("metadata", {})
            # A non-dict raw response cannot take item assignment; wrap it so
            # the validation failure is recorded instead of raising TypeError
            # and discarding the caption via the generic handler below.
            raw = _as_annotatable_dict(raw)
            raw["validation_error"] = validation_error
            raw["validation_failed"] = True

        used_model = result.get("model", model_name) or "STUB_MODEL"
        if used_model == "random":
            print("WARNING: VLM service returned 'random' as model name, using STUB_MODEL fallback")
            used_model = "STUB_MODEL"

        # Fallback info (if any)
        if result.get("fallback_used"):
            raw = _as_annotatable_dict(raw)
            raw["fallback_info"] = {
                "original_model": result.get("original_model"),
                "fallback_model": used_model,
                "reason": result.get("fallback_reason"),
            }

    except Exception as e:
        # Deliberate best-effort: a VLM failure still produces a stored caption.
        print(f"VLM error, using fallback: {e}")
        text = "This is a fallback caption due to VLM service error."
        used_model = "STUB_MODEL"
        raw = {"error": str(e), "fallback": True}
        metadata = {}

    caption = crud.create_caption(
        db,
        image_id=image_id,
        title=title,
        prompt=prompt_obj.p_code,
        model_code=used_model,
        raw_json=raw,
        text=text,
        metadata=metadata,
    )

    db.refresh(caption)
    print(f"DEBUG: Caption created, caption object: {caption}")
    print(f"DEBUG: caption_id: {caption.caption_id}")
    return schemas.CaptionOut.from_orm(caption)

@router.get(
    "/captions/legacy",
    response_model=List[schemas.ImageOut],
)
def get_all_captions_legacy_format(
    request: Request,
    db: Session = Depends(get_db),
):
    """Get all images with captions in the old format for backward compatibility.

    Produces one ``ImageOut`` per (caption, image) pair: the image's own
    fields, overlaid with the fields of the caption it belongs to.
    """
    # Kept as a function-scope import (presumably to avoid a circular import
    # with the upload router; TODO confirm), but resolved once per call
    # instead of once per image.
    from .upload import convert_image_to_dict

    print("DEBUG: Fetching all captions in legacy format...")
    captions = crud.get_all_captions_with_images(db)
    print(f"DEBUG: Found {len(captions)} captions")

    # The request's base URL is the same for every image; compute it once.
    base_url = str(request.base_url).rstrip('/')

    result = []
    for caption in captions:
        db.refresh(caption)
        if not caption.images:
            continue

        # Overlay caption fields (legacy shape); identical for every image
        # attached to this caption, so build it once per caption.
        caption_fields = {
            "title": caption.title,
            "prompt": caption.prompt,
            "model": caption.model,
            "schema_id": caption.schema_id,
            "raw_json": caption.raw_json,
            "generated": caption.generated,
            "edited": caption.edited,
            "accuracy": caption.accuracy,
            "context": caption.context,
            "usability": caption.usability,
            "starred": caption.starred,
            "created_at": caption.created_at,
            "updated_at": caption.updated_at,
        }

        for image in caption.images:
            url = f"{base_url}/api/images/{image.image_id}/file"
            print(f"DEBUG: Generated image URL: {url}")
            img_dict = convert_image_to_dict(image, url)
            img_dict.update(caption_fields)
            result.append(schemas.ImageOut(**img_dict))
    print(f"DEBUG: Returning {len(result)} legacy format results")
    return result

@router.get(
    "/captions",
    response_model=List[schemas.CaptionOut],
)
def get_all_captions_with_images(
    db: Session = Depends(get_db),
):
    """Return every stored caption serialized as ``CaptionOut``."""
    print("DEBUG: Fetching all captions...")
    rows = crud.get_all_captions_with_images(db)
    print(f"DEBUG: Found {len(rows)} captions")

    serialized = []
    for row in rows:
        # Log the values as loaded, then reload the row before serializing it.
        print(f"DEBUG: Processing caption {row.caption_id}, title: {row.title}, generated: {row.generated}, model: {row.model}")
        db.refresh(row)
        serialized += [schemas.CaptionOut.from_orm(row)]

    print(f"DEBUG: Returning {len(serialized)} formatted results")
    return serialized

@router.get(
    "/images/{image_id}/captions",
    response_model=List[schemas.CaptionOut],
)
def get_captions_by_image(
    image_id: str,
    db: Session = Depends(get_db),
):
    """Return all captions attached to the image identified by ``image_id``."""

    def _refresh_and_serialize(row):
        # Reload the row from the session before converting it.
        db.refresh(row)
        return schemas.CaptionOut.from_orm(row)

    return [
        _refresh_and_serialize(row)
        for row in crud.get_captions_by_image(db, image_id)
    ]

@router.get(
    "/captions/{caption_id}",
    response_model=schemas.CaptionOut,
)
def get_caption(
    caption_id: str,
    db: Session = Depends(get_db),
):
    """Return a single caption by id, or respond 404 if it does not exist."""
    found = crud.get_caption(db, caption_id)
    if found:
        db.refresh(found)
        return schemas.CaptionOut.from_orm(found)
    raise HTTPException(404, "caption not found")

@router.put(
    "/captions/{caption_id}",
    response_model=schemas.CaptionOut,
)
def update_caption(
    caption_id: str,
    update: schemas.CaptionUpdate,
    db: Session = Depends(get_db),
):
    """Apply ``update`` to the caption with this id; respond 404 if there is none."""
    updated = crud.update_caption(db, caption_id, update)
    if updated:
        db.refresh(updated)
        return schemas.CaptionOut.from_orm(updated)
    raise HTTPException(404, "caption not found")

@router.put(
    "/images/{image_id}/caption",
    response_model=schemas.CaptionOut,
)
def update_caption_by_image(
    image_id: str,
    update: schemas.CaptionUpdate,
    db: Session = Depends(get_db),
):
    """Update the first caption of an image (kept for backward compatibility).

    Responds 404 when the image is missing, when it has no captions, or when
    the update finds no caption to modify.
    """
    image = crud.get_image(db, image_id)
    if not image:
        raise HTTPException(404, "image not found")

    existing = image.captions
    if not existing:
        raise HTTPException(404, "no captions found for this image")

    # Only the first caption in the image's caption list is targeted.
    first_caption_id = str(existing[0].caption_id)
    updated = crud.update_caption(db, first_caption_id, update)
    if updated:
        db.refresh(updated)
        return schemas.CaptionOut.from_orm(updated)
    raise HTTPException(404, "caption not found")

@router.delete(
    "/captions/{caption_id}",
)
def delete_caption(
    caption_id: str,
    db: Session = Depends(get_db),
):
    """Delete the caption with this id; respond 404 if nothing was deleted."""
    deleted = crud.delete_caption(db, caption_id)
    if deleted:
        return {"message": "Caption deleted successfully"}
    raise HTTPException(404, "caption not found")