File size: 10,487 Bytes
3cf9fa0
401b092
09ecaf7
1686de5
3cf9fa0
09ecaf7
d7291ef
ba5edb0
ab3b988
d7291ef
09ecaf7
 
 
 
 
 
 
 
 
d7291ef
 
f503159
d7291ef
 
 
 
3cf9fa0
1686de5
d7291ef
d25db6b
5778774
fe5d98f
d25db6b
 
5778774
 
3cf9fa0
4f6cbcc
 
3cf9fa0
4f6cbcc
 
 
3cf9fa0
5778774
fe5d98f
3cf9fa0
 
5778774
 
 
 
 
3cf9fa0
d25db6b
351d460
f359373
 
 
 
 
 
 
 
 
 
 
779c5c3
5778774
3cf9fa0
5778774
 
 
 
 
 
 
 
 
 
 
 
 
d7291ef
 
5778774
 
fe5d98f
d7291ef
ba5edb0
d7291ef
5778774
351d460
 
 
5778774
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351d460
 
 
 
3cf9fa0
 
5778774
3cf9fa0
5778774
3cf9fa0
5778774
 
d25db6b
5778774
 
 
 
 
 
f503159
d7291ef
 
 
fe5d98f
d7291ef
 
 
 
 
65933cd
 
f503159
 
 
1686de5
 
f503159
65933cd
1686de5
f503159
401b092
1686de5
 
f503159
 
 
 
65933cd
 
 
 
c57d64b
 
 
 
 
 
 
3cf9fa0
 
f503159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65933cd
1686de5
 
 
f503159
1686de5
 
 
 
f503159
 
1686de5
f503159
65933cd
 
 
f503159
65933cd
f503159
65933cd
 
23d1df7
f503159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7291ef
f503159
 
d7291ef
 
f503159
d7291ef
 
 
f503159
1686de5
23d1df7
65933cd
f503159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cf9fa0
f503159
 
 
 
 
1686de5
 
f503159
1686de5
 
f503159
1686de5
 
f503159
 
1686de5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# py_backend/app/routers/caption.py
from fastapi import APIRouter, HTTPException, Depends, Form, Request
from sqlalchemy.orm import Session
from typing import List

from .. import crud, database, schemas, storage
from ..services.vlm_service import vlm_manager
from ..services.schema_validator import schema_validator
from ..config import settings

# Router on which the caption endpoints defined in this module are registered.
router = APIRouter()

def get_db():
    """Yield a database session and close it once the consumer is done.

    Used as a FastAPI dependency by the handlers in this module; the session
    is closed in ``finally`` so it is released even if the handler raises.
    """
    session = database.SessionLocal()
    try:
        yield session
    finally:
        session.close()

# Upper bound (seconds) for the HTTP fallback download, so a stalled server
# cannot hang the request indefinitely.
_IMAGE_FETCH_TIMEOUT_SECONDS = 30


def _load_image_bytes(img):
    """Return the raw bytes of the file behind ``img.file_key``.

    Reads from S3 when ``storage`` exposes an ``s3`` client and
    ``settings.STORAGE_PROVIDER`` is not ``"local"``; otherwise reads the file
    from ``settings.STORAGE_DIR``. If that primary read fails for any reason,
    falls back to downloading the object via ``storage.get_object_url``.

    Raises:
        HTTPException: 500 when both the primary read and the URL fallback fail.
    """
    # NOTE(review): this performs synchronous I/O but is called from an async
    # handler; consider moving it to a threadpool if it blocks the event loop.
    import os

    try:
        if hasattr(storage, 's3') and settings.STORAGE_PROVIDER != "local":
            response = storage.s3.get_object(
                Bucket=settings.S3_BUCKET,
                Key=img.file_key,
            )
            return response["Body"].read()

        file_path = os.path.join(settings.STORAGE_DIR, img.file_key)
        with open(file_path, 'rb') as f:
            return f.read()
    except Exception as e:
        print(f"Error reading image file: {e}")
        # fallback: try presigned/public URL
        try:
            url = storage.get_object_url(img.file_key)
            if url.startswith('/') and settings.STORAGE_PROVIDER == "local":
                # NOTE(review): hard-coded host/port; only used for relative
                # URLs under local storage.
                url = f"http://localhost:8000{url}"
            # Imported lazily so the primary path works without `requests`.
            import requests
            resp = requests.get(url, timeout=_IMAGE_FETCH_TIMEOUT_SECONDS)
            resp.raise_for_status()
            return resp.content
        except Exception as fallback_error:
            print(f"Fallback also failed: {fallback_error}")
            # Report the original read error to the client, but keep the
            # fallback failure attached as the cause for debugging.
            raise HTTPException(500, f"Could not read image file: {e}") from fallback_error


def _annotatable(raw):
    """Return ``raw`` unchanged if it is a dict, else wrap it in one.

    The caller adds diagnostic keys to the raw response; wrapping a non-dict
    value keeps that from raising ``TypeError`` and discarding the result.
    """
    return raw if isinstance(raw, dict) else {"raw_response": raw}


@router.post(
    "/images/{image_id}/caption",
    response_model=schemas.CaptionOut,
)
async def create_caption(
    image_id: str,
    title: str = Form(...),
    prompt: str = Form(None),  # optional; will use active prompts if not provided
    model_name: str | None = Form(None),
    db: Session = Depends(get_db),
):
    """Generate a caption for an image with the VLM service and store it.

    The prompt is looked up by code, then by label, when ``prompt`` is given;
    otherwise the active prompt for the image's type is used. The VLM response
    is passed through ``schema_validator``; when validation fails, the caption
    and metadata reported by the VLM result are used instead and the failure
    is recorded in the stored raw JSON. If the VLM call itself raises, a
    fallback caption is stored with model ``"STUB_MODEL"`` rather than failing
    the request.

    Raises:
        HTTPException: 404 if the image does not exist, 400 if no prompt can
            be resolved, 500 if the image file cannot be read.
    """
    print(f"DEBUG: Received request - image_id: {image_id}, title: {title}, prompt: {prompt}, model_name: {model_name}")

    img = crud.get_image(db, image_id)
    if not img:
        raise HTTPException(404, "image not found")

    # Get the prompt (explicit by code/label, or active for image type)
    if prompt:
        print(f"Looking for prompt: '{prompt}' (type: {type(prompt)})")
        prompt_obj = crud.get_prompt(db, prompt) or crud.get_prompt_by_label(db, prompt)
    else:
        print(f"Looking for active prompt for image type: {img.image_type}")
        prompt_obj = crud.get_active_prompt_by_image_type(db, img.image_type)

    print(f"Prompt lookup result: {prompt_obj}")
    if not prompt_obj:
        raise HTTPException(400, f"No prompt found (requested: '{prompt}' or active for type '{img.image_type}')")

    prompt_text = prompt_obj.label
    metadata_instructions = prompt_obj.metadata_instructions or ""
    print(f"Using prompt text: '{prompt_text}'")
    print(f"Using metadata instructions: '{metadata_instructions[:100]}...'")

    # Load image bytes (S3 or local, with URL fallback)
    img_bytes = _load_image_bytes(img)

    metadata = {}
    try:
        print(f"DEBUG: About to call VLM service with model_name: {model_name}")
        result = await vlm_manager.generate_caption(
            image_bytes=img_bytes,
            prompt=prompt_text,
            metadata_instructions=metadata_instructions,
            model_name=model_name,
            db_session=db,
        )

        print(f"DEBUG: VLM service result: {result}")
        print(f"DEBUG: Result model field: {result.get('model', 'NOT_FOUND')}")

        raw = result.get("raw_response", {})

        # Validate and clean the data using schema validation
        image_type = img.image_type
        print(f"DEBUG: Validating data for image type: {image_type}")
        print(f"DEBUG: Raw data structure: {list(raw.keys()) if isinstance(raw, dict) else 'Not a dict'}")

        cleaned_data, is_valid, validation_error = schema_validator.clean_and_validate_data(raw, image_type)

        if is_valid:
            print(f"✓ Schema validation passed for {image_type}")
            text = cleaned_data.get("analysis", "")
            metadata = cleaned_data.get("metadata", {})
        else:
            print(f"⚠ Schema validation failed for {image_type}: {validation_error}")
            text = result.get("caption", "This is a fallback caption due to schema validation error.")
            metadata = result.get("metadata", {})
            # Record why validation failed alongside the raw response.
            raw = _annotatable(raw)
            raw["validation_error"] = validation_error
            raw["validation_failed"] = True

        used_model = result.get("model", model_name) or "STUB_MODEL"
        if used_model == "random":
            print("WARNING: VLM service returned 'random' as model name, using STUB_MODEL fallback")
            used_model = "STUB_MODEL"

        # Fallback info (if any)
        if result.get("fallback_used"):
            raw = _annotatable(raw)
            raw["fallback_info"] = {
                "original_model": result.get("original_model"),
                "fallback_model": used_model,
                "reason": result.get("fallback_reason"),
            }

    except Exception as e:
        # Deliberate best-effort: a VLM failure still produces a stored caption.
        print(f"VLM error, using fallback: {e}")
        text = "This is a fallback caption due to VLM service error."
        used_model = "STUB_MODEL"
        raw = {"error": str(e), "fallback": True}
        metadata = {}

    caption = crud.create_caption(
        db,
        image_id=image_id,
        title=title,
        prompt=prompt_obj.p_code,
        model_code=used_model,
        raw_json=raw,
        text=text,
        metadata=metadata,
    )

    db.refresh(caption)
    print(f"DEBUG: Caption created, caption object: {caption}")
    print(f"DEBUG: caption_id: {caption.caption_id}")
    return schemas.CaptionOut.from_orm(caption)

@router.get(
    "/captions/legacy",
    response_model=List[schemas.ImageOut],
)
def get_all_captions_legacy_format(
    request: Request,
    db: Session = Depends(get_db),
):
    """Get all images with captions in the old format for backward compatibility.

    Produces one ``ImageOut`` per (caption, image) pair: the image is converted
    with ``convert_image_to_dict`` and the caption's fields are laid over it.
    Captions without images contribute nothing to the result.
    """
    # Kept function-local (presumably to avoid a circular import with
    # `.upload` — confirm), but resolved once instead of once per image.
    from .upload import convert_image_to_dict

    print("DEBUG: Fetching all captions in legacy format...")
    captions = crud.get_all_captions_with_images(db)
    print(f"DEBUG: Found {len(captions)} captions")

    # The base URL depends only on the request, so compute it once.
    base_url = str(request.base_url).rstrip('/')

    result = []
    for caption in captions:
        db.refresh(caption)
        if not caption.images:
            continue

        # Overlay caption fields (legacy shape); identical for every image
        # attached to this caption, so build the mapping once per caption.
        caption_fields = {
            "title": caption.title,
            "prompt": caption.prompt,
            "model": caption.model,
            "schema_id": caption.schema_id,
            "raw_json": caption.raw_json,
            "generated": caption.generated,
            "edited": caption.edited,
            "accuracy": caption.accuracy,
            "context": caption.context,
            "usability": caption.usability,
            "starred": caption.starred,
            "created_at": caption.created_at,
            "updated_at": caption.updated_at,
        }

        for image in caption.images:
            url = f"{base_url}/api/images/{image.image_id}/file"
            print(f"DEBUG: Generated image URL: {url}")
            img_dict = convert_image_to_dict(image, url)
            img_dict.update(caption_fields)
            result.append(schemas.ImageOut(**img_dict))
    print(f"DEBUG: Returning {len(result)} legacy format results")
    return result

@router.get(
    "/captions",
    response_model=List[schemas.CaptionOut],
)
def get_all_captions_with_images(
    db: Session = Depends(get_db),
):
    """Return every caption from ``crud.get_all_captions_with_images`` as ``CaptionOut``."""
    print("DEBUG: Fetching all captions...")
    rows = crud.get_all_captions_with_images(db)
    print(f"DEBUG: Found {len(rows)} captions")

    def _serialize(row):
        # Log, reload from the database, then convert — in that order per row.
        print(f"DEBUG: Processing caption {row.caption_id}, title: {row.title}, generated: {row.generated}, model: {row.model}")
        db.refresh(row)
        return schemas.CaptionOut.from_orm(row)

    payload = [_serialize(row) for row in rows]
    print(f"DEBUG: Returning {len(payload)} formatted results")
    return payload

@router.get(
    "/images/{image_id}/captions",
    response_model=List[schemas.CaptionOut],
)
def get_captions_by_image(
    image_id: str,
    db: Session = Depends(get_db),
):
    """Return the captions that ``crud`` reports for ``image_id`` as ``CaptionOut`` items."""

    def _fresh_out(row):
        # Reload the row before converting it to the response schema.
        db.refresh(row)
        return schemas.CaptionOut.from_orm(row)

    return [_fresh_out(row) for row in crud.get_captions_by_image(db, image_id)]

@router.get(
    "/captions/{caption_id}",
    response_model=schemas.CaptionOut,
)
def get_caption(
    caption_id: str,
    db: Session = Depends(get_db),
):
    """Return a single caption by id, or respond 404 when the lookup is empty."""
    found = crud.get_caption(db, caption_id)
    if found:
        db.refresh(found)
        return schemas.CaptionOut.from_orm(found)
    raise HTTPException(status_code=404, detail="caption not found")

@router.put(
    "/captions/{caption_id}",
    response_model=schemas.CaptionOut,
)
def update_caption(
    caption_id: str,
    update: schemas.CaptionUpdate,
    db: Session = Depends(get_db),
):
    """Apply ``update`` to the caption with ``caption_id`` and return the result.

    Responds 404 when ``crud.update_caption`` returns nothing.
    """
    updated = crud.update_caption(db, caption_id, update)
    if updated:
        db.refresh(updated)
        return schemas.CaptionOut.from_orm(updated)
    raise HTTPException(status_code=404, detail="caption not found")

@router.put(
    "/images/{image_id}/caption",
    response_model=schemas.CaptionOut,
)
def update_caption_by_image(
    image_id: str,
    update: schemas.CaptionUpdate,
    db: Session = Depends(get_db),
):
    """Update the first caption attached to an image (backward-compatible route).

    Responds 404 when the image is missing, when it has no captions, or when
    the update finds no caption to change.
    """
    image = crud.get_image(db, image_id)
    if not image:
        raise HTTPException(status_code=404, detail="image not found")
    if not image.captions:
        raise HTTPException(status_code=404, detail="no captions found for this image")

    # Only the first caption of the image is targeted by this route.
    first_caption_id = str(image.captions[0].caption_id)
    updated = crud.update_caption(db, first_caption_id, update)
    if not updated:
        raise HTTPException(status_code=404, detail="caption not found")

    db.refresh(updated)
    return schemas.CaptionOut.from_orm(updated)

@router.delete(
    "/captions/{caption_id}",
)
def delete_caption(
    caption_id: str,
    db: Session = Depends(get_db),
):
    """Delete the caption with ``caption_id``; respond 404 if ``crud`` reports no deletion."""
    if not crud.delete_caption(db, caption_id):
        raise HTTPException(status_code=404, detail="caption not found")
    return {"message": "Caption deleted successfully"}