File size: 12,429 Bytes
186c8e8
3cf9fa0
 
 
 
 
 
d7291ef
 
1686de5
 
351d460
186c8e8
3cf9fa0
 
 
 
 
 
 
 
 
 
 
 
 
 
186c8e8
d7291ef
 
186c8e8
3cf9fa0
 
 
 
186c8e8
3cf9fa0
 
 
 
 
 
 
 
d7291ef
 
351d460
3cf9fa0
 
 
 
 
 
 
186c8e8
 
 
351d460
 
186c8e8
 
351d460
 
186c8e8
 
 
 
3cf9fa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186c8e8
 
 
 
 
 
 
3cf9fa0
186c8e8
3cf9fa0
 
 
1686de5
186c8e8
 
 
 
 
 
 
 
d7291ef
 
186c8e8
d7291ef
186c8e8
 
 
 
 
 
 
 
 
 
1686de5
186c8e8
 
 
 
 
d7291ef
3cf9fa0
d7291ef
186c8e8
d7291ef
1686de5
d7291ef
186c8e8
d7291ef
3cf9fa0
 
d7291ef
 
3cf9fa0
 
 
 
186c8e8
3cf9fa0
186c8e8
 
3cf9fa0
 
186c8e8
 
d7291ef
186c8e8
 
 
 
 
 
 
 
 
3cf9fa0
186c8e8
 
3cf9fa0
 
 
186c8e8
 
872dec2
3cf9fa0
872dec2
3cf9fa0
186c8e8
3cf9fa0
872dec2
3cf9fa0
872dec2
351d460
 
 
3cf9fa0
 
186c8e8
 
351d460
186c8e8
 
 
 
 
 
 
351d460
3cf9fa0
186c8e8
872dec2
 
3cf9fa0
186c8e8
 
351d460
 
 
 
 
 
186c8e8
3cf9fa0
186c8e8
3cf9fa0
 
 
351d460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cf9fa0
 
351d460
 
 
 
3cf9fa0
351d460
 
 
 
 
 
 
3cf9fa0
351d460
 
3cf9fa0
 
 
351d460
 
3cf9fa0
351d460
3cf9fa0
 
 
 
351d460
3cf9fa0
351d460
 
 
 
3cf9fa0
351d460
3cf9fa0
351d460
3cf9fa0
 
 
351d460
 
 
3cf9fa0
351d460
3cf9fa0
351d460
3cf9fa0
351d460
3cf9fa0
351d460
 
 
 
3cf9fa0
 
351d460
 
 
 
 
 
 
 
 
 
3cf9fa0
 
 
351d460
 
 
3cf9fa0
351d460
186c8e8
 
3cf9fa0
186c8e8
 
3cf9fa0
186c8e8
3cf9fa0
186c8e8
 
8f65a47
3cf9fa0
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# services/huggingface_service.py
from __future__ import annotations


from .vlm_service import VLMService, ModelType, ServiceStatus

from typing import Dict, Any, List, Optional
import aiohttp
import base64
import time
import re
import json
import imghdr
import os


def _env_token() -> Optional[str]:
    """
    Return the HuggingFace API token found in the environment.

    Variables are consulted in priority order: ``HF_API_KEY``, then
    ``HF_TOKEN``, then ``HUGGINGFACEHUB_API_TOKEN``. An unset or empty
    higher-priority variable falls through to the next one; the value of the
    last variable is returned as-is (``None`` when it is unset).
    """
    for preferred_var in ("HF_API_KEY", "HF_TOKEN"):
        candidate = os.getenv(preferred_var)
        if candidate:
            return candidate
    return os.getenv("HUGGINGFACEHUB_API_TOKEN")


def _providers_url_default() -> str:
    """
    Return the URL of the OpenAI-compatible gateway on HF Inference Providers.

    The ``HF_PROVIDERS_URL`` environment variable overrides the built-in
    default when it is set (even to an empty string).
    """
    builtin_gateway = "https://api-inference.huggingface.co/providers/openai"
    return os.environ.get("HF_PROVIDERS_URL", builtin_gateway)


class HuggingFaceService(VLMService):
    """
    HuggingFace Inference Providers service implementation (OpenAI-compatible).
    - No network in __init__
    - Short, safe probe()
    - Lazy use during generate_*

    Error contract: every failure while talking to the provider is raised as a
    plain ``Exception`` whose message starts with ``MODEL_UNAVAILABLE`` so that
    callers can detect it and fall back to another model.
    """

    # Total time budget (seconds) for one chat-completion request / one probe call.
    _REQUEST_TIMEOUT_SECONDS = 60
    _PROBE_TIMEOUT_SECONDS = 5

    # Opening Markdown fence, bare or tagged ``json`` (any case). The negative
    # look-ahead keeps fences for other languages (```python ...) untouched.
    _OPENING_FENCE = re.compile(r"^```(?:json\b)?(?![A-Za-z])\s*", re.IGNORECASE)
    _CLOSING_FENCE = re.compile(r"\s*```$")

    def __init__(self, api_key: str, model_id: str, providers_url: str, public_name: str | None = None):
        """
        Store the configuration; performs no network I/O.

        Args:
            api_key: HuggingFace token sent as a Bearer credential.
            model_id: Model identifier sent in the ``model`` field of requests.
            providers_url: Endpoint that chat-completion requests are POSTed to.
            public_name: Display name; defaults to ``model_id`` and then to
                ``"HUGGINGFACE"`` when both are empty.

        When ``api_key`` or ``model_id`` is missing the instance is flagged as
        unavailable (``is_available = False``, ``status = DEGRADED``) instead
        of raising.
        """
        display_name = public_name or (model_id or "HUGGINGFACE")
        super().__init__(
            display_name,
            ModelType.CUSTOM,
            provider="huggingface",
            lazy_init=True,
        )
        self.api_key = api_key
        self.model_id = model_id
        self.providers_url = providers_url
        # also keep model_name aligned
        self.model_name = display_name
        if not self.api_key or not self.model_id:
            self.is_available = False
            self.status = ServiceStatus.DEGRADED

    # ---------- helpers ----------

    def _guess_mime(self, image_bytes: bytes) -> str:
        """
        Guess the MIME type of an image from its leading magic bytes.

        Recognizes PNG, GIF and WebP; everything else (including JPEG, empty
        or unknown data) is reported as ``image/jpeg``. Implemented by hand
        because the ``imghdr`` module is deprecated and removed in Python 3.13.
        """
        header = bytes(image_bytes[:12]) if image_bytes else b""
        if header.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if header[:6] in (b"GIF87a", b"GIF89a"):
            return "image/gif"
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return "image/webp"
        return "image/jpeg"

    def _require_configured(self) -> None:
        """Raise the MODEL_UNAVAILABLE signal when credentials or model id are missing."""
        if not self.api_key or not self.model_id:
            raise Exception("MODEL_UNAVAILABLE: HuggingFace credentials or model_id missing.")

    @staticmethod
    def _build_instruction(prompt: str, metadata_instructions: str) -> str:
        """Join the stripped prompt and metadata instructions with a blank line, skipping empty parts."""
        pieces = ((prompt or "").strip(), (metadata_instructions or "").strip())
        return "\n\n".join(piece for piece in pieces if piece)

    def _image_part(self, image_bytes: bytes) -> Dict[str, Any]:
        """Wrap raw image bytes as an OpenAI-style ``image_url`` content part using a base64 data URL."""
        mime = self._guess_mime(image_bytes)
        encoded = base64.b64encode(image_bytes).decode("utf-8")
        return {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{encoded}"}}

    async def _chat_completion(self, content: List[Dict[str, Any]], max_tokens: int) -> Dict[str, Any]:
        """
        POST a single-user-message chat completion and return the decoded JSON object.

        Args:
            content: OpenAI-style content parts (text and image_url entries).
            max_tokens: Completion token limit sent to the provider.

        Raises:
            Exception: always with a ``MODEL_UNAVAILABLE`` message, for
                transport errors, non-200 statuses and undecodable bodies.
        """
        payload = {
            "model": self.model_id,
            "messages": [{"role": "user", "content": content}],
            "max_tokens": max_tokens,
            "temperature": 0.2,
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        network_error = f"MODEL_UNAVAILABLE: {self.model_name} is unavailable due to a network/error."

        try:
            timeout = aiohttp.ClientTimeout(total=self._REQUEST_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.providers_url, headers=headers, json=payload) as resp:
                    status = resp.status
                    raw_text = await resp.text()
        except Exception as exc:
            # Never leak aiohttp exceptions outward as-is; normalize to the fallback signal
            raise Exception(network_error) from exc

        if status != 200:
            # Surface a consistent, catchable error for fallback
            raise Exception(f"MODEL_UNAVAILABLE: {self.model_name} unavailable (HTTP {status}).")

        # Decode the body we already read instead of calling resp.json(), which
        # additionally rejects responses whose Content-Type is not JSON.
        try:
            result = json.loads(raw_text)
        except ValueError as exc:
            raise Exception(network_error) from exc
        if not isinstance(result, dict):
            raise Exception(network_error)
        return result

    @staticmethod
    def _extract_content(result: Dict[str, Any]) -> Any:
        """
        Pull the assistant text out of a chat-completion response.

        Falls back to ``reasoning_content`` when ``content`` is empty and
        flattens list-style content blocks into newline-joined text. Missing
        or malformed ``choices`` / ``message`` entries yield empty content
        instead of raising.
        """
        choices = result.get("choices") if isinstance(result, dict) else None
        first_choice = choices[0] if isinstance(choices, list) and choices else {}
        message = first_choice.get("message") if isinstance(first_choice, dict) else None
        if not isinstance(message, dict):
            message = {}

        content = message.get("content", "")

        # GLM models sometimes put content in reasoning_content
        if not content and message.get("reasoning_content"):
            content = message.get("reasoning_content", "")

        if isinstance(content, list):
            pieces = []
            for block in content:
                if isinstance(block, dict):
                    piece = block.get("text") or block.get("content") or ""
                else:
                    piece = block
                pieces.append(piece if isinstance(piece, str) else str(piece))
            content = "\n".join(piece for piece in pieces if piece)
        return content

    @classmethod
    def _clean_caption(cls, content: Any) -> str:
        """Return the content as stripped text with an accidental JSON / bare Markdown fence removed."""
        text = content if isinstance(content, str) else str(content or "")
        caption = text.strip()
        opening = cls._OPENING_FENCE.match(caption)
        if opening:
            caption = caption[opening.end():]
            caption = cls._CLOSING_FENCE.sub("", caption)
        return caption

    def _build_result(self, result: Dict[str, Any], started: float, **raw_extras: Any) -> Dict[str, Any]:
        """
        Turn a provider response into the caption dictionary returned to callers.

        When the model answered with a JSON object its ``description``,
        ``analysis``, ``recommended_actions`` and ``metadata`` fields are used;
        any other answer (plain text, or JSON that is not an object) is treated
        as free text and becomes both ``caption`` and ``analysis``.

        Args:
            result: Decoded chat-completion response.
            started: ``time.perf_counter()`` reading taken when the call began.
            **raw_extras: Extra entries appended to ``raw_response``.
        """
        content = self._extract_content(result)
        caption = self._clean_caption(content)

        try:
            candidate = json.loads(caption)
        except ValueError:
            candidate = None
        # Only a JSON object carries the structured fields; a bare list/number/
        # string would not support the .get() lookups below.
        parsed = candidate if isinstance(candidate, dict) else None

        if parsed is None:
            description = ""
            analysis = caption
            recommended_actions = ""
            metadata = {}
            caption_text = caption
        else:
            description = parsed.get("description", "")
            analysis = parsed.get("analysis", caption)
            recommended_actions = parsed.get("recommended_actions", "")
            metadata = parsed.get("metadata") or {}
            caption_text = f"Description: {description}\n\nAnalysis: {analysis}\n\nRecommended Actions: {recommended_actions}"

        raw_response = {
            "model": self.model_id,
            "content": content,
            "parsed": parsed,
        }
        raw_response.update(raw_extras)

        return {
            "caption": caption_text,
            "metadata": metadata,
            "confidence": None,
            "processing_time": time.perf_counter() - started,
            "raw_response": raw_response,
            "description": description,
            "analysis": analysis,
            "recommended_actions": recommended_actions,
        }

    # ---------- lifecycle ----------

    async def probe(self) -> bool:
        """
        Lightweight reachability check.
        - Validates token with whoami
        - Checks model endpoint exists/reachable
        Never raises, returns bool.
        """
        if not self.api_key or not self.model_id:
            return False

        headers_auth = {"Authorization": f"Bearer {self.api_key}"}
        model_url = f"https://api-inference.huggingface.co/models/{self.model_id}"

        try:
            timeout = aiohttp.ClientTimeout(total=self._PROBE_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                # Token check
                async with session.get("https://huggingface.co/api/whoami-v2", headers=headers_auth) as whoami:
                    if whoami.status != 200:
                        return False

                # Model reachability (Inference API — GET is fine)
                async with session.get(model_url, headers=headers_auth) as model_resp:
                    # Consider 200, 503 (loading), 403/404 (exists but gated/private) as "reachable"
                    return model_resp.status in (200, 503, 403, 404)
        except Exception:
            return False

    async def ensure_ready(self) -> bool:
        """Mark the service as initialized; there is nothing to warm up. Always returns True."""
        self._initialized = True
        return True

    # ---------- caption APIs ----------

    async def generate_caption(
        self,
        image_bytes: bytes,
        prompt: str,
        metadata_instructions: str = "",
    ) -> Dict[str, Any]:
        """
        Generate caption using HF Inference Providers (OpenAI-style chat).

        Args:
            image_bytes: Raw bytes of the image to caption.
            prompt: Main instruction text.
            metadata_instructions: Optional extra instructions appended after the prompt.

        Returns:
            Dict with ``caption``, ``metadata``, ``confidence`` (always None),
            ``processing_time`` (seconds), ``raw_response``, ``description``,
            ``analysis`` and ``recommended_actions``.

        Raises:
            Exception: with a ``MODEL_UNAVAILABLE`` message when the service is
                not configured or the provider call fails.
        """
        self._require_configured()
        started = time.perf_counter()

        content = [
            {"type": "text", "text": self._build_instruction(prompt, metadata_instructions)},
            self._image_part(image_bytes),
        ]
        result = await self._chat_completion(content, max_tokens=512)
        return self._build_result(result, started)

    async def generate_multi_image_caption(
        self,
        image_bytes_list: List[bytes],
        prompt: str,
        metadata_instructions: str = "",
    ) -> Dict[str, Any]:
        """
        Generate caption for multiple images using HF Inference Providers (OpenAI-style chat).

        Args:
            image_bytes_list: Raw bytes of each image, sent in the given order.
            prompt: Main instruction text.
            metadata_instructions: Optional extra instructions appended after the prompt.

        Returns:
            Same dictionary as ``generate_caption``, with an additional
            ``image_count`` entry inside ``raw_response``.

        Raises:
            Exception: with a ``MODEL_UNAVAILABLE`` message when the service is
                not configured or the provider call fails.
        """
        self._require_configured()
        started = time.perf_counter()

        content = [{"type": "text", "text": self._build_instruction(prompt, metadata_instructions)}]
        content.extend(self._image_part(image_bytes) for image_bytes in image_bytes_list)

        result = await self._chat_completion(content, max_tokens=800)
        return self._build_result(result, started, image_count=len(image_bytes_list))


# --- Generic wrapper for easy dynamic registration ---
class ProvidersGenericVLMService(HuggingFaceService):
    """
    Generic wrapper so you can register ANY Providers VLM by model_id from config/DB.
    Example:
      ProvidersGenericVLMService(None, "Qwen/Qwen2.5-VL-32B-Instruct", "QWEN2_5_VL_32B")

    The endpoint comes from ``_providers_url_default()``, so the
    ``HF_PROVIDERS_URL`` environment override applies here as well.
    """
    def __init__(self, api_key: str, model_id: str, public_name: str | None = None):
        """
        Args:
            api_key: HuggingFace token; a missing token leaves the service degraded.
            model_id: Providers model identifier, e.g. ``"org/model"``.
            public_name: Display name; defaults to ``model_id`` upper-cased with
                ``/`` replaced by ``_``.
        """
        # Tolerate a missing model_id: the parent constructor flags the service
        # as unavailable/degraded in that case, so we must not crash on
        # None.replace() before reaching it.
        derived_name = (model_id or "").replace("/", "_").upper()
        super().__init__(
            api_key=api_key,
            model_id=model_id,
            providers_url=_providers_url_default(),
            public_name=public_name or derived_name or None,
        )