peterproofpath
/

eagle

Model card · Files and versions

peterproofpath committed on 26 days ago

Commit

bd33527

·

verified ·

1 Parent(s): d3d5e4f

Update handler.py

Files changed (1) hide show

handler.py +9 -0

handler.py CHANGED Viewed

@@ -4,6 +4,10 @@ Model: nvidia/Eagle2.5-8B
 For ProofPath video assessment - long video understanding with up to 512 frames.
 Ideal for full rubric-based video grading in a single call.
 """
 from typing import Dict, List, Any, Optional, Union
@@ -28,6 +32,9 @@ class EndpointHandler:
         # The repository only contains handler.py and requirements.txt
         model_id = "nvidia/Eagle2.5-8B"
         # Determine device
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -37,6 +44,7 @@ class EndpointHandler:
         self.processor = Qwen2VLProcessor.from_pretrained(
             model_id,
             trust_remote_code=True,
         )
         # Set padding side for batch processing
@@ -49,6 +57,7 @@ class EndpointHandler:
             torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
             attn_implementation="flash_attention_2" if torch.cuda.is_available() else "sdpa",
             device_map="auto" if torch.cuda.is_available() else None,
         )
         if not torch.cuda.is_available():

 For ProofPath video assessment - long video understanding with up to 512 frames.
 Ideal for full rubric-based video grading in a single call.
+REQUIREMENTS:
+1. Set HF_TOKEN environment variable (model is gated)
+2. Accept license at https://huggingface.co/nvidia/Eagle2.5-8B
 """
 from typing import Dict, List, Any, Optional, Union
         # The repository only contains handler.py and requirements.txt
         model_id = "nvidia/Eagle2.5-8B"
+        # Get HF token from environment for gated model access
+        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
         # Determine device
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.processor = Qwen2VLProcessor.from_pretrained(
             model_id,
             trust_remote_code=True,
+            token=hf_token,
         )
         # Set padding side for batch processing
             torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
             attn_implementation="flash_attention_2" if torch.cuda.is_available() else "sdpa",
             device_map="auto" if torch.cuda.is_available() else None,
+            token=hf_token,
         )
         if not torch.cuda.is_available():