peterproofpath committed on
Commit
bd33527
·
verified ·
1 Parent(s): d3d5e4f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +9 -0
handler.py CHANGED
@@ -4,6 +4,10 @@ Model: nvidia/Eagle2.5-8B
4
 
5
  For ProofPath video assessment - long video understanding with up to 512 frames.
6
  Ideal for full rubric-based video grading in a single call.
 
 
 
 
7
  """
8
 
9
  from typing import Dict, List, Any, Optional, Union
@@ -28,6 +32,9 @@ class EndpointHandler:
28
  # The repository only contains handler.py and requirements.txt
29
  model_id = "nvidia/Eagle2.5-8B"
30
 
 
 
 
31
  # Determine device
32
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
33
 
@@ -37,6 +44,7 @@ class EndpointHandler:
37
  self.processor = Qwen2VLProcessor.from_pretrained(
38
  model_id,
39
  trust_remote_code=True,
 
40
  )
41
 
42
  # Set padding side for batch processing
@@ -49,6 +57,7 @@ class EndpointHandler:
49
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
50
  attn_implementation="flash_attention_2" if torch.cuda.is_available() else "sdpa",
51
  device_map="auto" if torch.cuda.is_available() else None,
 
52
  )
53
 
54
  if not torch.cuda.is_available():
 
4
 
5
  For ProofPath video assessment - long video understanding with up to 512 frames.
6
  Ideal for full rubric-based video grading in a single call.
7
+
8
+ REQUIREMENTS:
9
+ 1. Set HF_TOKEN environment variable (model is gated)
10
+ 2. Accept license at https://huggingface.co/nvidia/Eagle2.5-8B
11
  """
12
 
13
  from typing import Dict, List, Any, Optional, Union
 
32
  # The repository only contains handler.py and requirements.txt
33
  model_id = "nvidia/Eagle2.5-8B"
34
 
35
+ # Get HF token from environment for gated model access
36
+ hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
37
+
38
  # Determine device
39
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
40
 
 
44
  self.processor = Qwen2VLProcessor.from_pretrained(
45
  model_id,
46
  trust_remote_code=True,
47
+ token=hf_token,
48
  )
49
 
50
  # Set padding side for batch processing
 
57
  torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
58
  attn_implementation="flash_attention_2" if torch.cuda.is_available() else "sdpa",
59
  device_map="auto" if torch.cuda.is_available() else None,
60
+ token=hf_token,
61
  )
62
 
63
  if not torch.cuda.is_available():