Update README.md
README.md
CHANGED
@@ -37,7 +37,7 @@ We have two ways of using our model for this application. The first is the recom
 import t2v_metrics
 
 ### For a single (video, text) pair:
-qwen_score = t2v_metrics.VQAScore(model='qwen2.5-vl-7b', checkpoint='chancharikm/qwen2.5-vl-7b-cam-motion
+qwen_score = t2v_metrics.VQAScore(model='qwen2.5-vl-7b', checkpoint='chancharikm/qwen2.5-vl-7b-cam-motion')
 video = "videos/baby.mp4" # a video path in string format
 text = "a baby crying"
 # Calculate probability of "Yes" response
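The scoring call itself falls outside this hunk; in the t2v_metrics API a VQAScore object is called directly on lists of media paths and texts. A minimal sketch of the step the snippet above leads into, assuming the usual t2v_metrics call signature (not shown in this diff):

scores = qwen_score(images=[video], texts=[text])  # probability of a "Yes" answer for this (video, text) pair
print(scores)  # one score for one video paired with one text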
@@ -55,7 +55,7 @@ import torch
 
 # Load the model
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    "chancharikm/qwen2.5-vl-7b-cam-motion
+    "chancharikm/qwen2.5-vl-7b-cam-motion", torch_dtype="auto", device_map="auto"
 )
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
 
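With the model and processor loaded as above, inference follows the standard Qwen2.5-VL chat-template flow. A minimal sketch, assuming the qwen_vl_utils helper imported later in this README; the video path and prompt here are placeholders:

from qwen_vl_utils import process_vision_info

messages = [
    {
        "role": "user",
        "content": [
            {"type": "video", "video": "videos/baby.mp4"},
            {"type": "text", "text": "Describe the camera motion in this video."},
        ],
    }
]

# Build the chat prompt and extract the video inputs the processor expects
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[prompt],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
).to(model.device)

# Generate and decode only the newly produced tokens
generated_ids = model.generate(**inputs, max_new_tokens=128)
trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
print(processor.batch_decode(trimmed, skip_special_tokens=True)[0])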
@@ -128,7 +128,7 @@ We have two ways of using our model for this application. The first is the recom
 import t2v_metrics
 
 ### For a single (video, text) pair:
-qwen_score = t2v_metrics.VQAScore(model='qwen2.5-vl-7b', checkpoint='chancharikm/qwen2.5-vl-7b-cam-motion
+qwen_score = t2v_metrics.VQAScore(model='qwen2.5-vl-7b', checkpoint='chancharikm/qwen2.5-vl-7b-cam-motion')
 video = "videos/baby.mp4" # a video path in string format
 text = "Please describe this image: "
 # Calculate probability of "Yes" response
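The same VQAScore object can also score batches; a small sketch, assuming the usual t2v_metrics convention that the result is indexed as scores[i][j] for video i and text j (the second caption is an illustrative placeholder):

videos = ["videos/baby.mp4"]
texts = ["a baby crying", "a dog barking"]
scores = qwen_score(images=videos, texts=texts)  # len(videos) x len(texts) score matrix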
@@ -146,12 +146,12 @@ from qwen_vl_utils import process_vision_info
 
 # default: Load the model on the available device(s)
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    "chancharikm/qwen2.5-vl-7b-cam-motion
+    "chancharikm/qwen2.5-vl-7b-cam-motion", torch_dtype="auto", device_map="auto"
 )
 
 # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
 # model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-#     "chancharikm/qwen2.5-vl-7b-cam-motion
+#     "chancharikm/qwen2.5-vl-7b-cam-motion",
 #     torch_dtype=torch.bfloat16,
 #     attn_implementation="flash_attention_2",
 #     device_map="auto",
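The commented-out alternative loader is cut off at the hunk boundary; completed, and assuming the flash-attn package is installed, it would look roughly like this:

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "chancharikm/qwen2.5-vl-7b-cam-motion",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)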