RedHatAI
/

Llama-3.3-70B-Instruct-FP8-block

Text Generation

compressed-tensors

Model card Files Files and versions

krishnateja95 committed 5 days ago

Commit

f1daf0e

·

verified ·

1 Parent(s): 3f152ba

Update README.md

Files changed (1) hide show

README.md +3 -13

README.md CHANGED Viewed

@@ -58,17 +58,9 @@ client = OpenAI(
 model = "nm-testing/Llama-3.3-70B-Instruct-FP8-block"
 messages = [
-    {
-        "role": "user",
-        "content": [
-            {
-                "type": "image_url",
-                "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"},
-            },
-            {"type": "text", "text": "Describe this image."},
-        ],
-    }
 ]
 outputs = client.chat.completions.create(
@@ -94,7 +86,7 @@ from llmcompressor import oneshot
 from llmcompressor.modeling import replace_modules_for_calibration
 from llmcompressor.modifiers.quantization import QuantizationModifier
-MODEL_ID = "nm-testing/Llama-3.3-70B-Instruct-FP8-block"
 # Load model.
 model = LlamaForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
@@ -139,7 +131,6 @@ The model was evaluated on the OpenLLMv1 leaderboard task, using [lm-evaluation-
     --tasks openllm \
     --write_out \
     --batch_size auto \
-    --output_path $output_path/openllm.json \
     --show_config
   ```
@@ -154,7 +145,6 @@ The model was evaluated on the OpenLLMv1 leaderboard task, using [lm-evaluation-
     --fewshot_as_multiturn \
     --write_out \
     --batch_size auto \
-    --output_path $output_path/leaderboard.json \
     --show_config
   ```

 model = "nm-testing/Llama-3.3-70B-Instruct-FP8-block"
 messages = [
+    {"role": "user", "content": "Explain quantum mechanics clearly and concisely."},
 ]
 outputs = client.chat.completions.create(
 from llmcompressor.modeling import replace_modules_for_calibration
 from llmcompressor.modifiers.quantization import QuantizationModifier
+MODEL_ID = "meta-llama/Llama-3.3-70B-Instruct"
 # Load model.
 model = LlamaForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
     --tasks openllm \
     --write_out \
     --batch_size auto \
     --show_config
   ```
     --fewshot_as_multiturn \
     --write_out \
     --batch_size auto \
     --show_config
   ```