|
```
model: single_linear
config: Int8DynamicActivationIntxWeightConfig
config version: 1
torchao version: 0.14.dev
```
|
|
|
``` |
|
# Builds a one-layer bfloat16 model on CUDA, quantizes it in place with
# Int8DynamicActivationIntxWeightConfig, runs one forward pass, and uploads
# the state dict, the example inputs, and the resulting output to a new
# Hugging Face Hub model repository.
import io

import torch
from huggingface_hub import HfApi
from torchao.quantization import Int8DynamicActivationIntxWeightConfig, quantize_
from torchao.quantization.granularity import PerGroup


def _upload_object(api: HfApi, obj: object, path_in_repo: str, repo_id: str) -> None:
    """Serialize ``obj`` with ``torch.save`` and upload it to ``repo_id``.

    Args:
        api: Hub client used for the upload.
        obj: Any object ``torch.save`` can serialize.
        path_in_repo: Destination file name inside the repository.
        repo_id: Target repository, in ``"<user>/<name>"`` form.
    """
    buf = io.BytesIO()
    torch.save(obj, buf)
    # Writing advances the stream position to the end of the data; rewind so
    # the uploader starts reading from the first byte.
    buf.seek(0)
    api.upload_file(
        path_or_fileobj=buf,
        path_in_repo=path_in_repo,
        repo_id=repo_id,
    )


# Config version under test; also embedded in the repository name below.
version = 1

model = torch.nn.Sequential(
    torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda")
)

quant_config = Int8DynamicActivationIntxWeightConfig(
    weight_dtype=torch.int4,
    weight_granularity=PerGroup(32),
    version=version,
)
# quantize_ modifies `model` in place (its return value is not used).
quantize_(model, quant_config)

# Record one input/output pair from the quantized model so it can be uploaded
# next to the checkpoint.
example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),)
output = model(*example_inputs)

# Push to hub
USER_ID = "torchao-testing"
MODEL_NAME = "single-linear"
save_to = f"{USER_ID}/{MODEL_NAME}-Int8DynamicActivationIntxWeightConfig-v{version}-0.14.dev"

api = HfApi()
# NOTE(review): exist_ok=False presumably makes this raise when the repository
# already exists, which would keep a rerun from overwriting published
# artifacts — confirm against the huggingface_hub documentation.
api.create_repo(save_to, repo_type="model", exist_ok=False)

artifacts = (
    ("model.pt", model.state_dict()),
    ("model_inputs.pt", example_inputs),
    ("model_output.pt", output),
)
for path_in_repo, obj in artifacts:
    _upload_object(api, obj, path_in_repo, save_to)
|
``` |