# TSEditor / efficiency.py
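"""Benchmark the per-sample inference time of TSEditor's controlled sampling.

Loads a trained model, times unconditional sampling as well as sum-controlled
(AUC) and anchor-controlled sampling under several control settings, and prints
the average inference time per generated sample.
"""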
import argparse
import os
import pickle
import time
import warnings
from pathlib import Path

import numpy as np
import torch

os.environ["WANDB_ENABLED"] = "false"
warnings.simplefilter("ignore", UserWarning)

from engine.solver import Trainer
from data.build_dataloader import build_dataloader, build_dataloader_cond
from utils.io_utils import load_yaml_config, instantiate_from_config
def load_cached_results(cache_dir):
    results = {"unconditional": None, "sum_controlled": {}, "anchor_controlled": {}}
    for cache_file in cache_dir.glob("*.pkl"):
        with open(cache_file, "rb") as f:
            key = cache_file.stem
            if key == "unconditional":
                results["unconditional"] = pickle.load(f)
            elif key.startswith("sum_"):
                param = key[4:]  # strip the 'sum_' prefix
                results["sum_controlled"][param] = pickle.load(f)
            elif key.startswith("anchor_"):
                param = key[7:]  # strip the 'anchor_' prefix
                results["anchor_controlled"][param] = pickle.load(f)
    return results
def save_result(cache_dir, key, subkey, data):
    # Saving is currently short-circuited, so results are never cached to disk;
    # delete this early return to persist results in the layout that
    # load_cached_results() reads.
    return
    filename = f"{key}_{subkey}.pkl" if subkey else f"{key}.pkl"
    with open(cache_dir / filename, "wb") as f:
        pickle.dump(data, f)
class Arguments:
    def __init__(self, config_path, gpu=0) -> None:
        self.config_path = config_path
        self.save_dir = (
            "../../../data/" + os.path.basename(self.config_path).split(".")[0]
        )
        self.gpu = gpu
        os.makedirs(self.save_dir, exist_ok=True)
        self.mode = "infill"
        self.missing_ratio = 0.95
        self.milestone = 10
def parse_args():
    parser = argparse.ArgumentParser(description="Controlled Sampling")
    parser.add_argument(
        "--config_path", type=str, default="./config/modified/energy.yaml"
    )
    parser.add_argument("--gpu", type=int, default=0)
    return parser.parse_args()
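# Example invocation (paths assume the repository's default layout):
#   python efficiency.py --config_path ./config/modified/energy.yaml --gpu 0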
def run(run_args):
    args = Arguments(run_args.config_path, run_args.gpu)
    configs = load_yaml_config(args.config_path)
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.gpu)

    dl_info = build_dataloader(configs, args)
    model = instantiate_from_config(configs["model"]).to(device)
    trainer = Trainer(config=configs, args=args, model=model, dataloader=dl_info)
    trainer.load("10")  # i.e., args.milestone
    dataset = dl_info["dataset"]

    test_dl_info = build_dataloader_cond(configs, args)
    test_dataloader, test_dataset = test_dl_info["dataloader"], test_dl_info["dataset"]
    coef = configs["dataloader"]["test_dataset"]["coefficient"]
    stepsize = configs["dataloader"]["test_dataset"]["step_size"]
    sampling_steps = configs["dataloader"]["test_dataset"]["sampling_steps"]
    seq_length, feature_dim = test_dataset.window, test_dataset.var_num
    dataset_name = os.path.basename(args.config_path).split(".")[0].split("-")[0]
    mapper = {
        "sines": "sines",
        "revenue": "revenue",
        "energy": "energy",
        "fmri": "fMRI",
    }
    gap = seq_length // 5
    if seq_length in [96, 192, 384]:
        ori_data = np.load(
            os.path.join(
                "../../../data/train/",
                str(seq_length),
                dataset_name,
                "samples",
                f'{mapper[dataset_name].replace("sines", "sine")}_norm_truth_{seq_length}_train.npy',
            )
        )
        masks = np.load(
            os.path.join(
                "../../../data/train/",
                str(seq_length),
                dataset_name,
                "samples",
                f'{mapper[dataset_name].replace("sines", "sine")}_masking_{seq_length}.npy',
            )
        )
    else:
        ori_data = np.load(
            os.path.join(
                "../../../data/train/",
                dataset_name,
                "samples",
                f"{mapper[dataset_name]}_norm_truth_{seq_length}_train.npy",
            )
        )
        masks = np.load(
            os.path.join(
                "../../../data/train/",
                dataset_name,
                "samples",
                f"{mapper[dataset_name]}_masking_{seq_length}.npy",
            )
        )
    sample_num, _, _ = masks.shape
    ori_data = ori_data[:sample_num]
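    # ori_data and masks are only used to bound how many samples are generated:
    # sample_num (from masks) caps sampling_size below.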
    sampling_size = min(1000, len(test_dataset), sample_num)
    batch_size = 500
    print(f"Sampling size: {sampling_size}, Batch size: {batch_size}")

    ### Cache file path
    cache_dir = Path(f"../../../data/cache/{dataset_name}_{seq_length}")
    cache_dir.mkdir(parents=True, exist_ok=True)
    # results = load_cached_results(cache_dir)
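    # When saving is enabled, cached results are keyed by file name
    # (unconditional.pkl, sum_<param>.pkl, anchor_<param>.pkl), which is the
    # layout load_cached_results() expects.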
    results = {"unconditional": None, "sum_controlled": {}, "anchor_controlled": {}}

    def measure_inference_time(func, *args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        return result, (end_time - start_time)
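    # time.time() measures wall-clock time and CUDA kernels are launched
    # asynchronously, so a stricter GPU measurement would synchronize first.
    # A minimal sketch of such a helper (not used below):
    def measure_inference_time_synced(func, *args, **kwargs):
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start_time = time.time()
        result = func(*args, **kwargs)
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        return result, (time.time() - start_time)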
    timing_results = {}

    ### Unconditional sampling
    if results["unconditional"] is None:
        print("Generating unconditional data...")
        results["unconditional"], timing = measure_inference_time(
            trainer.control_sample,
            num=sampling_size,
            size_every=batch_size,
            shape=[seq_length, feature_dim],
            model_kwargs={
                "gradient_control_signal": {},
                "coef": coef,
                "learning_rate": stepsize,
            },
        )
        timing_results["unconditional"] = timing / sampling_size
        save_result(cache_dir, "unconditional", "", results["unconditional"])
    ### Different AUC values
    auc_weights = [10]
    auc_values = [-100, 20, 50, 150]  # -200, -150, -100, -50, 0, 20, 30, 50, 100, 150
    for auc in auc_values:
        for weight in auc_weights:
            key = f"auc_{auc}_weight_{weight}"
            if key not in results["sum_controlled"]:
                print(f"Generating sum controlled data - AUC: {auc}, Weight: {weight}")
                results["sum_controlled"][key], timing = measure_inference_time(
                    trainer.control_sample,
                    num=sampling_size,
                    size_every=batch_size,
                    shape=[seq_length, feature_dim],
                    model_kwargs={
                        "gradient_control_signal": {"auc": auc, "auc_weight": weight},
                        "coef": coef,
                        "learning_rate": stepsize,
                    },
                )
                timing_results[f"sum_controlled_{key}"] = timing / sampling_size
                save_result(cache_dir, "sum", key, results["sum_controlled"][key])
    ### Different AUC weights
    auc_weights = [1, 10, 50, 100]
    auc_values = [-100]
    for auc in auc_values:
        for weight in auc_weights:
            key = f"auc_{auc}_weight_{weight}"
            if key not in results["sum_controlled"]:
                print(f"Generating sum controlled data - AUC: {auc}, Weight: {weight}")
                results["sum_controlled"][key], timing = measure_inference_time(
                    trainer.control_sample,
                    num=sampling_size,
                    size_every=batch_size,
                    shape=[seq_length, feature_dim],
                    model_kwargs={
                        "gradient_control_signal": {"auc": auc, "auc_weight": weight},
                        "coef": coef,
                        "learning_rate": stepsize,
                    },
                )
                timing_results[f"sum_controlled_{key}"] = timing / sampling_size
                save_result(cache_dir, "sum", key, results["sum_controlled"][key])
    ### Different AUC segments
    auc_weights = [10]
    auc_values = [150]
    auc_average = 10
    auc_segments = ((gap, 2 * gap), (2 * gap, 3 * gap), (3 * gap, 4 * gap))
    auc = auc_values[0]
    weight = auc_weights[0]
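    # Each segment's target sum is auc_average times its length; for example,
    # with seq_length = 96 the gap is 96 // 5 = 19, so every gap-long segment
    # targets a sum of 10 * 19 = 190.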
    for segment in auc_segments:
        key = f"auc_{auc}_weight_{weight}_segment_{segment[0]}_{segment[1]}"
        if key not in results["sum_controlled"]:
            print(
                f"Generating sum controlled data - AUC: {auc}, Weight: {weight}, Segment: {segment}"
            )
            results["sum_controlled"][key], timing = measure_inference_time(
                trainer.control_sample,
                num=sampling_size,
                size_every=batch_size,
                shape=[seq_length, feature_dim],
                model_kwargs={
                    "gradient_control_signal": {
                        "auc": auc_average * (segment[1] - segment[0]),  # / seq_length,
                        "auc_weight": weight,
                        "segment": [segment],
                    },
                    "coef": coef,
                    "learning_rate": stepsize,
                },
            )
            timing_results[f"sum_controlled_{key}"] = timing / sampling_size
            save_result(cache_dir, "sum", key, results["sum_controlled"][key])
    ### Different anchors
    anchor_values = [-0.8, 0.6, 1.0]
    anchor_weights = [0.01, 0.01, 0.5, 1.0]  # note: 0.01 appears twice; the duplicate run is skipped by the key check
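    # Anchor control pins selected points of the first feature: target holds the
    # desired value (peak) every `gap` steps starting at gap // 2, and
    # partial_mask carries the per-point weight at the same positions.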
    for peak in anchor_values:
        for weight in anchor_weights:
            key = f"peak_{peak}_weight_{weight}"
            if key not in results["anchor_controlled"]:
                mask = np.zeros((seq_length, feature_dim), dtype=np.float32)
                mask[gap // 2 :: gap, 0] = weight
                target = np.zeros((seq_length, feature_dim), dtype=np.float32)
                target[gap // 2 :: gap, 0] = peak
                print(f"Anchor controlled data - Peak: {peak}, Weight: {weight}")
                results["anchor_controlled"][key], timing = measure_inference_time(
                    trainer.control_sample,
                    num=sampling_size,
                    size_every=batch_size,
                    shape=[seq_length, feature_dim],
                    model_kwargs={
                        "gradient_control_signal": {},  # e.g. {"auc": -50, "auc_weight": 10.0}
                        "coef": coef,
                        "learning_rate": stepsize,
                    },
                    target=target,
                    partial_mask=mask,
                )
                timing_results[f"anchor_controlled_{key}"] = timing / sampling_size
                save_result(cache_dir, "anchor", key, results["anchor_controlled"][key])
    ### Rerun unconditional sampling (only executes if the first pass above was
    ### skipped, i.e. results["unconditional"] is still None)
    if results["unconditional"] is None:
        print("Generating unconditional data...")
        results["unconditional"], timing = measure_inference_time(
            trainer.control_sample,
            num=sampling_size,
            size_every=batch_size,
            shape=[seq_length, feature_dim],
            model_kwargs={
                "gradient_control_signal": {},
                "coef": coef,
                "learning_rate": stepsize,
            },
        )
        timing_results["unconditional"] = timing / sampling_size
        save_result(cache_dir, "unconditional", "", results["unconditional"])
    # After all sampling is done, print timing results
    print("\nAverage Inference Time per Sample:")
    print("-" * 40)
    for key, time_per_sample in timing_results.items():
        print(f"{key}: {time_per_sample:.4f} seconds")
    # return results, dataset_name, seq_length
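    # If the per-sample timings should be kept for later comparison, they could
    # be dumped next to the cached samples. A minimal sketch (the file name
    # "timing_results.json" is illustrative, not part of the original script):
    #
    #   import json
    #   with open(cache_dir / "timing_results.json", "w") as f:
    #       json.dump(timing_results, f, indent=2)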
if __name__ == "__main__":
    args = parse_args()
    run(args)