"""Fit a StandardScaler + PCA pipeline on pixels read from ``.npz`` files.

Every ``.npz`` file in ``data_dir`` is loaded as a NumPy masked array (the
archive's entries are passed as keyword arguments to ``np.ma.MaskedArray``),
flattened to one row per pixel, and stacked into a single matrix.  A
``StandardScaler`` and a ``PCA`` are fitted on that matrix and saved together
with joblib as ``{"scaler": ..., "pca": ...}``.
"""

import os

import joblib
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm

data_dir = "./train_data"

# Length of the first axis of every cube (presumably the number of spectral
# bands -- confirm against the data set).
N_BANDS = 150
# Number of principal components kept by the PCA.
N_COMPONENTS = 16
# Where the fitted scaler/PCA pair is written.
PIPELINE_PATH = "pca_pipeline.pkl"


def _valid_pixel_rows(cube, n_bands=N_BANDS):
    """Return the unmasked pixels of *cube* as a plain ``(n_pixels, n_bands)`` array.

    *cube* is a masked array whose first axis has length *n_bands*.  A pixel
    is kept only when none of its band values is masked.
    """
    pixels = cube.reshape(n_bands, -1).transpose()
    # np.vstack and scikit-learn both ignore masks, so masked pixels must be
    # removed explicitly or they would leak into the fitted statistics.
    keep = ~np.ma.getmaskarray(pixels).any(axis=1)
    return np.ma.getdata(pixels)[keep]


data_list = []

# Sorted so the row order of the stacked matrix is reproducible; os.listdir
# returns entries in arbitrary order.
for file in tqdm(sorted(os.listdir(data_dir))):
    if not file.endswith(".npz"):
        continue
    hsi_path = os.path.join(data_dir, file)
    with np.load(hsi_path) as npz:
        arr = np.ma.MaskedArray(**npz)
    data_list.append(_valid_pixel_rows(arr))

if not data_list:
    raise ValueError(f"no .npz files found in {data_dir!r}")

x = np.vstack(data_list)

print("\n\n")
print(x.shape)

if x.shape[0] == 0:
    raise ValueError(f"all pixels in {data_dir!r} are masked; nothing to fit")

scaler = StandardScaler()
X_scaled = scaler.fit_transform(x)

pca = PCA(n_components=N_COMPONENTS)
pca.fit(X_scaled)

joblib.dump({"scaler": scaler, "pca": pca}, PIPELINE_PATH)