import os
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
from joblib import dump, load
from PIL import Image
from sklearn.linear_model import LogisticRegression
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import CIFAR100
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from tqdm import tqdm

import clip  # OpenAI CLIP; required by the "ViT*"/"RN*" branches in the model loop
import open_clip

class PHASE(Dataset):
    """PHASE dataset."""

    def __init__(self, csv_file, root_dir, transform=None, resolution=224):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            resolution (int): Side length that every image is resized to.
        """
        self.annotations = pd.read_csv(csv_file, sep=' ', header=None)
        self.root_dir = root_dir
        self.transform = transform
        self.base_transforms = Compose([
            Resize((resolution, resolution), interpolation=Image.BICUBIC)
        ])

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.annotations.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')
        label = self.annotations.iloc[idx, 1]

        image = self.base_transforms(image)

        if self.transform:
            image = self.transform(image)

        return image, label

class FACET(Dataset):
    """FACET dataset."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.annotations = pd.read_csv(csv_file, sep=' ', header=None)
        self.root_dir = root_dir
        self.transform = transform
        self.base_transforms = Compose([
            Resize((224, 224), interpolation=Image.BICUBIC)
        ])

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.annotations.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')
        label = self.annotations.iloc[idx, 1]

        image = self.base_transforms(image)

        if self.transform:
            image = self.transform(image)

        return image, label

class MORPH(Dataset):
    """MORPH dataset."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations
                (expects 'filepath' and 'gender' columns).
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.annotations = pd.read_csv(csv_file, sep=',', header=0)
        self.root_dir = root_dir
        self.transform = transform
        self.base_transforms = Compose([
            Resize((224, 224), interpolation=Image.BICUBIC)
        ])

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # The 'filepath' column already holds a full path, so root_dir is not joined here.
        img_name = self.annotations.iloc[idx]["filepath"]
        image = Image.open(img_name).convert('RGB')
        label = self.annotations.iloc[idx]["gender"]

        image = self.base_transforms(image)

        if self.transform:
            image = self.transform(image)

        return image, label

device = "cuda" if torch.cuda.is_available() else "cpu"

# Randomly initialized ResNet-50 baseline with the classification head stripped.
resnet_model = torchvision.models.resnet50(pretrained=False)
resnet_model.fc = torch.nn.Identity()
resnet_model.eval()

# Default output locations (the PHASE run below overrides features_root).
features_root = "features_facet_training_set"
cls_root = "classifiers_facet"

models = (
    "vit_b_16_cc3m_50_30ep",
    "vit_b_16_cc3m_original",
    "vit_b_16_cc3m_50_30ep_difficult_batches",
    "rn50_cc3m_mix_000",
    "rn50_cc3m_mix_100",
)

# Checkpoint paths, aligned one-to-one with `models` above.
weights = (
    "/home/kis/Desktop/rhome/kis/code/open_clip_latest/open_clip/logs/2024_08_27-11_48_49-model_ViT-B-16-lr_0.001-b_410-j_8-p_amp/checkpoints/epoch_30.pt",
    "/home/kis/Desktop/rhome/kis/code/open_clip/logs/2024_07_12-19_17_23-model_ViT-B-16-lr_0.001-b_410-j_4-p_amp/checkpoints/epoch_30.pt",
    "/home/kis/Desktop/rhome/kis/code/open_clip_latest/open_clip/logs/2024_09_15-14_07_26-model_ViT-B-16-lr_0.001-b_410-j_8-p_amp/checkpoints/epoch_30.pt",
    "/home/kis/code/models/models/cc3m_mix_000/epoch_50.pt",
    "/home/kis/code/models/models/cc3m_mix_100/epoch_50.pt",
)
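
# `update_keys` is used by the ResNet fallback branch in the loop below but is
# not defined in this file. A minimal sketch, assuming the checkpoints were
# saved from a DataParallel/DDP-wrapped model whose keys carry a "module."
# prefix; adjust the prefix if the actual checkpoints differ.
def update_keys(state_dict, prefix="module."):
    return {key[len(prefix):] if key.startswith(prefix) else key: value
            for key, value in state_dict.items()}
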
model_idx = 0
model_type = 'transformer'
for model_name, weight in zip(models, weights):
    print("\n\n", model_name)

    preprocess = None
    clip_like = False
    model_type = 'transformer'  # reset per model; CNN branches switch this to 'cnn'
    if model_name == 'dino_v1_cnn':
        model = torch.hub.load('facebookresearch/dino:main', 'dino_resnet50')
        model.fc = torch.nn.Identity()
        model.eval()
        model_type = 'cnn'
    elif model_name == "vit_b_16_cc3m_50":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight)
    elif model_name == "vit_b_16_cc3m_50_28ep":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight)
    elif model_name == "vit_b_16_cc3m_50_30ep_difficult_batches":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight)
    elif model_name == "vit_b_16_cc3m_50_30ep":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight)
    elif model_name == "rn50_cc3m_mix_000":
        model, _, preprocess = open_clip.create_model_and_transforms('RN50', pretrained=weight)
    elif model_name == "rn50_cc3m_mix_100":
        model, _, preprocess = open_clip.create_model_and_transforms('RN50', pretrained=weight)
    elif model_name == "vit_b_16_cc3m_future_models":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight)
    elif model_name == "vit_b_16_cc3m_original":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight)
    elif model_name == 'dino_v1_vit_s_16':
        model = torch.hub.load('facebookresearch/dino:main', 'dino_vits16')
    elif model_name == 'dino_v1_vit_b_16':
        model = torch.hub.load('facebookresearch/dino:main', 'dino_vitb16')
    elif model_name == 'dino_v2_vit_s_14':
        model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
    elif model_name == 'dino_v2_vit_b_14':
        model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14')
    elif model_name == 'dino_v2_vit_l_14':
        model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14')
    elif model_name == 'dino_v2_vit_g_14':
        model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitg14')
    elif model_name == "vit_b_16_400m":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained='laion400m_e32')
        clip_like = True
    elif model_name == "vit_b_16_2b":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained='laion2b_s34b_b88k')
        clip_like = True
    elif model_name == "vit_b_32_400m":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion400m_e32')
        clip_like = True
    elif model_name == "vit_b_32_2b":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
        clip_like = True
    elif model_name == "vit_l_14_400m":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion400m_e32')
        clip_like = True
    elif model_name == "vit_l_14_2b":
        model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k')
        clip_like = True
    elif "resnet" in model_name:
        model = torch.hub.load('pytorch/vision:v0.10.0', model_name, pretrained=True)
        model.fc = torch.nn.Identity()
        model.eval()
        model_type = 'cnn'
    elif "vit" in model_name:
        model = torch.hub.load('pytorch/vision', model_name, weights='IMAGENET1K_V1')
        model.heads = torch.nn.Identity()
    elif "ViT" in model_name:
        model, preprocess = clip.load(model_name, device)  # OpenAI CLIP checkpoints
        clip_like = True
    elif "RN" in model_name:
        model, preprocess = clip.load(model_name, device)
        # Swap attention pooling for average pooling so the visual trunk
        # behaves like a plain CNN feature extractor.
        model.visual.attnpool = torch.nn.AdaptiveAvgPool2d((1, 1))
        clip_like = True
        model_type = 'cnn'
    else:
        # Fallback: ResNet-50 checkpoint on disk (keys cleaned by update_keys above).
        w = torch.load(weight)
        model = torchvision.models.resnet50(pretrained=False)
        model.fc = torch.nn.Identity()
        model.load_state_dict(update_keys(w['state_dict']), strict=True)
        model_type = 'cnn'

    model.eval()
    model.to(device)

    # SimCLR checkpoints were trained without input normalization; everything
    # else uses ImageNet statistics.
    if 'simclr' in model_name:
        img_norm_cfg = dict(mean=[0., 0., 0.], std=[1., 1., 1.])
    else:
        img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # The `preprocess` returned by open_clip/CLIP is deliberately ignored: all
    # models share one test-time transform so the probes stay comparable.
    transform_test = transforms.Compose([
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(**img_norm_cfg)
    ])
    # This run probes PHASE emotion labels; overrides the FACET default above.
    features_root = "../../features/PHASE_EMOTIONS/"

    train_dataset = PHASE(
        csv_file='/home/kis/Desktop/rhome/kis/datasets/phase/phase_annotations/train_annotations_emotion.txt',
        root_dir='/home/kis/Desktop/rhome/kis/datasets/phase/images/train_bb/',
        transform=transform_test,
    )
    val_dataset = PHASE(
        csv_file='/home/kis/Desktop/rhome/kis/datasets/phase/phase_annotations/val_annotations_emotion.txt',
        root_dir='/home/kis/Desktop/rhome/kis/datasets/phase/images/val_bb/',
        transform=transform_test,
    )
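    # Optional sanity check (the dataset paths above are machine-specific):
    # img, lbl = train_dataset[0]   # img should be a 3x224x224 float tensor
    # assert img.shape == (3, 224, 224)
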
    def get_features(dataset):
        all_features = []
        all_labels = []

        with torch.no_grad():
            for images, labels in tqdm(DataLoader(dataset, batch_size=512)):
                if model_type == 'cnn':
                    features = model(images.to(device))
                else:
                    features = model.encode_image(images.to(device))

                print(features.shape)
                all_features.append(features)
                all_labels.append(labels)

        return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy()

    feat_path = Path(f"{features_root}/{model_name}_features.pkl")
    if feat_path.exists():
        print("Already extracted!")
        with open(f"{features_root}/{model_name}_features.pkl", 'rb') as f:
            features = pickle.load(f)
        with open(f"{features_root}/{model_name}_labels.pkl", 'rb') as f:
            labels = pickle.load(f)
        with open(f"{features_root}/{model_name}_features_val.pkl", 'rb') as f:
            features_val = pickle.load(f)
        with open(f"{features_root}/{model_name}_labels_val.pkl", 'rb') as f:
            labels_val = pickle.load(f)
    else:
        features, labels = get_features(train_dataset)
        with open(f"{features_root}/{model_name}_features.pkl", 'wb') as f:
            pickle.dump(features, f)
        with open(f"{features_root}/{model_name}_labels.pkl", 'wb') as f:
            pickle.dump(labels, f)

        features_val, labels_val = get_features(val_dataset)
        with open(f"{features_root}/{model_name}_features_val.pkl", 'wb') as f:
            pickle.dump(features_val, f)
        with open(f"{features_root}/{model_name}_labels_val.pkl", 'wb') as f:
            pickle.dump(labels_val, f)

    print("Done!")

    # Sweep the inverse regularization strength C over 0.1 ... 0.9.
    for i in range(1, 10):
        c = round(i * 0.1, 1)  # round to keep float artifacts out of file names
        classifier = LogisticRegression(random_state=0, C=c, max_iter=10000, verbose=0, class_weight="balanced")
        classifier.fit(features, labels)

        predictions = classifier.predict(features_val)
        dump(classifier, f'{features_root}/{model_name}_logistic_regression_classifier_c_{c}.joblib')
        with open(f'{features_root}/{model_name}_predictions_c_{c}.pkl', 'wb') as f:
            pickle.dump(predictions, f)

        pd.DataFrame(predictions).to_csv(f"{features_root}/{model_name}_predictions_c_{c}.txt")
        accuracy = np.mean((labels_val == predictions).astype(float)) * 100.
        print(f"C={c}, Accuracy = {accuracy:.3f}")