{ "cells": [ { "cell_type": "code", "execution_count": 11, "id": "11ab9ea6-1c5b-4f9b-a6ea-1bc75be56108", "metadata": {}, "outputs": [], "source": [ "import torch\n", "from tqdm import tqdm\n", "import os\n", "import warnings\n", "\n", "import joblib\n", "import numpy as np\n", "import pandas as pd\n", "import torch\n", "from sklearn.metrics import mean_absolute_error\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from torch import nn\n", "from torch.optim import AdamW\n", "from torch.optim.lr_scheduler import LinearLR\n", "from torch.utils.data import DataLoader, Dataset\n", "from tqdm import tqdm\n", "from transformers import (\n", " AutoConfig,\n", " AutoModel,\n", " AutoTokenizer,\n", " BertConfig,\n", " BertModel,\n", " BertTokenizerFast,\n", " PreTrainedModel,\n", ")\n", "from transformers.activations import ACT2FN\n", "\n", "def global_ap(x):\n", " return torch.mean(x.view(x.size(0), x.size(1), -1), dim=1)\n", "\n", "class SimSonEncoder(nn.Module):\n", " def __init__(self, config: BertConfig, max_len: int, dropout: float = 0.1):\n", " super(SimSonEncoder, self).__init__()\n", " self.config = config\n", " self.max_len = max_len\n", " \n", " self.bert = BertModel(config, add_pooling_layer=False)\n", " \n", " self.linear = nn.Linear(config.hidden_size, max_len)\n", " self.dropout = nn.Dropout(dropout)\n", " \n", " def forward(self, input_ids, attention_mask=None):\n", " if attention_mask is None:\n", " attention_mask = input_ids.ne(0)\n", " \n", " outputs = self.bert(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask\n", " )\n", " \n", " hidden_states = outputs.last_hidden_state\n", " \n", " hidden_states = self.dropout(hidden_states)\n", " \n", " pooled = global_ap(hidden_states)\n", " \n", " out = self.linear(pooled)\n", " \n", " return out\n", "\n", "class SimSonClassifier(nn.Module):\n", " def __init__(self, encoder: SimSonEncoder, num_labels: int, dropout=0.1):\n", " super(SimSonClassifier, self).__init__()\n", " self.encoder = encoder\n", " self.clf = nn.Linear(encoder.max_len, num_labels)\n", " self.relu = nn.ReLU()\n", " self.dropout = nn.Dropout(dropout)\n", "\n", " def forward(self, input_ids, attention_mask=None, labels=None):\n", " x = self.encoder(input_ids, attention_mask)\n", " x = self.relu(self.dropout(x))\n", " x = self.clf(x)\n", " return x" ] }, { "cell_type": "code", "execution_count": 8, "id": "ce760993-fbef-4546-8b2c-1e7a722ad374", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import torch\n", "from torch.utils.data import Dataset, DataLoader, Sampler\n", "import torch.nn as nn\n", "from torch.optim import AdamW\n", "from torch.optim.lr_scheduler import LinearLR\n", "from tqdm import tqdm\n", "\n", "# 1. 
{ "cell_type": "code", "execution_count": 8, "id": "ce760993-fbef-4546-8b2c-1e7a722ad374", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import torch\n", "from torch.utils.data import Dataset, DataLoader, Sampler\n", "import torch.nn as nn\n", "from torch.optim import AdamW\n", "from torch.optim.lr_scheduler import LinearLR\n", "from tqdm import tqdm\n", "\n", "# 1. BINNING, SAMPLING, SAMPLE WEIGHTING\n", "def compute_multitarget_sample_weights(labels_unscaled, bins4, bins5):\n", " # Bin index of each sample for the two targets of interest; each has shape (N,).\n", " # Values below the lowest edge (possible in the validation split) are clipped into the first bin.\n", " inds4 = np.maximum(np.digitize(labels_unscaled[:, 0], bins4, right=False) - 1, 0)\n", " inds5 = np.maximum(np.digitize(labels_unscaled[:, 1], bins5, right=False) - 1, 0)\n", " freq4 = np.bincount(inds4, minlength=len(bins4))\n", " freq5 = np.bincount(inds5, minlength=len(bins5))\n", " w4 = 1.0 / (freq4[inds4] + 1e-8)\n", " w5 = 1.0 / (freq5[inds5] + 1e-8)\n", " main_weights = np.maximum(w4, w5) # take the rarer of the two targets (mean or sum would also work)\n", " main_weights /= main_weights.mean() # normalize to mean 1 for stability\n", " return main_weights # shape (N_samples,)\n", "\n",
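"# Quick self-check of the weighting helper on toy data (illustrative only; the toy labels\n", "# and quantile bin edges below are assumptions, not part of the training pipeline).\n", "# Samples in rare bins receive large weights, and the weights are normalized to mean 1.\n", "_toy_labels = np.random.default_rng(0).normal(size=(100, 2))\n", "_toy_bins4 = np.quantile(_toy_labels[:, 0], np.linspace(0, 1, 6))\n", "_toy_bins5 = np.quantile(_toy_labels[:, 1], np.linspace(0, 1, 6))\n", "_toy_weights = compute_multitarget_sample_weights(_toy_labels, _toy_bins4, _toy_bins5)\n", "assert _toy_weights.shape == (100,) and np.isclose(_toy_weights.mean(), 1.0)\n", "\n",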
"class TargetedSampler(Sampler):\n", " \"\"\"\n", " Enforces a fixed proportion of 'high value' samples (a high bin for either of the main labels) in every batch.\n", " \"\"\"\n", " def __init__(self, inds4, inds5, high_bins4, high_bins5, batch_size, high_frac=0.3, shuffle=True):\n", " # indices for which either label 4 or label 5 is in a 'high' bin\n", " high_mask = (inds4 >= high_bins4) | (inds5 >= high_bins5)\n", " self.high_indices = np.where(high_mask)[0]\n", " self.low_indices = np.where(~high_mask)[0]\n", " self.batch_size = batch_size\n", " self.high_count = int(batch_size * high_frac)\n", " self.low_count = batch_size - self.high_count\n", " self.shuffle = shuffle\n", " \n", " def __iter__(self):\n", " high = np.copy(self.high_indices)\n", " low = np.copy(self.low_indices)\n", " if self.shuffle:\n", " np.random.shuffle(high)\n", " np.random.shuffle(low)\n", " hi_ptr, low_ptr = 0, 0\n", " # Yield exactly len(self) batches per epoch; whichever pool runs out is reshuffled\n", " # and reused, which oversamples the rarer (high-value) pool.\n", " for _ in range(len(self)):\n", " if hi_ptr + self.high_count > len(high):\n", " np.random.shuffle(high)\n", " hi_ptr = 0\n", " if low_ptr + self.low_count > len(low):\n", " np.random.shuffle(low)\n", " low_ptr = 0\n", " batch_high = high[hi_ptr: hi_ptr+self.high_count]\n", " batch_low = low[low_ptr: low_ptr+self.low_count]\n", " batch = np.concatenate([batch_high, batch_low])\n", " np.random.shuffle(batch)\n", " yield batch.tolist()\n", " hi_ptr += self.high_count\n", " low_ptr += self.low_count\n", " \n", " def __len__(self):\n", " return (len(self.high_indices) + len(self.low_indices)) // self.batch_size\n", "\n", "class SMILESDataset(torch.utils.data.Dataset):\n", " def __init__(self, smiles_list, labels, sample_weights, tokenizer, max_length=256):\n", " self.smiles_list = smiles_list\n", " self.labels = labels # shape (N, 6), already scaled\n", " self.tokenizer = tokenizer\n", " self.max_length = max_length\n", " self.sample_weights = sample_weights\n", "\n", " def __len__(self):\n", " return len(self.smiles_list)\n", "\n", " def __getitem__(self, idx):\n", " # explicit CLS prefix (the tokenizer adds its own special tokens as well)\n", " smiles = self.tokenizer.cls_token + self.smiles_list[idx]\n", " encoding = self.tokenizer(\n", " smiles,\n", " truncation=True,\n", " padding='max_length',\n", " max_length=self.max_length,\n", " return_tensors='pt'\n", " )\n", " return {\n", " 'input_ids': encoding['input_ids'].flatten(),\n", " 'attention_mask': encoding['attention_mask'].flatten(),\n", " 'labels': torch.tensor(self.labels[idx], dtype=torch.float32),\n", " 'weight': torch.tensor(self.sample_weights[idx], dtype=torch.float32),\n", " 'index': torch.tensor(idx, dtype=torch.long)\n", " }\n", "\n", "\n", "def calculate_weighted_loss(predictions, labels, weights):\n", " \"\"\"\n", " Sample-weighted MSE over all targets.\n", " \n", " Args:\n", " predictions: Model outputs (batch_size, num_labels)\n", " labels: Ground truth labels (batch_size, num_labels)\n", " weights: Per-sample rarity weights (batch_size,)\n", " \"\"\"\n", " loss_fn = nn.MSELoss(reduction='none')\n", " # Per-sample, per-label squared errors, shape (batch_size, num_labels)\n", " losses = loss_fn(predictions, labels)\n", " # Average over the label dimension, then weight each sample before reducing\n", " return (losses.mean(dim=1) * weights).mean()\n", "\n",
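"# Tiny illustration (assumed values, not from the pipeline): with per-sample weights [2, 0]\n", "# the second sample is ignored, so the weighted loss is (2*1 + 0*9) / 2 = 1.\n", "_p = torch.tensor([[0.0, 0.0], [0.0, 0.0]])\n", "_t = torch.tensor([[1.0, 1.0], [3.0, 3.0]])\n", "_w = torch.tensor([2.0, 0.0])\n", "assert torch.isclose(calculate_weighted_loss(_p, _t, _w), torch.tensor(1.0))\n", "\n",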
"def stratified_metrics(preds, trues, bins, scalers):\n", " # Per-bin MAE/R2 (in unscaled units) for the last two labels (CH4 = column 4, CO2 = column 5)\n", " results = {}\n", " for li, label_col in enumerate([4, 5]):\n", " unscaled_pred = scalers[label_col].inverse_transform(preds[:,label_col].reshape(-1,1)).flatten()\n", " unscaled_true = scalers[label_col].inverse_transform(trues[:,label_col].reshape(-1,1)).flatten()\n", " bin_idx = np.digitize(unscaled_true, bins[li])\n", " for i in range(len(bins[li])+1):\n", " in_bin = bin_idx == i\n", " if np.sum(in_bin) == 0:\n", " continue\n", " bin_mae = np.mean(np.abs(unscaled_pred[in_bin] - unscaled_true[in_bin]))\n", " bin_r2 = 1 - (np.mean((unscaled_pred[in_bin] - unscaled_true[in_bin])**2) /\n", " (np.var(unscaled_true[in_bin]) + 1e-8))\n", " results[f'label{label_col}_bin{i}_count'] = np.sum(in_bin)\n", " results[f'label{label_col}_bin{i}_mae'] = bin_mae\n", " results[f'label{label_col}_bin{i}_r2'] = bin_r2\n", " return results\n", "\n", "# 5. MAIN TRAIN/VAL LOOP WITH TARGETED SAMPLING AND STRATIFIED EVALUATION\n", "def run_training(smiles_train, smiles_test, labels_train, labels_test, model, tokenizer, scalers,\n", " num_epochs=5, learning_rate=1e-5, batch_size=256, validation_steps=500):\n", " # NOTE: bin edges and rarity weights come from the global train/test DataFrames' unscaled\n", " # columns, which must correspond row-by-row to smiles_train/labels_train and smiles_test/labels_test\n", " # 1. Bins for the unscaled CO2 / CH4 train columns\n", " bins_label4 = create_bins(train['unscaled_CO2'], n_bins=10)\n", " bins_label5 = create_bins(train['unscaled_CH4'], n_bins=10)\n", " # In the `targets` order column 4 is CH4 and column 5 is CO2, so pair the bins accordingly for stratified_metrics\n", " bins = [bins_label5, bins_label4]\n", " # 2. Bin indicators for each sample in train\n", " inds4 = np.digitize(train['unscaled_CO2'], bins_label4, right=False) - 1\n", " inds5 = np.digitize(train['unscaled_CH4'], bins_label5, right=False) - 1\n", " # 3. Choose high-bin threshold (e.g. top bin, or top 2 bins as \"high\"), adjust as needed\n", " high_bins4 = len(bins_label4) - 1\n", " high_bins5 = len(bins_label5) - 1\n", " # 4. Compute multitarget weights (max rarity of either label-of-interest, UNscaled)\n", " sample_weights = compute_multitarget_sample_weights(\n", " train[['unscaled_CO2', 'unscaled_CH4']].values, bins_label4, bins_label5)\n", " val_sample_weights = compute_multitarget_sample_weights(\n", " test[['unscaled_CO2', 'unscaled_CH4']].values, bins_label4, bins_label5)\n", " # 5. Dataset and batch sampler\n", " targeted_sampler = TargetedSampler(inds4, inds5, high_bins4, high_bins5, batch_size, high_frac=0.3)\n", " train_dataset = SMILESDataset(smiles_train, labels_train, sample_weights, tokenizer)\n", " val_dataset = SMILESDataset(smiles_test, labels_test, val_sample_weights, tokenizer)\n", " train_loader = DataLoader(train_dataset, batch_sampler=targeted_sampler, num_workers=4)\n", " val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)\n", " # 6. Model, optimizer, scheduler\n", " device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", " model.to(device)\n", " optimizer = AdamW(model.parameters(), lr=learning_rate)\n", " total_steps = len(train_loader) * num_epochs\n", " scheduler = LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=total_steps)\n", " best_val_loss = float('inf')\n", " best_state = None\n", " train_loss_history, val_loss_history = [], []\n", " steps_no_improve, patience = 0, 10\n", " global_step, running_train_loss, train_steps_count = 0, 0, 0\n", "\n", " for epoch in range(num_epochs):\n", " print(f\"Epoch {epoch+1}/{num_epochs}\")\n", " model.train()\n", " pbar = tqdm(train_loader, desc='Training', total=len(train_loader) * num_epochs)\n", " for batch in pbar:\n", " input_ids = batch['input_ids'].to(device)\n", " attention_mask = batch['attention_mask'].to(device)\n", " labels = batch['labels'].to(device)\n", " weights = batch['weight'].to(device)\n", " optimizer.zero_grad()\n", " outputs = model(input_ids=input_ids, attention_mask=attention_mask)\n", " loss = calculate_weighted_loss(outputs, labels, weights)\n", " loss.backward()\n", " torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n", " optimizer.step(); scheduler.step()\n", " running_train_loss += loss.item(); train_steps_count += 1; global_step += 1\n", " pbar.set_postfix(loss=f'{loss.item():.3f}')\n", " if global_step % validation_steps == 0:\n", " avg_train_loss = running_train_loss / train_steps_count\n", " train_loss_history.append(avg_train_loss)\n", " print(f\"Step {global_step}: Mean Weighted Train Loss: {avg_train_loss:.4f}\")\n", " running_train_loss = 0; train_steps_count = 0\n", " # Validation:\n", " all_preds, all_trues, all_weights = [], [], []\n", " model.eval()\n", " with torch.no_grad():\n", " for vb in val_loader:\n", " vi = vb['input_ids'].to(device)\n", " va = vb['attention_mask'].to(device)\n", " vl = vb['labels'].to(device)\n", " vw = vb['weight'].to(device)\n", " out = model(input_ids=vi, attention_mask=va)\n", " all_preds.append(out.cpu()); all_trues.append(vl.cpu()); all_weights.append(vw.cpu())\n", " preds = torch.cat(all_preds).numpy()\n", " trues = torch.cat(all_trues).numpy()\n", " weights = torch.cat(all_weights).numpy()\n", " val_loss = calculate_weighted_loss(torch.tensor(preds), torch.tensor(trues), torch.tensor(weights)).item()\n", " val_loss_history.append(val_loss)\n", " print(f\"Weighted Val MSE (scaled): {val_loss:.4f}\")\n", " metrics = stratified_metrics(preds, trues, bins, scalers)\n", " for k, v in metrics.items():\n", " print(f\"{k}: {v:.4f}\")\n", " if val_loss < best_val_loss:\n", " best_val_loss = val_loss; steps_no_improve = 0\n", " # keep a detached copy; state_dict().copy() would keep referencing the live tensors\n", " best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}\n", " torch.save(model.state_dict(), '/home/jovyan/simson_training_bolgov/regression/better_regression_states/best_state.bin')\n", " print(f\"New best val_loss: {best_val_loss:.4f}\")\n", " else:\n", " steps_no_improve += 1\n", " print(f'Patience meter: {steps_no_improve} out of {patience}')\n", " if steps_no_improve >= patience:\n", " print(f\"Early stopping at step {global_step}\")\n", " if best_state: model.load_state_dict(best_state)\n", " return train_loss_history, val_loss_history, best_val_loss\n", " model.train()\n", " if best_state: model.load_state_dict(best_state)\n", " print(f\"Training completed, best weighted val_loss: {best_val_loss:.4f}\")\n", " return train_loss_history, val_loss_history, best_val_loss" ] }, { "cell_type": "code", "execution_count": 9, "id": "12a2b8c3-2c4d-4b1b-8cc7-930c9fe68fd7", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "df = pd.read_csv('/home/jovyan/simson_training_bolgov/regression/PI_Tg_P308K_synth_db_chem.csv')\n", "df['unscaled_CO2'] = df['CO2'].copy()\n", "df['unscaled_CH4'] = 
df['CH4'].copy()\n", "targets = ['Tg', 'He', 'N2', 'O2', 'CH4', 'CO2']" ] }, { "cell_type": "code", "execution_count": 18, "id": "9946f5cd-3683-49db-8535-393cb04140ce", "metadata": {}, "outputs": [], "source": [ "from transformers import AutoTokenizer\n", "tokenizer_path = 'DeepChem/ChemBERTa-77M-MTR'\n", "tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\n", "\n", "# Only the hidden size is slightly larger, everything else is the same\n", "config = BertConfig(\n", " vocab_size=tokenizer.vocab_size,\n", " hidden_size=768,\n", " num_hidden_layers=4,\n", " num_attention_heads=12,\n", " intermediate_size=2048,\n", " max_position_embeddings=512\n", " )\n", "\n", "simson_params = torch.load('/home/jovyan/simson_training_bolgov/regression/actual_encoder_state.pkl', weights_only=False)\n", "\n", "backbone = SimSonEncoder(config=config, max_len=512)\n", "backbone.load_state_dict(simson_params)\n", "\n", "\n", "model = SimSonClassifier(encoder=backbone, num_labels=len(targets))\n", "\n", "for param in model.encoder.parameters():\n", " param.requires_grad = False" ] }, { "cell_type": "code", "execution_count": 14, "id": "903489f0-9039-4504-894e-6739b4a15371", "metadata": {}, "outputs": [], "source": [ "def create_stratified_splits_regression(\n", " df,\n", " label_cols,\n", " n_bins=10,\n", " val_frac=0.05,\n", " seed=42\n", "):\n", " \n", " values = df[label_cols].values\n", " # Each label gets its own bins, based on the overall distribution\n", " bins = [np.unique(np.quantile(values[:,i], np.linspace(0, 1, n_bins+1))) for i in range(len(label_cols))]\n", " # Assign each row to a bin for each label\n", " inds = [\n", " np.digitize(values[:,i], bins[i][1:-1], right=False) # exclude leftmost/rightmost for in-bin, avoids all bin edges as bins\n", " for i in range(len(label_cols))\n", " ]\n", " # Combine into a single integer stratification variable (tuple or max or sum...)\n", " strat_col = np.maximum.reduce(inds) # This ensures high bin in one = high bin overall\n", " # Use sklearn's train_test_split with stratify\n", " train_idx, val_idx = train_test_split(\n", " df.index.values,\n", " test_size=val_frac,\n", " random_state=seed,\n", " shuffle=True,\n", " stratify=strat_col\n", " )\n", " train = df.loc[train_idx].reset_index(drop=True)\n", " val = df.loc[val_idx].reset_index(drop=True)\n", " return train, val\n", "\n", "\n", "# For your use case:\n", "train, test = create_stratified_splits_regression(\n", " df,\n", " label_cols=['unscaled_CO2', 'unscaled_CH4'], # or actual column names\n", " n_bins=10,\n", " val_frac=0.05,\n", " seed=42\n", ")" ] }, { "cell_type": "code", "execution_count": 15, "id": "00c271f1-bd44-457d-9a0e-7b221871ab78", "metadata": {}, "outputs": [], "source": [ "scalers = []\n", "\n", "for target in targets:\n", " target_scaler = StandardScaler()\n", " train[target] = target_scaler.fit_transform(train[target].to_numpy().reshape(-1, 1))\n", " test[target] = target_scaler.transform(test[target].to_numpy().reshape(-1, 1))\n", " \n", " scalers.append(target_scaler)\n", "\n", "smiles_train = train['Smiles']\n", "smiles_test = test['Smiles']\n", "\n", "labels_train = train[targets].values\n", "labels_test = test[targets].values" ] }, { "cell_type": "code", "execution_count": 16, "id": "01ebce4a-9ac0-4527-a9bd-8d13913f15e3", "metadata": {}, "outputs": [], "source": [ "def create_bins(target_values, n_bins=5, strategy='percentile'):\n", " \"\"\"\n", " Create bins for a target based on the specified strategy.\n", " - 'percentile' creates approximately equal-sized groups\n", " - 
'uniform' creates equal-width bins\n", " Returns:\n", " bin_edges: array of length n_bins+1\n", " \"\"\"\n", " target_values = target_values[~np.isnan(target_values)]\n", " if strategy == 'percentile':\n", " return np.percentile(target_values, np.linspace(0, 100, n_bins+1))\n", " else:\n", " return np.linspace(np.min(target_values), np.max(target_values), n_bins+1)" ] }, { "cell_type": "code", "execution_count": null, "id": "ac00906e-e22a-4f50-94e7-650acf5eec1f", "metadata": {}, "outputs": [], "source": [ "# model.load_state_dict(torch.load('/home/jovyan/simson_training_bolgov/regression/regression_simson.pth'))" ] }, { "cell_type": "code", "execution_count": null, "id": "4405c601-f006-4eeb-989e-fb35dd5349ba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/6\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 4%|▌ | 6499/149778 [21:21<7:51:15, 5.07it/s, loss=0.414]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Step 6500: Mean Weighted Train Loss: 34.2319\n", "Weighted Val MSE (scaled): 0.1242\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 4%|▍ | 6500/149778 [25:38<3067:56:23, 77.08s/it, loss=0.414]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "label4_bin0_count: 1269.0000\n", "label4_bin0_mae: 2.4281\n", "label4_bin1_count: 208626.0000\n", "label4_bin1_mae: 0.8366\n", "label4_bin2_count: 53528.0000\n", "label4_bin2_mae: 1.2479\n", "label4_bin3_count: 37339.0000\n", "label4_bin3_mae: 2.0683\n", "label4_bin4_count: 21793.0000\n", "label4_bin4_mae: 3.2345\n", "label4_bin5_count: 8194.0000\n", "label4_bin5_mae: 5.3938\n", "label4_bin6_count: 3423.0000\n", "label4_bin6_mae: 8.1634\n", "label4_bin7_count: 1506.0000\n", "label4_bin7_mae: 12.3689\n", "label4_bin8_count: 610.0000\n", "label4_bin8_mae: 28.5760\n", "label4_bin9_count: 60.0000\n", "label4_bin9_mae: 29.3493\n", "label5_bin1_count: 299.0000\n", "label5_bin1_mae: 134.7964\n", "label5_bin2_count: 1068.0000\n", "label5_bin2_mae: 116.7105\n", "label5_bin3_count: 1998.0000\n", "label5_bin3_mae: 114.5267\n", "label5_bin4_count: 4626.0000\n", "label5_bin4_mae: 128.3239\n", "label5_bin5_count: 9057.0000\n", "label5_bin5_mae: 140.8095\n", "label5_bin6_count: 12853.0000\n", "label5_bin6_mae: 146.8567\n", "label5_bin7_count: 17785.0000\n", "label5_bin7_mae: 156.6570\n", "label5_bin8_count: 23487.0000\n", "label5_bin8_mae: 177.3546\n", "label5_bin9_count: 32135.0000\n", "label5_bin9_mae: 219.6385\n", "label5_bin10_count: 190443.0000\n", "label5_bin10_mae: 267.8157\n", "label5_bin11_count: 42597.0000\n", "label5_bin11_mae: 1224.3250\n", "New best val_loss: 0.1242\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 9%|█ | 12999/149778 [48:04<7:49:37, 4.85it/s, loss=0.321]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Step 13000: Mean Weighted Train Loss: 0.3521\n", "Weighted Val MSE (scaled): 0.1131\n", "label4_bin0_count: 1269.0000\n", "label4_bin0_mae: 1.1570\n", "label4_bin1_count: 208626.0000\n", "label4_bin1_mae: 0.7505\n", "label4_bin2_count: 53528.0000\n", "label4_bin2_mae: 1.1643\n", "label4_bin3_count: 37339.0000\n", "label4_bin3_mae: 1.9008\n", "label4_bin4_count: 21793.0000\n", "label4_bin4_mae: 3.1142\n", "label4_bin5_count: 8194.0000\n", "label4_bin5_mae: 5.0156\n", "label4_bin6_count: 3423.0000\n", "label4_bin6_mae: 7.3518\n", "label4_bin7_count: 1506.0000\n", "label4_bin7_mae: 9.9343\n", "label4_bin8_count: 610.0000\n", "label4_bin8_mae: 25.9864\n", "label4_bin9_count: 
60.0000\n", "label4_bin9_mae: 27.8746\n", "label5_bin1_count: 299.0000\n", "label5_bin1_mae: 86.6469\n", "label5_bin2_count: 1068.0000\n", "label5_bin2_mae: 87.6131\n", "label5_bin3_count: 1998.0000\n", "label5_bin3_mae: 87.2714\n", "label5_bin4_count: 4626.0000\n", "label5_bin4_mae: 90.1329\n", "label5_bin5_count: 9057.0000\n", "label5_bin5_mae: 102.7583\n", "label5_bin6_count: 12853.0000\n", "label5_bin6_mae: 109.2145\n", "label5_bin7_count: 17785.0000\n", "label5_bin7_mae: 125.8342\n", "label5_bin8_count: 23487.0000\n", "label5_bin8_mae: 151.9758\n", "label5_bin9_count: 32135.0000\n", "label5_bin9_mae: 194.6474\n", "label5_bin10_count: 190443.0000\n", "label5_bin10_mae: 236.1328\n", "label5_bin11_count: 42597.0000\n", "label5_bin11_mae: 1116.2136\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 9%|▊ | 13001/149778 [52:21<2053:40:30, 54.05s/it, loss=0.309]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "New best val_loss: 0.1131\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 13%|█▎ | 19049/149778 [1:12:14<7:14:15, 5.02it/s, loss=0.264]" ] } ], "source": [ "import numpy as np\n", "import torch\n", "from torch.optim import AdamW\n", "from torch.optim.lr_scheduler import LinearLR\n", "from torch.utils.data import DataLoader\n", "from tqdm import tqdm\n", "\n", "\n", "\n", "train_losses, val_losses, best_loss = run_training(\n", " smiles_train, smiles_test, labels_train, labels_test, \n", " model, tokenizer, scalers, num_epochs=6, learning_rate=2e-5, batch_size=256, validation_steps=6_500,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "64baf577-cfe9-454c-8a2b-71b7a5b12506", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:.mlspace-bolgov_simson_training]", "language": "python", "name": "conda-env-.mlspace-bolgov_simson_training-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.11" } }, "nbformat": 4, "nbformat_minor": 5 }