{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "11ab9ea6-1c5b-4f9b-a6ea-1bc75be56108", "metadata": {}, "outputs": [], "source": [ "import os\n", "import warnings\n", "\n", "import joblib\n", "import numpy as np\n", "import pandas as pd\n", "import torch\n", "from sklearn.metrics import mean_absolute_error\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from torch import nn\n", "from torch.optim import AdamW\n", "from torch.optim.lr_scheduler import LinearLR\n", "from torch.utils.data import DataLoader, Dataset\n", "from tqdm import tqdm\n", "from transformers import (\n", " AutoConfig,\n", " AutoModel,\n", " AutoTokenizer,\n", " BertConfig,\n", " BertModel,\n", " BertTokenizerFast,\n", " PreTrainedModel,\n", ")\n", "from transformers.activations import ACT2FN\n", "\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "torch.backends.cuda.matmul.allow_tf32 = True\n", "torch.backends.cudnn.allow_tf32 = True\n", "\n", "def global_ap(x):\n", " return torch.mean(x.view(x.size(0), x.size(1), -1), dim=1)\n", "\n", "class SimSonEncoder(nn.Module):\n", " def __init__(self, config: BertConfig, max_len: int, dropout: float = 0.1):\n", " super(SimSonEncoder, self).__init__()\n", " self.config = config\n", " self.max_len = max_len\n", " \n", " self.bert = BertModel(config, add_pooling_layer=False)\n", " \n", " self.linear = nn.Linear(config.hidden_size, max_len)\n", " self.dropout = nn.Dropout(dropout)\n", " \n", " def forward(self, input_ids, attention_mask=None):\n", " if attention_mask is None:\n", " attention_mask = input_ids.ne(0)\n", " \n", " outputs = self.bert(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask\n", " )\n", " \n", " hidden_states = outputs.last_hidden_state\n", " \n", " hidden_states = self.dropout(hidden_states)\n", " \n", " pooled = global_ap(hidden_states)\n", " \n", " out = self.linear(pooled)\n", " \n", " return out\n", "\n", "class SimSonClassifier(nn.Module):\n", " def __init__(self, encoder: SimSonEncoder, num_labels: int, dropout=0.1):\n", " super(SimSonClassifier, self).__init__()\n", " self.encoder = encoder\n", " self.clf = nn.Linear(encoder.max_len, num_labels)\n", " self.relu = nn.ReLU()\n", " self.dropout = nn.Dropout(dropout)\n", "\n", " def forward(self, input_ids, attention_mask=None, labels=None):\n", " x = self.encoder(input_ids, attention_mask)\n", " x = self.relu(self.dropout(x))\n", " x = self.clf(x)\n", " return x" ] }, { "cell_type": "code", "execution_count": 10, "id": "ce760993-fbef-4546-8b2c-1e7a722ad374", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import torch\n", "from torch.utils.data import Dataset\n", "\n", "\n", "class SMILESDataset(Dataset):\n", " def __init__(self, smiles_list, labels, tokenizer, max_length=256):\n", " self.smiles_list = smiles_list\n", " self.labels = labels # Shape: (num_samples, 2) - already scaled\n", " self.tokenizer = tokenizer\n", " self.max_length = max_length\n", " \n", " # Create mask for valid (non-NaN) labels\n", " self.label_masks = ~np.isnan(self.labels) # True where label is valid\n", " \n", " # Replace NaNs with 0 for safe tensor conversion (mask will handle exclusion)\n", " self.labels = np.nan_to_num(self.labels, nan=0.0)\n", " \n", " def __len__(self):\n", " return len(self.smiles_list)\n", " \n", " def __getitem__(self, idx):\n", " smiles = self.tokenizer.cls_token + self.smiles_list[idx]\n", " \n", " # Tokenize the SMILES string\n", " encoding = self.tokenizer(\n", " smiles,\n", " truncation=True,\n", " padding='max_length',\n", " max_length=self.max_length,\n", " return_tensors='pt'\n", " )\n", " \n", " return {\n", " 'input_ids': encoding['input_ids'].flatten(),\n", " 'attention_mask': encoding['attention_mask'].flatten(),\n", " 'labels': torch.tensor(self.labels[idx], dtype=torch.float32),\n", " 'label_mask': torch.tensor(self.label_masks[idx], dtype=torch.float32)\n", " }\n", " \n", " def get_label_statistics(self):\n", " \"\"\"Return statistics about label availability\"\"\"\n", " label_counts = self.label_masks.sum(axis=0)\n", " total_samples = len(self.smiles_list)\n", " \n", " stats = {\n", " 'total_samples': total_samples,\n", " 'label_0_count': label_counts[0],\n", " 'label_1_count': label_counts[1],\n", " 'label_0_ratio': label_counts[0] / total_samples,\n", " 'label_1_ratio': label_counts[1] / total_samples,\n", " 'both_labels_count': (self.label_masks.sum(axis=1) == 2).sum(),\n", " 'single_label_count': (self.label_masks.sum(axis=1) == 1).sum(),\n", " 'no_labels_count': (self.label_masks.sum(axis=1) == 0).sum()\n", " }\n", " \n", " return stats\n", "\n", "def calculate_weighted_loss(predictions, labels, label_mask, label_weights):\n", " \"\"\"\n", " Calculate weighted loss for two labels with masking\n", " \n", " Args:\n", " predictions: Model outputs (batch_size, 2)\n", " labels: Ground truth labels (batch_size, 2)\n", " label_mask: Mask for valid labels (batch_size, 2)\n", " label_weights: Weights for each label (2,)\n", " \"\"\"\n", " loss_fn = nn.MSELoss(reduction='none')\n", " \n", " # Calculate per-sample, per-label losses\n", " losses = loss_fn(predictions, labels) # Shape: (batch_size, 2)\n", " \n", " # Apply masking to exclude NaN labels\n", " valid_mask = label_mask.bool()\n", " masked_losses = losses * valid_mask.float()\n", " \n", " # Apply label-specific weights\n", " weighted_losses = masked_losses * label_weights.unsqueeze(0) # Broadcast weights\n", " \n", " # Calculate final loss (only over valid predictions)\n", " total_loss = weighted_losses.sum()\n", " total_valid = valid_mask.sum()\n", " \n", " return total_loss / total_valid if total_valid > 0 else torch.tensor(0.0, device=predictions.device, requires_grad=True)\n", "\n", "def compute_label_weights(dataset):\n", " \"\"\"\n", " Compute inverse frequency weights based on label availability\n", " \n", " Args:\n", " dataset: SMILESDataset instance\n", " \n", " Returns:\n", " torch.Tensor: Normalized weights for each label\n", " \"\"\"\n", " # Get label counts from dataset\n", " label_counts = dataset.label_masks.sum(axis=0) # Count valid samples per label\n", " total_samples = len(dataset)\n", " \n", " # Inverse frequency weighting\n", " weights = total_samples / (2 * label_counts) # 2 is the number of labels\n", " \n", " # Normalize weights so they sum to number of labels (2)\n", " weights = weights / weights.sum() * 2\n", " \n", " return torch.tensor(weights, dtype=torch.float32)\n", "\n", "def calculate_true_loss(predictions, labels, label_mask, scalers=None):\n", " \"\"\"\n", " Calculate unscaled MAE loss for monitoring using separate scalers for each label\n", " \n", " Args:\n", " predictions (torch.Tensor): Model outputs of shape (batch_size, 2).\n", " labels (torch.Tensor): Ground truth labels of shape (batch_size, 2).\n", " label_mask (torch.Tensor): Boolean mask for valid labels of shape (batch_size, 2).\n", " scalers: List of scaler objects, one for each label\n", " \"\"\"\n", " # Detach tensors from the computation graph and move to CPU\n", " predictions_np = predictions.cpu().detach().numpy()\n", " labels_np = labels.cpu().numpy()\n", " label_mask_np = label_mask.cpu().numpy().astype(bool)\n", " \n", " total_mae = 0\n", " total_samples = 0\n", " \n", " for label_idx in range(2):\n", " # Get valid samples for this label\n", " valid_mask = label_mask_np[:, label_idx]\n", " \n", " if valid_mask.any():\n", " valid_preds = predictions_np[valid_mask, label_idx].reshape(-1, 1)\n", " valid_labels = labels_np[valid_mask, label_idx].reshape(-1, 1)\n", " \n", " if scalers is not None:\n", " # Unscale using the corresponding scaler for this label\n", " unscaled_preds = scalers[label_idx].inverse_transform(valid_preds).flatten()\n", " unscaled_labels = scalers[label_idx].inverse_transform(valid_labels).flatten()\n", " else:\n", " unscaled_preds = valid_preds.flatten()\n", " unscaled_labels = valid_labels.flatten()\n", " \n", " # Calculate MAE for this label\n", " mae = np.mean(np.abs(unscaled_preds - unscaled_labels))\n", " total_mae += mae * len(unscaled_preds)\n", " total_samples += len(unscaled_preds)\n", " \n", " return total_mae / total_samples if total_samples > 0 else 0.0\n", "\n", "\n", "def train_model(model, train_dataloader, val_dataloader, label_weights, \n", " scalers=None, num_epochs=10, learning_rate=2e-5, device='cuda', \n", " patience=3, validation_steps=500):\n", " \"\"\"\n", " Train model with weighted loss for two labels with step-based validation\n", " \n", " Args:\n", " model: CustomModel instance (should output 2 labels)\n", " train_dataloader: Training data loader\n", " val_dataloader: Validation data loader \n", " label_weights: Tensor with weights for each label\n", " scalers: List of scalers for unscaled loss monitoring\n", " num_epochs: Number of training epochs\n", " learning_rate: Learning rate\n", " device: Training device\n", " patience: Early stopping patience (in validation steps)\n", " validation_steps: Perform validation every N training steps\n", " \"\"\"\n", " model.to(device)\n", " label_weights = label_weights.to(device)\n", " \n", " optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)\n", " total_steps = len(train_dataloader) * num_epochs\n", " scheduler = LinearLR(optimizer, start_factor=1.0, end_factor=0.1, total_iters=total_steps)\n", " \n", " train_losses = []\n", " val_losses = []\n", " \n", " # Early stopping initialization\n", " best_val_loss = float('inf')\n", " steps_no_improve = 0\n", " best_model_state = None\n", " \n", " # Training tracking\n", " global_step = 0\n", " running_train_loss = 0\n", " running_true_train_loss = 0\n", " train_steps_count = 0\n", " \n", " print(f\"Label weights: {label_weights.cpu().numpy()}\")\n", " print(f\"Validation will be performed every {validation_steps} steps\")\n", " \n", " model.train()\n", " \n", " for epoch in range(num_epochs):\n", " print(f\"\\nEpoch {epoch + 1}/{num_epochs}\")\n", " \n", " train_progress = tqdm(train_dataloader, desc=\"Training\", leave=False)\n", " \n", " for batch_idx, batch in enumerate(train_progress):\n", " with torch.autocast(dtype=torch.float16, device_type=\"cuda\"):\n", " input_ids = batch['input_ids'].to(device)\n", " attention_mask = batch['attention_mask'].to(device)\n", " labels = batch['labels'].to(device)\n", " label_mask = batch['label_mask'].to(device)\n", " \n", " optimizer.zero_grad()\n", " \n", " # Model forward pass\n", " outputs = model(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " )\n", " \n", " # Calculate weighted loss\n", " loss = calculate_weighted_loss(outputs, labels, label_mask, label_weights)\n", " \n", " # Calculate true loss for monitoring\n", " true_loss = calculate_true_loss(outputs, labels, label_mask, scalers)\n", " \n", " # Accumulate losses for averaging\n", " running_train_loss += loss.item()\n", " running_true_train_loss += true_loss\n", " train_steps_count += 1\n", " \n", " loss.backward()\n", " \n", " torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n", " \n", " optimizer.step()\n", " scheduler.step()\n", " \n", " global_step += 1\n", " \n", " train_progress.set_postfix({\n", " 'step': global_step,\n", " 'loss': f'{loss.item():.4f}',\n", " 'true_loss': f'{true_loss:.4f}',\n", " 'lr': f'{scheduler.get_last_lr()[0]:.2e}'\n", " })\n", " \n", " # Perform validation every validation_steps\n", " if global_step % validation_steps == 0:\n", " # Calculate average training losses since last validation\n", " avg_train_loss = running_train_loss / train_steps_count\n", " avg_true_train_loss = running_true_train_loss / train_steps_count\n", " \n", " train_losses.append(avg_train_loss)\n", " \n", " # Reset running averages\n", " running_train_loss = 0\n", " running_true_train_loss = 0\n", " train_steps_count = 0\n", " \n", " # Validation\n", " model.eval()\n", " total_val_loss = 0\n", " total_true_val_loss = 0\n", " val_batches = 0\n", "\n", " with torch.no_grad():\n", " for val_batch in val_dataloader:\n", " with torch.autocast(dtype=torch.float16, device_type=\"cuda\"):\n", " input_ids = val_batch['input_ids'].to(device)\n", " attention_mask = val_batch['attention_mask'].to(device)\n", " labels = val_batch['labels'].to(device)\n", " label_mask = val_batch['label_mask'].to(device)\n", " \n", " outputs = model(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " )\n", " \n", " val_loss = calculate_weighted_loss(outputs, labels, label_mask, label_weights)\n", " val_true_loss = calculate_true_loss(outputs, labels, label_mask, scalers)\n", "\n", " total_val_loss += val_loss.item()\n", " total_true_val_loss += val_true_loss\n", " val_batches += 1\n", " \n", " avg_val_loss = total_val_loss / val_batches\n", " avg_val_true_loss = total_true_val_loss / val_batches\n", " val_losses.append(avg_val_loss)\n", " \n", " print(f\"\\nStep {global_step} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | True train loss: {avg_true_train_loss:.4f} | True val loss: {avg_val_true_loss:.4f}\")\n", " \n", " # Early stopping check and best model saving\n", " if avg_val_loss < best_val_loss:\n", " best_val_loss = avg_val_loss\n", " steps_no_improve = 0\n", " best_model_state = model.state_dict().copy()\n", " print(f\"New best validation loss: {best_val_loss:.4f}\")\n", " else:\n", " steps_no_improve += 1\n", " if steps_no_improve >= patience:\n", " print(f\"Early stopping triggered after {global_step} steps ({steps_no_improve} validation steps without improvement).\")\n", " # Load best model and return\n", " if best_model_state is not None:\n", " model.load_state_dict(best_model_state)\n", " print(f\"Loaded best model with validation loss: {best_val_loss:.4f}\")\n", " return train_losses, val_losses, best_val_loss\n", " \n", " model.train()\n", " \n", " # Handle any remaining training loss that hasn't been validated\n", " if train_steps_count > 0:\n", " avg_train_loss = running_train_loss / train_steps_count\n", " train_losses.append(avg_train_loss)\n", " \n", " # Load the best model state before returning\n", " if best_model_state is not None:\n", " model.load_state_dict(best_model_state)\n", " print(f\"Loaded best model with validation loss: {best_val_loss:.4f}\")\n", " \n", " return train_losses, val_losses, best_val_loss\n", "\n", "def run_training(smiles_train, smiles_test, labels_train, labels_test, \n", " model, tokenizer, scalers, num_epochs=5, learning_rate=1e-5, \n", " batch_size=256, validation_steps=500):\n", " \"\"\"\n", " Complete training pipeline for two labels with step-based validation\n", " \n", " Args:\n", " smiles_train, smiles_test: Lists of SMILES strings\n", " labels_train, labels_test: numpy arrays of shape (num_samples, 2) - ALREADY SCALED\n", " model: CustomModel instance (configured for 2 outputs)\n", " tokenizer: Tokenizer instance\n", " scalers: List of 2 scalers, one for each label (for inverse transform only)\n", " num_epochs: Number of training epochs\n", " learning_rate: Learning rate\n", " batch_size: Batch size for training\n", " validation_steps: Perform validation every N training steps\n", " \"\"\"\n", " \n", " print(\"Setting up datasets for two-label training (labels assumed pre-scaled)\")\n", " \n", " # Create datasets - no scaling performed here\n", " train_dataset = SMILESDataset(smiles_train, labels_train, tokenizer)\n", " val_dataset = SMILESDataset(smiles_test, labels_test, tokenizer)\n", " \n", " # Print dataset statistics\n", " train_stats = train_dataset.get_label_statistics()\n", " val_stats = val_dataset.get_label_statistics()\n", " \n", " print(\"Training dataset statistics:\")\n", " for key, value in train_stats.items():\n", " print(f\" {key}: {value}\")\n", " \n", " print(\"Validation dataset statistics:\")\n", " for key, value in val_stats.items():\n", " print(f\" {key}: {value}\")\n", " \n", " # Compute label weights based on training data\n", " label_weights = compute_label_weights(train_dataset)\n", " print(f\"Computed label weights: {label_weights.numpy()}\")\n", " \n", " # Create data loaders\n", " train_dataloader = DataLoader(\n", " train_dataset,\n", " batch_size=batch_size,\n", " shuffle=True,\n", " num_workers=4,\n", " pin_memory=True\n", " )\n", " \n", " val_dataloader = DataLoader(\n", " val_dataset,\n", " batch_size=batch_size,\n", " shuffle=False,\n", " num_workers=4,\n", " pin_memory=True\n", " )\n", " \n", " # Set device\n", " device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", " print(f\"Using device: {device}\")\n", " print(f\"Training steps per epoch: {len(train_dataloader)}\")\n", " print(f\"Total training steps: {len(train_dataloader) * num_epochs}\")\n", " \n", " # Train the model\n", " train_losses, val_losses, best_val_loss = train_model(\n", " model=model,\n", " train_dataloader=train_dataloader,\n", " val_dataloader=val_dataloader,\n", " label_weights=label_weights,\n", " scalers=scalers, # Still pass scalers for true loss calculation\n", " num_epochs=num_epochs,\n", " learning_rate=learning_rate,\n", " device=device,\n", " patience=10,\n", " validation_steps=validation_steps,\n", " )\n", " \n", " print('Training completed.')\n", " print(f'Number of validation checkpoints: {len(val_losses)}')\n", " print(f'Final training losses: {train_losses[-5:] if len(train_losses) >= 5 else train_losses}')\n", " print(f'Best validation loss: {best_val_loss:.4f}')\n", " \n", " # Save model\n", " torch.save(model.state_dict(), '/home/jovyan/simson_training_bolgov/regression/regression_simson.pth')\n", " print(\"Model saved successfully!\")\n", " \n", " return train_losses, val_losses, best_val_loss" ] }, { "cell_type": "code", "execution_count": 3, "id": "12a2b8c3-2c4d-4b1b-8cc7-930c9fe68fd7", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "df = pd.read_csv('/home/jovyan/simson_training_bolgov/regression/PI_Tg_P308K_synth_db_chem.csv')\n", "targets = ['Tg', 'He', 'N2', 'O2', 'CH4', 'CO2']" ] }, { "cell_type": "code", "execution_count": 6, "id": "8e1296a2-551c-48ab-aab3-fcf4b6110d75", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from matplotlib import pyplot as plt\n", "\n", "plt.plot(df['CO2'].to_list())" ] }, { "cell_type": "code", "execution_count": 4, "id": "9946f5cd-3683-49db-8535-393cb04140ce", "metadata": {}, "outputs": [], "source": [ "tokenizer_path = 'DeepChem/ChemBERTa-77M-MTR'\n", "tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\n", "\n", "# Only the hidden size is slightly larger, everything else is the same\n", "config = BertConfig(\n", " vocab_size=tokenizer.vocab_size,\n", " hidden_size=768,\n", " num_hidden_layers=4,\n", " num_attention_heads=12,\n", " intermediate_size=2048,\n", " max_position_embeddings=512\n", " )\n", "\n", "simson_params = torch.load('/home/jovyan/simson_training_bolgov/simson_checkpoints_1M/checkpoint_best_model.bin')\n", "\n", "backbone = SimSonEncoder(config=config, max_len=512)\n", "backbone = torch.compile(backbone)\n", "backbone.load_state_dict(simson_params)\n", "\n", "\n", "model = SimSonClassifier(encoder=backbone, num_labels=len(targets))\n", "model = torch.compile(model, fullgraph=True)" ] }, { "cell_type": "code", "execution_count": 5, "id": "903489f0-9039-4504-894e-6739b4a15371", "metadata": {}, "outputs": [], "source": [ "def create_splits(df):\n", " length = len(df)\n", " train_length = int(0.99 * length)\n", " train = df.loc[:train_length]\n", " test = df.loc[train_length:]\n", " return train, test\n", "\n", "train, test = create_splits(df)\n", "\n", "train = train.reset_index(drop=True)\n", "test = test.reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 6, "id": "00c271f1-bd44-457d-9a0e-7b221871ab78", "metadata": {}, "outputs": [], "source": [ "scalers = []\n", "\n", "for target in targets:\n", " target_scaler = StandardScaler()\n", " train[target] = target_scaler.fit_transform(train[target].to_numpy().reshape(-1, 1))\n", " test[target] = target_scaler.transform(test[target].to_numpy().reshape(-1, 1))\n", " \n", " scalers.append(target_scaler)\n", "\n", "smiles_train = train['Smiles']\n", "smiles_test = test['Smiles']\n", "\n", "labels_train = train[targets].values\n", "labels_test = test[targets].values" ] }, { "cell_type": "code", "execution_count": 7, "id": "01ebce4a-9ac0-4527-a9bd-8d13913f15e3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['/home/jovyan/simson_training_bolgov/regression/scalers']" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import joblib\n", "\n", "joblib.dump(scalers, '/home/jovyan/simson_training_bolgov/regression/scalers')" ] }, { "cell_type": "code", "execution_count": 8, "id": "4405c601-f006-4eeb-989e-fb35dd5349ba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Setting up datasets for two-label training (labels assumed pre-scaled)\n", "Training dataset statistics:\n", " total_samples: 6659681\n", " label_0_count: 6659681\n", " label_1_count: 6659681\n", " label_0_ratio: 1.0\n", " label_1_ratio: 1.0\n", " both_labels_count: 0\n", " single_label_count: 0\n", " no_labels_count: 0\n", "Validation dataset statistics:\n", " total_samples: 67270\n", " label_0_count: 67270\n", " label_1_count: 67270\n", " label_0_ratio: 1.0\n", " label_1_ratio: 1.0\n", " both_labels_count: 0\n", " single_label_count: 0\n", " no_labels_count: 0\n", "Computed label weights: [0.33333334 0.33333334 0.33333334 0.33333334 0.33333334 0.33333334]\n", "Using device: cuda\n", "Training steps per epoch: 26015\n", "Total training steps: 78045\n", "Label weights: [0.33333334 0.33333334 0.33333334 0.33333334 0.33333334 0.33333334]\n", "Validation will be performed every 7000 steps\n", "\n", "Epoch 1/3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 27%|██▍ | 7001/26015 [10:27<10:43:20, 2.03s/it, step=7002, loss=0.0372, true_loss=16.0679, lr=1.84e-05]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 7000 | Train Loss: 0.0618 | Val Loss: 0.1191 | True train loss: 17.4244 | True val loss: 18.3473\n", "New best validation loss: 0.1191\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 54%|████▎ | 14001/26015 [20:40<3:46:01, 1.13s/it, step=14002, loss=0.0315, true_loss=15.1059, lr=1.68e-05]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 14000 | Train Loss: 0.0357 | Val Loss: 0.0652 | True train loss: 16.0534 | True val loss: 17.3651\n", "New best validation loss: 0.0652\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 81%|██████▍ | 21001/26015 [30:53<1:34:27, 1.13s/it, step=21002, loss=0.0348, true_loss=15.9539, lr=1.52e-05]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 21000 | Train Loss: 0.0319 | Val Loss: 0.0438 | True train loss: 15.7137 | True val loss: 16.3045\n", "New best validation loss: 0.0438\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " \r" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Epoch 2/3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 8%|▋ | 1987/26015 [02:59<5:37:18, 1.19it/s, step=28002, loss=0.0224, true_loss=14.3285, lr=1.35e-05]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 28000 | Train Loss: 0.0284 | Val Loss: 0.0393 | True train loss: 15.0774 | True val loss: 15.2044\n", "New best validation loss: 0.0393\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 35%|███ | 8987/26015 [13:13<3:55:46, 1.20it/s, step=35002, loss=0.0302, true_loss=13.3737, lr=1.19e-05]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 35000 | Train Loss: 0.0257 | Val Loss: 0.0279 | True train loss: 14.4303 | True val loss: 14.4498\n", "New best validation loss: 0.0279\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 61%|████▉ | 15987/26015 [23:29<2:17:59, 1.21it/s, step=42002, loss=0.0264, true_loss=14.5345, lr=1.03e-05]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 42000 | Train Loss: 0.0245 | Val Loss: 0.0351 | True train loss: 14.1197 | True val loss: 14.2312\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 88%|████████▊ | 22987/26015 [33:46<41:56, 1.20it/s, step=49002, loss=0.0216, true_loss=14.1316, lr=8.70e-06]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 49000 | Train Loss: 0.0233 | Val Loss: 0.0290 | True train loss: 13.9434 | True val loss: 14.4628\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " \r" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Epoch 3/3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 15%|█▎ | 3971/26015 [05:52<7:13:46, 1.18s/it, step=56002, loss=0.0254, true_loss=14.4344, lr=7.08e-06]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 56000 | Train Loss: 0.0229 | Val Loss: 0.0479 | True train loss: 13.9115 | True val loss: 14.0929\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 42%|███▎ | 10971/26015 [16:06<4:48:34, 1.15s/it, step=63002, loss=0.0201, true_loss=12.8691, lr=5.47e-06]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 63000 | Train Loss: 0.0219 | Val Loss: 0.0239 | True train loss: 13.6746 | True val loss: 13.5177\n", "New best validation loss: 0.0239\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 69%|██████▏ | 17971/26015 [26:24<2:31:18, 1.13s/it, step=7e+4, loss=0.0248, true_loss=14.9835, lr=3.86e-06]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 70000 | Train Loss: 0.0212 | Val Loss: 0.0259 | True train loss: 13.5072 | True val loss: 13.7410\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Training: 96%|█████████▌| 24971/26015 [36:41<19:36, 1.13s/it, step=77002, loss=0.0228, true_loss=13.9553, lr=2.24e-06]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Step 77000 | Train Loss: 0.0207 | Val Loss: 0.0267 | True train loss: 13.4052 | True val loss: 13.8021\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " " ] }, { "name": "stdout", "output_type": "stream", "text": [ "Loaded best model with validation loss: 0.0239\n", "Training completed.\n", "Number of validation checkpoints: 11\n", "Final training losses: [0.022863016219410514, 0.02186289042873042, 0.021151691354678145, 0.020719855580878046, 0.020669196563010864]\n", "Best validation loss: 0.0239\n", "Model saved successfully!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r" ] } ], "source": [ "import numpy as np\n", "import torch\n", "from torch.optim import AdamW\n", "from torch.optim.lr_scheduler import LinearLR\n", "from torch.utils.data import DataLoader\n", "from tqdm import tqdm\n", "\n", "train_losses, val_losses, best_loss = run_training(\n", " smiles_train, smiles_test, labels_train, labels_test, \n", " model, tokenizer, scalers, num_epochs=3, learning_rate=2e-5, batch_size=256, validation_steps=7000,\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:.mlspace-bolgov_simson_training]", "language": "python", "name": "conda-env-.mlspace-bolgov_simson_training-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.11" } }, "nbformat": 4, "nbformat_minor": 5 }