{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Installing the libraries" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "id": "pGooXpPcLsEJ", "jupyter": { "outputs_hidden": true }, "outputId": "7af14467-5a05-4c1f-b1de-8605e64069a1", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.3)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (1.6.1)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.18.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.31.2)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2.0.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.1)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", "Requirement already satisfied: tqdm>=4.27 in 
/usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n", "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.13.2)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.6)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2025.3.2)\n", "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Requirement already 
satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch) (0.6.2)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.2.0)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.2)\n", "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.15.3)\n", "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.5.0)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (3.6.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from 
requests->transformers) (3.4.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.4.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.4.26)\n", "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m 
\u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m51.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n", " Attempting uninstall: nvidia-nvjitlink-cu12\n", " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", " Attempting uninstall: nvidia-curand-cu12\n", " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", " Attempting uninstall: nvidia-cufft-cu12\n", " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", " Uninstalling 
nvidia-cufft-cu12-11.2.3.61:\n", " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", " Attempting uninstall: nvidia-cuda-runtime-cu12\n", " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", " Attempting uninstall: nvidia-cuda-cupti-cu12\n", " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", " Attempting uninstall: nvidia-cublas-cu12\n", " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", " Attempting uninstall: nvidia-cusparse-cu12\n", " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", " Attempting uninstall: nvidia-cudnn-cu12\n", " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", " Attempting uninstall: nvidia-cusolver-cu12\n", " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", "Successfully installed nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 
nvidia-nvjitlink-cu12-12.4.127\n" ] } ], "source": [ "!pip install transformers torch pandas scikit-learn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# importing the libaries" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-05-16T06:14:11.763008Z", "iopub.status.busy": "2025-05-16T06:14:11.762285Z", "iopub.status.idle": "2025-05-16T06:14:11.767215Z", "shell.execute_reply": "2025-05-16T06:14:11.766414Z", "shell.execute_reply.started": "2025-05-16T06:14:11.762977Z" }, "id": "j5eauIPZLsEJ", "trusted": true }, "outputs": [], "source": [ "import os\n", "import warnings\n", "import logging\n", "import pandas as pd\n", "import torch\n", "from sklearn.model_selection import train_test_split\n", "from transformers import (\n", " AutoTokenizer,\n", " AutoModelForSequenceClassification,\n", " Trainer,\n", " TrainingArguments,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# suppressing the warnings" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-16T06:14:14.611821Z", "iopub.status.busy": "2025-05-16T06:14:14.611045Z", "iopub.status.idle": "2025-05-16T06:14:14.616203Z", "shell.execute_reply": "2025-05-16T06:14:14.615287Z", "shell.execute_reply.started": "2025-05-16T06:14:14.611796Z" }, "id": "Sfzl5XkfLsEK", "trusted": true }, "outputs": [], "source": [ "# Suppress tokenizer warnings and W&B\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", "os.environ[\"WANDB_DISABLED\"] = \"true\"\n", "warnings.filterwarnings(\"ignore\", category=UserWarning)\n", "\n", "# Logging\n", "logging.basicConfig(level=logging.INFO)\n", "logger = logging.getLogger(__name__)\n", "\n", "# Check GPU\n", "logger.info(f\"CUDA available: {torch.cuda.is_available()}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Processing the dataset and loading mdeberta-v3-base" ] }, { "cell_type": "code", "execution_count": 
# --- Load and clean the dataset ---
# Expected CSV schema: columns "email" (text) and "type" (one of the four
# ticket categories below).
data = pd.read_csv("/content/emails.csv")
data = data.dropna(subset=["email", "type"])
data = data[data["email"].str.strip() != ""]

# Fail fast on unknown categories rather than silently mapping them to NaN.
valid_labels = {"Incident", "Request", "Change", "Problem"}
label_set = set(data["type"].unique())
if not label_set.issubset(valid_labels):
    raise ValueError(f"Unexpected labels: {label_set - valid_labels}")

# Label encoding: category name -> integer class id.
label_map = {"Incident": 0, "Request": 1, "Change": 2, "Problem": 3}
data["label_id"] = data["type"].map(label_map)

# Stratified 90/10 train/test split so per-class proportions are preserved.
emails = data["email"].tolist()
labels = data["label_id"].tolist()
train_emails, test_emails, train_labels, test_labels = train_test_split(
    emails, labels, test_size=0.1, stratify=labels, random_state=42
)

# --- Load tokenizer & model ---
# num_labels=4 attaches a fresh (randomly initialized) classification head,
# hence the "newly initialized" warning in the output.
model_name = "microsoft/mdeberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4)

# --- Tokenization ---
logger.info("Tokenizing...")
train_encodings = tokenizer(train_emails, truncation=True, padding=True, max_length=512)
test_encodings = tokenizer(test_emails, truncation=True, padding=True, max_length=512)


class EmailDataset(torch.utils.data.Dataset):
    """Wraps tokenizer output and integer labels for the HF Trainer.

    Each item is a dict of input tensors plus a "labels" tensor, the
    format Trainer expects from a torch Dataset.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


train_dataset = EmailDataset(train_encodings, train_labels)
test_dataset = EmailDataset(test_encodings, test_labels)

# --- Per-class weights for the weighted loss ---
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# BUG FIX: the original reassigned `train_labels = data["label_id"].values`,
# which (a) silently clobbered the stratified training split created above
# and (b) computed the "balanced" weights from the FULL dataset (train +
# test) instead of the training labels. Compute weights from the training
# split only and leave `train_labels` untouched.
classes = np.unique(train_labels)  # [0, 1, 2, 3]
weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=train_labels,
)
class_weights = torch.tensor(weights, dtype=torch.float)

print("Per-class weights:", class_weights)
from torch.nn import CrossEntropyLoss


class WeightedTrainer(Trainer):
    """Trainer variant that applies per-class weights to the CE loss.

    Extra keyword argument (on top of Trainer's):
        class_weights: 1-D float tensor of length num_labels, or None to
            fall back to plain (unweighted) cross-entropy.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        # BUG FIX: the original called `class_weights.to(self.model.device)`
        # here, which (a) crashes with AttributeError when the documented
        # default `class_weights=None` is used, and (b) captures the model's
        # device at construction time — Trainer may move the model to GPU
        # later, leaving the weights stranded on CPU and breaking the loss.
        # Store as-is and move lazily in compute_loss instead.
        self.class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        # **kwargs absorbs newer Trainer arguments (e.g. num_items_in_batch).
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        # Move weights to wherever the logits actually live, each step.
        weight = (
            self.class_weights.to(logits.device)
            if self.class_weights is not None
            else None
        )
        loss_fct = CrossEntropyLoss(weight=weight)
        loss = loss_fct(
            logits.view(-1, self.model.config.num_labels), labels.view(-1)
        )
        return (loss, outputs) if return_outputs else loss
Step | \n", "Training Loss | \n", "
---|---|
100 | \n", "1.370100 | \n", "
200 | \n", "1.098400 | \n", "
300 | \n", "0.725600 | \n", "
400 | \n", "0.655000 | \n", "
500 | \n", "0.657600 | \n", "
600 | \n", "0.732900 | \n", "
700 | \n", "0.756800 | \n", "
800 | \n", "0.641300 | \n", "
900 | \n", "0.624400 | \n", "
1000 | \n", "0.655200 | \n", "
1100 | \n", "0.635100 | \n", "
1200 | \n", "0.666200 | \n", "
1300 | \n", "0.627400 | \n", "
1400 | \n", "0.638100 | \n", "
1500 | \n", "0.586800 | \n", "
1600 | \n", "0.635500 | \n", "
1700 | \n", "0.605800 | \n", "
1800 | \n", "0.561200 | \n", "
1900 | \n", "0.589700 | \n", "
2000 | \n", "0.573800 | \n", "
2100 | \n", "0.625900 | \n", "
2200 | \n", "0.527300 | \n", "
2300 | \n", "0.549800 | \n", "
2400 | \n", "0.581500 | \n", "
2500 | \n", "0.545700 | \n", "
2600 | \n", "0.558400 | \n", "
2700 | \n", "0.696800 | \n", "
2800 | \n", "0.531800 | \n", "
2900 | \n", "0.541500 | \n", "
3000 | \n", "0.460900 | \n", "
3100 | \n", "0.568600 | \n", "
3200 | \n", "0.536700 | \n", "
3300 | \n", "0.538100 | \n", "
3400 | \n", "0.497000 | \n", "
3500 | \n", "0.486100 | \n", "
3600 | \n", "0.540400 | \n", "
3700 | \n", "0.505900 | \n", "
3800 | \n", "0.479200 | \n", "
3900 | \n", "0.515300 | \n", "
4000 | \n", "0.516700 | \n", "
4100 | \n", "0.495200 | \n", "
4200 | \n", "0.534700 | \n", "
4300 | \n", "0.483500 | \n", "
4400 | \n", "0.534300 | \n", "
4500 | \n", "0.488900 | \n", "
4600 | \n", "0.499800 | \n", "
4700 | \n", "0.497900 | \n", "
4800 | \n", "0.494500 | \n", "
4900 | \n", "0.436700 | \n", "
5000 | \n", "0.465000 | \n", "
5100 | \n", "0.527200 | \n", "
5200 | \n", "0.479900 | \n", "
5300 | \n", "0.478500 | \n", "
5400 | \n", "0.472500 | \n", "
5500 | \n", "0.426800 | \n", "
5600 | \n", "0.408700 | \n", "
5700 | \n", "0.416400 | \n", "
5800 | \n", "0.408100 | \n", "
5900 | \n", "0.398900 | \n", "
6000 | \n", "0.448100 | \n", "
6100 | \n", "0.423500 | \n", "
6200 | \n", "0.378800 | \n", "
6300 | \n", "0.477300 | \n", "
6400 | \n", "0.368800 | \n", "
6500 | \n", "0.424500 | \n", "
6600 | \n", "0.400400 | \n", "
6700 | \n", "0.413600 | \n", "
6800 | \n", "0.390900 | \n", "
6900 | \n", "0.433400 | \n", "
7000 | \n", "0.423100 | \n", "
7100 | \n", "0.403500 | \n", "
7200 | \n", "0.438000 | \n", "
7300 | \n", "0.428600 | \n", "
7400 | \n", "0.494800 | \n", "
7500 | \n", "0.411100 | \n", "
7600 | \n", "0.433600 | \n", "
7700 | \n", "0.428200 | \n", "
7800 | \n", "0.445400 | \n", "
7900 | \n", "0.431100 | \n", "
8000 | \n", "0.388200 | \n", "
8100 | \n", "0.422900 | \n", "
8200 | \n", "0.333900 | \n", "
8300 | \n", "0.324600 | \n", "
8400 | \n", "0.393700 | \n", "
8500 | \n", "0.306100 | \n", "
8600 | \n", "0.318300 | \n", "
8700 | \n", "0.366400 | \n", "
8800 | \n", "0.350800 | \n", "
8900 | \n", "0.337500 | \n", "
9000 | \n", "0.315600 | \n", "
9100 | \n", "0.344400 | \n", "
9200 | \n", "0.340300 | \n", "
9300 | \n", "0.314400 | \n", "
9400 | \n", "0.304700 | \n", "
9500 | \n", "0.333300 | \n", "
9600 | \n", "0.356100 | \n", "
9700 | \n", "0.322900 | \n", "
9800 | \n", "0.356500 | \n", "
9900 | \n", "0.354500 | \n", "
10000 | \n", "0.301600 | \n", "
10100 | \n", "0.320000 | \n", "
10200 | \n", "0.279400 | \n", "
10300 | \n", "0.340800 | \n", "
10400 | \n", "0.344400 | \n", "
10500 | \n", "0.363000 | \n", "
10600 | \n", "0.297600 | \n", "
10700 | \n", "0.295300 | \n", "
10800 | \n", "0.299600 | \n", "
"
],
"text/plain": [
"