{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Installing the libraries" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "id": "pGooXpPcLsEJ", "jupyter": { "outputs_hidden": true }, "outputId": "7af14467-5a05-4c1f-b1de-8605e64069a1", "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.3)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (1.6.1)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.18.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.31.2)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2.0.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.1)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n", "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.13.2)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.6)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2025.3.2)\n", "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", " Downloading 
nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch) (0.6.2)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.2.0)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.2)\n", "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.15.3)\n", "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.5.0)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (3.6.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.4.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.4.26)\n", "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m51.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n", " Attempting uninstall: nvidia-nvjitlink-cu12\n", " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", " Attempting uninstall: nvidia-curand-cu12\n", " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", " Attempting uninstall: nvidia-cufft-cu12\n", " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", " Attempting uninstall: nvidia-cuda-runtime-cu12\n", " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", " Attempting uninstall: nvidia-cuda-cupti-cu12\n", " Found existing installation: nvidia-cuda-cupti-cu12 
12.5.82\n", " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", " Attempting uninstall: nvidia-cublas-cu12\n", " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", " Attempting uninstall: nvidia-cusparse-cu12\n", " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", " Attempting uninstall: nvidia-cudnn-cu12\n", " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", " Attempting uninstall: nvidia-cusolver-cu12\n", " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", "Successfully installed nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127\n" ] } ], "source": [ "!pip install transformers torch pandas scikit-learn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# importing the libaries" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-05-16T06:14:11.763008Z", "iopub.status.busy": "2025-05-16T06:14:11.762285Z", "iopub.status.idle": "2025-05-16T06:14:11.767215Z", "shell.execute_reply": "2025-05-16T06:14:11.766414Z", "shell.execute_reply.started": "2025-05-16T06:14:11.762977Z" }, "id": "j5eauIPZLsEJ", "trusted": true }, "outputs": [], "source": [ "import os\n", "import warnings\n", "import logging\n", "import pandas as pd\n", "import torch\n", "from sklearn.model_selection import train_test_split\n", "from transformers import (\n", " AutoTokenizer,\n", " AutoModelForSequenceClassification,\n", " Trainer,\n", " TrainingArguments,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# suppressing the warnings" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-16T06:14:14.611821Z", "iopub.status.busy": "2025-05-16T06:14:14.611045Z", "iopub.status.idle": "2025-05-16T06:14:14.616203Z", "shell.execute_reply": "2025-05-16T06:14:14.615287Z", "shell.execute_reply.started": "2025-05-16T06:14:14.611796Z" }, "id": "Sfzl5XkfLsEK", "trusted": true }, "outputs": [], "source": [ "# Suppress tokenizer warnings and W&B\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", "os.environ[\"WANDB_DISABLED\"] = \"true\"\n", "warnings.filterwarnings(\"ignore\", category=UserWarning)\n", "\n", "# Logging\n", "logging.basicConfig(level=logging.INFO)\n", "logger = logging.getLogger(__name__)\n", "\n", "# Check GPU\n", "logger.info(f\"CUDA available: {torch.cuda.is_available()}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Processing the dataset and loading mdeberta-v3-base" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "execution": { "iopub.execute_input": "2025-05-16T06:14:17.274165Z", "iopub.status.busy": "2025-05-16T06:14:17.273901Z", "iopub.status.idle": "2025-05-16T06:14:33.405623Z", 
"shell.execute_reply": "2025-05-16T06:14:33.404821Z", "shell.execute_reply.started": "2025-05-16T06:14:17.274145Z" }, "id": "H6ecHyFbLsEM", "outputId": "815184c9-2e5d-4af8-aafe-705a08494be3", "trusted": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "# loading csv\n", "data = pd.read_csv(\"/content/emails.csv\")\n", "data = data.dropna(subset=[\"email\", \"type\"])\n", "data = data[data[\"email\"].str.strip() != \"\"]\n", "\n", "# Validate labels\n", "valid_labels = {\"Incident\", \"Request\", \"Change\", \"Problem\"}\n", "label_set = set(data[\"type\"].unique())\n", "if not label_set.issubset(valid_labels):\n", " raise ValueError(f\"Unexpected labels: {label_set - valid_labels}\")\n", "\n", "# Label encoding\n", "label_map = {\"Incident\": 0, \"Request\": 1, \"Change\": 2, \"Problem\": 3}\n", "data[\"label_id\"] = data[\"type\"].map(label_map)\n", "\n", "# Split into train/test\n", "emails = data[\"email\"].tolist()\n", "labels = data[\"label_id\"].tolist()\n", "train_emails, test_emails, train_labels, test_labels = train_test_split(\n", " emails, labels, test_size=0.1, stratify=labels, random_state=42\n", ")\n", "\n", "# Load tokenizer & model\n", "model_name = \"microsoft/mdeberta-v3-base\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4)\n", "\n", "# Tokenization\n", "logger.info(\"Tokenizing...\")\n", "train_encodings = tokenizer(train_emails, truncation=True, padding=True, max_length=512)\n", "test_encodings = tokenizer(test_emails, truncation=True, padding=True, max_length=512)\n", "\n", "\n", "# Dataset class\n", "class EmailDataset(torch.utils.data.Dataset):\n", " def __init__(self, encodings, labels):\n", " self.encodings = encodings\n", " self.labels = labels\n", "\n", " def __getitem__(self, idx):\n", " item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}\n", " item[\"labels\"] = torch.tensor(self.labels[idx])\n", " return item\n", "\n", " def __len__(self):\n", " return len(self.labels)\n", "\n", "\n", "train_dataset = EmailDataset(train_encodings, train_labels)\n", "test_dataset = EmailDataset(test_encodings, test_labels)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# computing class weights" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vH_k8vJDQzLy", "outputId": "3c4ee6e6-87a3-4cd9-bfbb-2c9f72fdec40" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Per-class weights: tensor([0.6259, 0.8746, 2.3838, 1.1912])\n" ] } ], "source": [ "\n", "from sklearn.utils.class_weight import compute_class_weight\n", "import numpy as np\n", "\n", "train_labels = data[\"label_id\"].values \n", "\n", "classes = np.unique(train_labels) # [0,1,2,3]\n", "weights = compute_class_weight(\n", " class_weight=\"balanced\",\n", " classes=classes,\n", " y=train_labels\n", ")\n", "class_weights = torch.tensor(weights, dtype=torch.float)\n", "\n", "print(\"Per-class weights:\", class_weights)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Trainer with 
weighted classes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "lQp4sYwCQoo2" }, "outputs": [], "source": [ "from torch.nn import CrossEntropyLoss\n", "\n", "class WeightedTrainer(Trainer):\n", " def __init__(self, *args, class_weights=None, **kwargs):\n", " super().__init__(*args, **kwargs)\n", " self.class_weights = class_weights.to(self.model.device)\n", "\n", " def compute_loss(self, model, inputs, return_outputs=False, **kwargs):\n", " labels = inputs.pop(\"labels\")\n", " outputs = model(**inputs)\n", " logits = outputs.logits\n", " loss_fct = CrossEntropyLoss(weight=self.class_weights)\n", " loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))\n", " return (loss, outputs) if return_outputs else loss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Training Process" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "collapsed": true, "execution": { "iopub.execute_input": "2025-05-16T06:14:37.311742Z", "iopub.status.busy": "2025-05-16T06:14:37.311456Z", "iopub.status.idle": "2025-05-16T08:08:16.495712Z", "shell.execute_reply": "2025-05-16T08:08:16.494872Z", "shell.execute_reply.started": "2025-05-16T06:14:37.311724Z" }, "id": "S7d3iXksLsEN", "jupyter": { "outputs_hidden": true }, "outputId": "f4ee522e-a420-44d7-97e0-b645aa6398c8", "trusted": true }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [10800/10800 1:50:16, Epoch 4/4]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
1001.370100
2001.098400
3000.725600
4000.655000
5000.657600
6000.732900
7000.756800
8000.641300
9000.624400
10000.655200
11000.635100
12000.666200
13000.627400
14000.638100
15000.586800
16000.635500
17000.605800
18000.561200
19000.589700
20000.573800
21000.625900
22000.527300
23000.549800
24000.581500
25000.545700
26000.558400
27000.696800
28000.531800
29000.541500
30000.460900
31000.568600
32000.536700
33000.538100
34000.497000
35000.486100
36000.540400
37000.505900
38000.479200
39000.515300
40000.516700
41000.495200
42000.534700
43000.483500
44000.534300
45000.488900
46000.499800
47000.497900
48000.494500
49000.436700
50000.465000
51000.527200
52000.479900
53000.478500
54000.472500
55000.426800
56000.408700
57000.416400
58000.408100
59000.398900
60000.448100
61000.423500
62000.378800
63000.477300
64000.368800
65000.424500
66000.400400
67000.413600
68000.390900
69000.433400
70000.423100
71000.403500
72000.438000
73000.428600
74000.494800
75000.411100
76000.433600
77000.428200
78000.445400
79000.431100
80000.388200
81000.422900
82000.333900
83000.324600
84000.393700
85000.306100
86000.318300
87000.366400
88000.350800
89000.337500
90000.315600
91000.344400
92000.340300
93000.314400
94000.304700
95000.333300
96000.356100
97000.322900
98000.356500
99000.354500
100000.301600
101000.320000
102000.279400
103000.340800
104000.344400
105000.363000
106000.297600
107000.295300
108000.299600

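, { "cell_type": "markdown", "metadata": {}, "source": [ "The loss curve shown above is also available programmatically: `trainer.state.log_history` keeps one dict per logging event, which is handy for plotting or exporting (a minimal sketch)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Minimal sketch: pull the logged training losses out of the Trainer state.\n", "loss_logs = [h for h in trainer.state.log_history if \"loss\" in h]\n", "steps = [h[\"step\"] for h in loss_logs]\n", "losses = [h[\"loss\"] for h in loss_logs]\n", "print(f\"{len(loss_logs)} logged points; first loss {losses[0]:.4f}, last loss {losses[-1]:.4f}\")" ] }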
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "training_args = TrainingArguments(\n", " output_dir=\"/content\",\n", " num_train_epochs=4,\n", " per_device_train_batch_size=4,\n", " gradient_accumulation_steps=2,\n", " warmup_steps=500,\n", " weight_decay=0.01,\n", " logging_dir=\"/content/logs\",\n", " logging_steps=100,\n", " save_steps=3600,\n", " fp16=True,\n", " report_to=\"none\",\n", " dataloader_num_workers=2,\n", ")\n", "\n", "# trainer\n", "trainer = WeightedTrainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=test_dataset,\n", " class_weights=class_weights,\n", ")\n", "\n", "# Training\n", "logger.info(\"Training started...\")\n", "trainer.train()\n", "logger.info(\"Training completed.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# saving model in zip for downloading" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_c80s-1GSyqw", "outputId": "61d0170c-e6c7-4329-fa02-38a441a68761" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Zipped model to: /content/mdeberta_finetuned_model.zip\n", "Zipped tokenizer to: /content/mdeberta_finetuned_tokenizer.zip\n" ] } ], "source": [ "import os\n", "\n", "model_dir = \"/content/mdeberta_finetuned_model\"\n", "tokenizer_dir = \"/content/mdeberta_finetuned_tokenizer\"\n", "\n", "model.save_pretrained(model_dir)\n", "tokenizer.save_pretrained(tokenizer_dir)\n", "\n", "os.system(f\"zip -r {model_dir}.zip {model_dir}\")\n", "os.system(f\"zip -r {tokenizer_dir}.zip {tokenizer_dir}\")\n", "logger.info(\"Model and tokenizer saved and zipped.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# comaparing zeroshot vs finetuned" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 645 }, "id": "8A6mKaWXwBnD", "outputId": "5a6331dc-9678-4f33-b315-02dde1535fc9" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", ":37: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", " zero_shot_trainer = Trainer(model=zero_shot_model, tokenizer=tokenizer)\n", "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n", ":38: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", " finetuned_trainer = Trainer(model=finetuned_model, tokenizer=tokenizer)\n", "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. 
, { "cell_type": "markdown", "metadata": {}, "source": [ "# Comparing zero-shot vs fine-tuned" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 645 }, "id": "8A6mKaWXwBnD", "outputId": "5a6331dc-9678-4f33-b315-02dde1535fc9" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", ":37: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", " zero_shot_trainer = Trainer(model=zero_shot_model, tokenizer=tokenizer)\n", "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n", ":38: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", " finetuned_trainer = Trainer(model=finetuned_model, tokenizer=tokenizer)\n", "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n" ] }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "=== Zero‑Shot mDeBERTa‑v3 ===\n", "Accuracy: 0.3992\n", " precision recall f1-score support\n", "\n", " Incident 0.40 1.00 0.57 958\n", " Request 0.00 0.00 0.00 686\n", " Change 0.00 0.00 0.00 252\n", " Problem 0.00 0.00 0.00 504\n", "\n", " accuracy 0.40 2400\n", " macro avg 0.10 0.25 0.14 2400\n", "weighted avg 0.16 0.40 0.23 2400\n", "\n", "\n", "=== Fine‑Tuned mDeBERTa‑v3 ===\n", "Accuracy: 0.7971\n", " precision recall f1-score support\n", "\n", " Incident 0.75 0.78 0.77 958\n", " Request 0.95 0.96 0.95 686\n", " Change 0.97 0.91 0.94 252\n", " Problem 0.58 0.55 0.57 504\n", "\n", " accuracy 0.80 2400\n", " macro avg 0.81 0.80 0.81 2400\n", "weighted avg 0.80 0.80 0.80 2400\n", "\n" ] } ], "source": [ "import numpy as np\n", "import logging\n", "from transformers import Trainer, AutoTokenizer, AutoModelForSequenceClassification\n", "from sklearn.metrics import accuracy_score, classification_report\n", "\n", "# 1. Label map\n", "label_map = {\"Incident\": 0, \"Request\": 1, \"Change\": 2, \"Problem\": 3}\n", "num_labels = len(label_map)\n", "\n", "# 2. Encode labels in data\n", "data[\"label_id\"] = data[\"type\"].map(label_map)\n", "\n", "# 3. Paths and model names\n", "model_name = \"microsoft/mdeberta-v3-base\"\n", "model_dir = \"/content/mdeberta_finetuned_model\"\n", "\n", "# 4. Load tokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "\n", "# 5a. Zero-shot model (pretrained checkpoint with a fresh, untrained head)\n", "zero_shot_model = AutoModelForSequenceClassification.from_pretrained(\n", " model_name, num_labels=num_labels\n", ")\n", "zero_shot_model.eval()\n", "\n", "# 5b. Fine-tuned model\n", "finetuned_model = AutoModelForSequenceClassification.from_pretrained(model_dir)\n", "finetuned_model.eval()\n", "\n", "# 6. Trainers (used here only as batched-prediction wrappers)\n", "zero_shot_trainer = Trainer(model=zero_shot_model, tokenizer=tokenizer)\n", "finetuned_trainer = Trainer(model=finetuned_model, tokenizer=tokenizer)\n", "\n", "\n", "# 7. Prediction helper\n", "def get_predictions(trainer, dataset):\n", " raw_preds, _, _ = trainer.predict(dataset)\n", " return np.argmax(raw_preds, axis=1)\n", "\n", "\n", "# 8. Inference on the test set\n", "zero_preds = get_predictions(zero_shot_trainer, test_dataset)\n", "fine_preds = get_predictions(finetuned_trainer, test_dataset)\n", "\n", "# 9. Evaluation\n", "print(\"=== Zero‑Shot mDeBERTa‑v3 ===\")\n", "print(f\"Accuracy: {accuracy_score(test_labels, zero_preds):.4f}\")\n", "print(\n", " classification_report(test_labels, zero_preds, target_names=list(label_map.keys()))\n", ")\n", "\n", "print(\"\\n=== Fine‑Tuned mDeBERTa‑v3 ===\")\n", "print(f\"Accuracy: {accuracy_score(test_labels, fine_preds):.4f}\")\n", "print(\n", " classification_report(test_labels, fine_preds, target_names=list(label_map.keys()))\n", ")" ] }
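, { "cell_type": "markdown", "metadata": {}, "source": [ "The report above suggests the remaining errors of the fine-tuned model sit mostly between `Incident` and `Problem` (both score far below `Request` and `Change`); a confusion matrix makes that explicit. A short follow-up sketch using the predictions already computed:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Follow-up sketch: confusion matrix for the fine-tuned predictions.\n", "from sklearn.metrics import confusion_matrix\n", "import pandas as pd\n", "\n", "cm = confusion_matrix(test_labels, fine_preds)\n", "class_names = list(label_map.keys())\n", "# Rows are true labels, columns are predicted labels.\n", "print(pd.DataFrame(cm, index=class_names, columns=class_names))" ] }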
Evaluation\n", "print(\"=== Zero‑Shot mDeBERTa‑v3 ===\")\n", "print(f\"Accuracy: {accuracy_score(test_labels, zero_preds):.4f}\")\n", "print(\n", " classification_report(test_labels, zero_preds, target_names=list(label_map.keys()))\n", ")\n", "\n", "print(\"\\n=== Fine‑Tuned mDeBERTa‑v3 ===\")\n", "print(f\"Accuracy: {accuracy_score(test_labels, fine_preds):.4f}\")\n", "print(\n", " classification_report(test_labels, fine_preds, target_names=list(label_map.keys()))\n", ")" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "name": "finetuning notebook mdeberta", "provenance": [] }, "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "datasetId": 7426611, "sourceId": 11822841, "sourceType": "datasetVersion" } ], "dockerImageVersionId": 31041, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 0 }