{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "12349750", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Label': ['ham', 'ham', 'ham'],\n", " 'Sentence': ['Are you up for the challenge? I know i am :)',\n", " 'Feel Yourself That You Are Always Happy.. Slowly It Becomes Your Habit & Finally It Becomes Part Of Your Life.. Follow It.. Happy Morning & Have A Happy Day:)',\n", " 'Kallis is ready for bat in 2nd innings']}" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import load_dataset\n", "\n", "data_files =\"E:/Hugging_Face/SMS_Spam.csv\"\n", "spam_data = load_dataset(\"csv\", data_files = data_files, split = \"train\")\n", "spam_data = spam_data.train_test_split(test_size = 0.2)\n", "spam_data[\"train\"][:3]" ] }, { "cell_type": "code", "execution_count": 2, "id": "35f0392d", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e6740059d6df4ea7aceaf262ef339c94", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/4459 [00:00\n", " \n", " \n", " [1338/1338 02:15, Epoch 3/3]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossAccuracyF1PrecisionRecall
1No log0.0452970.9899100.9629630.9831930.943548
20.0953000.0427760.9932740.9752071.0000000.951613
30.0212000.0405220.9932740.9752071.0000000.951613

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "TrainOutput(global_step=1338, training_loss=0.04511010432991746, metrics={'train_runtime': 136.1512, 'train_samples_per_second': 78.596, 'train_steps_per_second': 9.827, 'total_flos': 338812011541800.0, 'train_loss': 0.04511010432991746, 'epoch': 3.0})" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import Trainer\n", "\n", "# Bundle the model, training arguments, data splits, collator and metric\n", "# callback into a single Trainer instance.\n", "trainer = Trainer(\n", "    model=model,\n", "    args=training_args,\n", "    data_collator=data_collator,\n", "    train_dataset=spam_data_clean[\"train\"],\n", "    eval_dataset=spam_data_clean[\"validation\"],\n", "    processing_class=tokenizer,\n", "    compute_metrics=compute_metrics,\n", ")\n", "\n", "# Last expression of the cell: the returned TrainOutput is displayed as the result.\n", "trainer.train()" ] }, { "cell_type": "code", "execution_count": 39, "id": "c236f093", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [112/112 00:04]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'eval_loss': 0.04052222892642021,\n", " 'eval_accuracy': 0.9932735426008968,\n", " 'eval_f1': 0.9752066115702479,\n", " 'eval_precision': 1.0,\n", " 'eval_recall': 0.9516129032258065,\n", " 'eval_runtime': 5.1761,\n", " 'eval_samples_per_second': 172.33,\n", " 'eval_steps_per_second': 21.638,\n", " 'epoch': 3.0}" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.evaluate()" ] }, { "cell_type": "code", "execution_count": 40, "id": "1e6538eb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('spam-classifier\\\\tokenizer_config.json',\n", " 'spam-classifier\\\\special_tokens_map.json',\n", " 'spam-classifier\\\\vocab.txt',\n", " 'spam-classifier\\\\added_tokens.json',\n", " 'spam-classifier\\\\tokenizer.json')" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Write the model and the tokenizer into ONE directory so the saved\n", "# artifact can be reloaded from a single path. Previously the model went to\n", "# \"spam-ham-classification\" while the tokenizer went to \"spam-classifier\".\n", "OUTPUT_DIR = \"spam-classifier\"\n", "\n", "trainer.save_model(OUTPUT_DIR)\n", "tokenizer.save_pretrained(OUTPUT_DIR)" ] }, { "cell_type": "code", "execution_count": null, "id": "99dbfb57", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 5 }