{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "words = open('names.txt', 'r').read().splitlines()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "N = torch.zeros((27, 27), dtype = torch.int32)\n", "\n", "chars = sorted(list(set(''.join(words))))\n", "\n", "stoi = {s:i+1 for i,s in enumerate(chars)}\n", "stoi['.'] = 0\n", "\n", "itos = {i:s for s,i in stoi.items()}" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "P = N.float()\n", "P /= P.sum(1, keepdim=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ". e\n", "e m\n", "m m\n", "m a\n", "a .\n" ] } ], "source": [ "#Creating the training set of bigrams (x,y)\n", "xs, ys = [], []\n", "\n", "for word in words[:1]:\n", " chs = ['.'] + list(word) + ['.']\n", " for ch1, ch2 in zip(chs, chs[1:]):\n", " ix1 = stoi[ch1]\n", " ix2 = stoi[ch2]\n", " print(ch1, ch2)\n", " xs.append(ix1)\n", " ys.append(ix2)\n", "\n", "xs = torch.tensor(xs)\n", "ys = torch.tensor(ys)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#Feeding these examples into a neural network\n", "import torch.nn.functional as F" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#<=========OPTIMIZATION============>" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([ 0, 5, 13, 13, 1])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xs" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([ 5, 13, 13, 1, 0])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ys" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# randomly initialize 27 neurons' weights. 
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# randomly initialize 27 neurons' weights. each neuron receives 27 inputs\n", "g = torch.Generator().manual_seed(2147483647)\n", "W = torch.randn((27, 27), generator=g, requires_grad=True) # requires_grad=True is needed for the backward pass (just as we did in micrograd)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# FORWARD PASS\n", "xenc = F.one_hot(xs, num_classes=27).float() # input to the network: one-hot encoding\n", "logits = xenc @ W # predict log-counts\n", "counts = logits.exp() # counts, equivalent to N\n", "probs = counts / counts.sum(1, keepdims=True) # probabilities for next character\n", "loss = -probs[torch.arange(5), ys].log().mean() # torch.arange(5) indexes the 5 examples (rows 0..4) and ys picks the column of the correct next character | take the log of each picked probability | average | negate (negative log-likelihood)" ] },
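{ "cell_type": "markdown", "metadata": {}, "source": [ "A cross-check (an added sketch, not in the original notebook): exponentiating the logits and normalizing each row is just a softmax, and the mean negative log-probability of the correct targets is what `F.cross_entropy` computes directly from the raw logits. If the forward pass above is right, both comparisons below should print `True`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# added sketch: verify the manual softmax + NLL against PyTorch's built-ins\n", "probs_check = F.softmax(logits, dim=1)\n", "print(torch.allclose(probs, probs_check))                 # manual exp + normalize == softmax\n", "print(torch.allclose(loss, F.cross_entropy(logits, ys)))  # manual NLL == cross-entropy on the logits" ] },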
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(3.7693)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "loss # This should be similar to the loss we calculated in the SUMMARY part of B-Main" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# BACKWARD PASS\n", "W.grad = None # reset the accumulated gradient first (None acts like zero here)\n", "loss.backward()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([27, 27])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "W.grad.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "W.grad" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# UPDATE\n", "W.data += -0.1 * W.grad" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--------------" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# JUST PUTTING THEM TOGETHER TO PERFORM GRADIENT DESCENT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# ONLY RUN THIS THE FIRST TIME (re-running it re-initializes W and discards training progress)\n", "# randomly initialize 27 neurons' weights. each neuron receives 27 inputs\n", "g = torch.Generator().manual_seed(2147483647)\n", "W = torch.randn((27, 27), generator=g, requires_grad=True) # requires_grad=True is needed for the backward pass (just as we did in micrograd)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "# FORWARD PASS\n", "xenc = F.one_hot(xs, num_classes=27).float() # input to the network: one-hot encoding\n", "logits = xenc @ W # predict log-counts\n", "counts = logits.exp() # counts, equivalent to N\n", "probs = counts / counts.sum(1, keepdims=True) # probabilities for next character\n", "loss = -probs[torch.arange(5), ys].log().mean() # same as before: pick each correct next-character probability, take the log, average, negate (negative log-likelihood)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.6891887187957764\n" ] } ], "source": [ "print(loss.item()) # CHECKING THE LOSS VALUE" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# BACKWARD PASS\n", "W.grad = None # reset the accumulated gradient first (None acts like zero here)\n", "loss.backward()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# UPDATE\n", "W.data += -0.1 * W.grad" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Yay, that worked nicely." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "----------------" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---------------" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### **PUTTING THEM ALL TOGETHER**" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "number of examples: 228146\n" ] } ], "source": [ "# create the dataset\n", "xs, ys = [], []\n", "for w in words:\n", "    chs = ['.'] + list(w) + ['.']\n", "    for ch1, ch2 in zip(chs, chs[1:]):\n", "        ix1 = stoi[ch1]\n", "        ix2 = stoi[ch2]\n", "        xs.append(ix1)\n", "        ys.append(ix2)\n", "xs = torch.tensor(xs)\n", "ys = torch.tensor(ys)\n", "num = xs.nelement()\n", "print('number of examples: ', num)\n", "\n", "# initialize the 'network'\n", "g = torch.Generator().manual_seed(2147483647)\n", "W = torch.randn((27, 27), generator=g, requires_grad=True)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.7686190605163574\n", "3.378804922103882\n", "3.1610896587371826\n", "3.0271859169006348\n", "2.9344847202301025\n", "2.867231607437134\n", "2.816654920578003\n", "2.777147054672241\n", "2.7452545166015625\n", "2.7188305854797363\n", "2.6965057849884033\n", "2.6773722171783447\n", "2.6608052253723145\n", "2.6463513374328613\n", "2.633665084838867\n", "2.622471332550049\n", "2.6125471591949463\n", "2.6037065982818604\n", "2.595794439315796\n", "2.5886802673339844\n" ] } ], "source": [ "# gradient descent\n", "for k in range(20):\n", "    \n", "    # forward pass\n", "    xenc = F.one_hot(xs, num_classes=27).float() # input to the network: one-hot encoding\n", "    logits = xenc @ W # predict log-counts\n", "    counts = logits.exp() # counts, equivalent to N\n", "    probs = counts / counts.sum(1, keepdims=True) # probabilities for next character\n", "    loss = -probs[torch.arange(num), ys].log().mean() + 0.01*(W**2).mean() # average NLL plus a small regularization term that acts like smoothing\n", "    print(loss.item())\n", "    \n", "    # backward pass\n", "    W.grad = None # reset the gradient\n", "    loss.backward()\n", "    \n", "    # update\n", "    W.data += -50 * W.grad" ] },
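{ "cell_type": "markdown", "metadata": {}, "source": [ "A small evaluation (an added sketch): the loss printed in the loop above includes the `0.01*(W**2).mean()` regularization term, so the pure average negative log-likelihood of the trained model is slightly lower. The cell below recomputes it over the full dataset without that term." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# added sketch: plain average NLL of the trained W over the whole dataset, without the regularization term\n", "with torch.no_grad():\n", "    xenc = F.one_hot(xs, num_classes=27).float()\n", "    counts = (xenc @ W).exp()\n", "    probs = counts / counts.sum(1, keepdim=True)\n", "    nll = -probs[torch.arange(num), ys].log().mean()\n", "print(nll.item())" ] },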
{ "cell_type": "markdown", "metadata": {}, "source": [ "So we almost achieve a very low loss value, similar to the loss we calculated in A-Main when we typed our own name and saw how the model performs." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--------" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--------------" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Finally, *drumrolls*, we are going to see how sampling from this model produces outputs (spoiler alert: they will look the same as the ones from the model we built manually, because... it is the same model, just built with a neural net)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "juwjde.\n", "janaqah.\n", "pxzfby.\n", "a.\n", "nn.\n" ] } ], "source": [ "# finally, sample from the 'neural net' model\n", "g = torch.Generator().manual_seed(2147483647)\n", "\n", "for i in range(5):\n", "    \n", "    out = []\n", "    ix = 0\n", "    while True:\n", "        \n", "        # ----------\n", "        # BEFORE:\n", "        #p = P[ix]\n", "        # ----------\n", "        # NOW:\n", "        xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()\n", "        logits = xenc @ W # predict log-counts\n", "        counts = logits.exp() # counts, equivalent to N\n", "        p = counts / counts.sum(1, keepdims=True) # probabilities for next character\n", "        # ----------\n", "        \n", "        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()\n", "        out.append(itos[ix])\n", "        if ix == 0:\n", "            break\n", "    print(''.join(out))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "--------" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---------" ] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 2 }