{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Machine Translation Project (English to Spanish)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pathlib\n", "import random\n", "import string\n", "import tensorflow.strings as tf_strings\n", "import tensorflow.data as tf_data\n", "import re\n", "from keras.layers import TextVectorization\n", "import keras\n", "import tensorflow as tf\n", "from keras import layers\n", "import json" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Verify access to the GPU" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[name: \"/device:CPU:0\"\n", "device_type: \"CPU\"\n", "memory_limit: 268435456\n", "locality {\n", "}\n", "incarnation: 11075625745611853481\n", "xla_global_id: -1\n", ", name: \"/device:GPU:0\"\n", "device_type: \"GPU\"\n", "memory_limit: 1733715559\n", "locality {\n", " bus_id: 1\n", " links {\n", " }\n", "}\n", "incarnation: 17906485139926134931\n", "physical_device_desc: \"device: 0, name: NVIDIA GeForce RTX 2050, pci bus id: 0000:01:00.0, compute capability: 8.6\"\n", "xla_global_id: 416903419\n", "]\n" ] } ], "source": [ "from tensorflow.python.client import device_lib\n", "print(device_lib.list_local_devices())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Download and prepare the data\n", "source :\"http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "text_file = keras.utils.get_file(\n", " fname = \"spa-eng.zip\",\n", " origin = \"http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip\",\n", " extract = True,\n", ")\n", "\n", "text_file = pathlib.Path(text_file).parent / \"spa-eng\" / \"spa.txt\"\n", "\n", "with open(text_file, \"r\") as f:\n", " lines = f.read().split(\"\\n\")[:-1]\n", " \n", "text_pairs = []\n", "\n", "for line in lines:\n", " eng, spa = line.split(\"\\t\")\n", " spa = \"[start] \" + spa + \" [end]\"\n", " text_pairs.append((eng, spa))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "random.shuffle(text_pairs)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Tom came here on his own.', '[start] Tom vino aquí por sí solo. [end]')\n", "(\"I don't want Tom to see me naked.\", '[start] No quiero que Tom me vea desnuda. [end]')\n", "('Stand back!', '[start] ¡Retrocede! [end]')\n", "(\"He's quite formal when he meets a stranger.\", '[start] Es muy ceremonioso cuando le presentan una persona desconocida. [end]')\n", "(\"I'm stuck in a traffic jam.\", '[start] Estoy atrapado en un atasco. [end]')\n" ] } ], "source": [ "for i in range(5):\n", " print(text_pairs[i])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Structure of the Dataset" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "118964 total pairs\n", "83276 training pairs\n", "17844 validation pairs\n", "17844 test pairs\n" ] } ], "source": [ "num_val_samples = int(0.15 * len(text_pairs))\n", "num_train_samples = len(text_pairs) - 2 * num_val_samples\n", "train_pairs = text_pairs[:num_train_samples]\n", "val_pairs = text_pairs[num_train_samples:num_train_samples + num_val_samples]\n", "test_pairs = text_pairs[num_train_samples + num_val_samples:]\n", "\n", "print(f\"{len(text_pairs)} total pairs\")\n", "print(f\"{len(train_pairs)} training pairs\")\n", "print(f\"{len(val_pairs)} validation pairs\")\n", "print(f\"{len(test_pairs)} test pairs\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# parameters\n", "strip_chars = string.punctuation + \"¿\"\n", "strip_chars = strip_chars.replace(\"[\", \"\")\n", "strip_chars = strip_chars.replace(\"]\", \"\")\n", "\n", "vocab_size = 15000\n", "sequence_length = 20\n", "batch_size = 64" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Vectorize the data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def custom_standardization(input_string):\n", " lowercase = tf_strings.lower(input_string)\n", " return tf_strings.regex_replace(lowercase, f\"[{re.escape(strip_chars)}]\", \"\")\n", "\n", "# vectorization\n", "eng_vectorization = TextVectorization(\n", " max_tokens = vocab_size,\n", " output_mode = \"int\",\n", " output_sequence_length = sequence_length,\n", ")\n", "\n", "spa_vectorization = TextVectorization(\n", " max_tokens = vocab_size,\n", " output_mode = \"int\",\n", " output_sequence_length = sequence_length + 1,\n", " standardize = custom_standardization,\n", ")\n", "\n", "train_eng_texts = [pair[0] for pair in train_pairs]\n", "train_spa_texts = [pair[1] for pair in train_pairs]\n", "\n", "eng_vectorization.adapt(train_eng_texts)\n", "spa_vectorization.adapt(train_spa_texts)\n", "\n", "#save the vectorization layers\n", "eng_vectorization_config = eng_vectorization.get_config()\n", "eng_vectorization_config.pop('standardize', None)\n", "eng_vocab = eng_vectorization.get_vocabulary()\n", "with open('eng_vectorization_config.json', 'w', encoding='utf-8') as f:\n", " json.dump(eng_vectorization_config, f)\n", " \n", "with open('eng_vocab.json', 'w', encoding='utf-8') as f:\n", " json.dump(eng_vocab, f)\n", " \n", "spa_vectorization_config = spa_vectorization.get_config()\n", "spa_vectorization_config.pop('standardize', None)\n", "spa_vocab = spa_vectorization.get_vocabulary()\n", "with open('spa_vectorization_config.json', 'w', encoding='utf-8') as f:\n", " json.dump(spa_vectorization_config, f)\n", " \n", "with open('spa_vocab.json', 'w', encoding='utf-8') as f:\n", " json.dump(spa_vocab, f)\n", " \n", "\n", "def format_dataset(eng, spa):\n", " eng = eng_vectorization(eng)\n", " spa = spa_vectorization(spa)\n", " return (\n", " {\n", " \"encoder_inputs\": eng,\n", " \"decoder_inputs\": spa[:, :-1],\n", " },\n", " spa[:, 1:],\n", " )\n", " \n", "def make_dataset(pairs):\n", " eng_texts, spa_texts = zip(*pairs)\n", " eng_texts = list(eng_texts)\n", " spa_texts = list(spa_texts)\n", " dataset = tf_data.Dataset.from_tensor_slices((eng_texts, spa_texts))\n", " dataset = dataset.batch(batch_size)\n", " dataset = dataset.map(format_dataset)\n", " return dataset.cache().shuffle(2048).prefetch(16)\n", "\n", "train_ds = make_dataset(train_pairs)\n", "val_ds = make_dataset(val_pairs)\n", " " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(64, 20)\n", "(64, 20)\n" ] } ], "source": [ "for inputs,targets in train_ds.take(1):\n", " print(inputs[\"encoder_inputs\"].shape)\n", " print(targets.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Model Architecture\n", "![Encoder-Decoder](images/encoder-decoder-context.png)\n", "![Encoder-Decoder](images/encoder-decoder-translation.png)\n", "![Attention Mechanism](images/attention.png)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Creating an Encoder\n", "class TransformerEncoder(layers.Layer):\n", " def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):\n", " super().__init__(**kwargs)\n", " self.embed_dim = embed_dim\n", " self.dense_dim = dense_dim\n", " self.num_heads = num_heads\n", " self.attention = layers.MultiHeadAttention(\n", " num_heads = num_heads, key_dim = embed_dim\n", " )\n", " self.dense_proj = keras.Sequential(\n", " [\n", " layers.Dense(dense_dim, activation = \"relu\"),\n", " layers.Dense(embed_dim),\n", " ]\n", " )\n", " self.layernorm_1 = layers.LayerNormalization()\n", " self.layernorm_2 = layers.LayerNormalization()\n", " self.supports_masking = True\n", " \n", " def call(self, inputs, mask=None):\n", " if mask is not None:\n", " padding_mask = tf.cast(mask[:, None, :], dtype = tf.int32)\n", " else:\n", " padding_mask = None\n", " \n", " attention_output = self.attention(\n", " query = inputs,\n", " value = inputs,\n", " key = inputs,\n", " attention_mask = padding_mask,\n", " )\n", " proj_input = self.layernorm_1(inputs + attention_output)\n", " proj_output = self.dense_proj(proj_input)\n", " return self.layernorm_2(proj_input + proj_output)\n", " \n", " def get_config(self):\n", " config = super().get_config()\n", " config.update({\n", " \"embed_dim\": self.embed_dim,\n", " \"dense_dim\": self.dense_dim,\n", " \"num_heads\": self.num_heads,\n", " })\n", " return config\n", " \n", "# Creating a Positional Embedding\n", "class PositionalEmbedding(layers.Layer):\n", " def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):\n", " super().__init__(**kwargs)\n", " self.token_embeddings = layers.Embedding(\n", " input_dim = vocab_size, output_dim = embed_dim\n", " )\n", " self.position_embeddings = layers.Embedding(\n", " input_dim = sequence_length, output_dim = embed_dim\n", " )\n", " self.sequence_length = sequence_length\n", " self.vocab_size = vocab_size\n", " self.embed_dim = embed_dim\n", " \n", " def call(self, inputs):\n", " length = tf.shape(inputs)[-1]\n", " positions = tf.range(start = 0, limit = length, delta = 1)\n", " embedded_tokens = self.token_embeddings(inputs)\n", " embedded_positions = self.position_embeddings(positions)\n", " return embedded_tokens + embedded_positions\n", " \n", " def compute_mask(self, inputs, mask=None):\n", " if mask is not None:\n", " return tf.not_equal(inputs, 0)\n", " else:\n", " return None\n", " \n", " def get_config(self):\n", " config = super().get_config()\n", " config.update({\n", " \"vocab_size\": self.vocab_size,\n", " \"sequence_length\": self.sequence_length,\n", " \"embed_dim\": self.embed_dim,\n", " })\n", " return config\n", " \n", "# Creating a Decoder\n", "class TransformerDecoder(layers.Layer):\n", " def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):\n", " super().__init__(**kwargs)\n", " self.embed_dim = embed_dim\n", " self.latent_dim = latent_dim\n", " self.num_heads = num_heads\n", " self.attention_1 = layers.MultiHeadAttention(\n", " num_heads = num_heads, key_dim = embed_dim\n", " )\n", " self.attention_2 = layers.MultiHeadAttention(\n", " num_heads = num_heads, key_dim = embed_dim\n", " )\n", " self.dense_proj = keras.Sequential(\n", " [\n", " layers.Dense(latent_dim, activation = \"relu\"),\n", " layers.Dense(embed_dim),\n", " ]\n", " )\n", " self.layernorm_1 = layers.LayerNormalization()\n", " self.layernorm_2 = layers.LayerNormalization()\n", " self.layernorm_3 = layers.LayerNormalization()\n", " self.supports_masking = True\n", " \n", " def call(self, inputs, encoder_outputs, mask=None):\n", " casual_mask = self.get_causal_attention_mask(inputs)\n", " if mask is not None:\n", " padding_mask = tf.cast(mask[:, None, :], dtype = tf.int32)\n", " padding_mask = tf.minimum(padding_mask, casual_mask)\n", " else:\n", " padding_mask = None\n", " \n", " attention_output_1 = self.attention_1(\n", " query = inputs,\n", " value = inputs,\n", " key = inputs,\n", " attention_mask = casual_mask,\n", " )\n", " out_1 = self.layernorm_1(inputs + attention_output_1)\n", " \n", " attention_output_2 = self.attention_2(\n", " query = out_1,\n", " value = encoder_outputs,\n", " key = encoder_outputs,\n", " attention_mask = padding_mask,\n", " )\n", " \n", " out_2 = self.layernorm_2(out_1 + attention_output_2)\n", " proj_output = self.dense_proj(out_2)\n", " \n", " return self.layernorm_3(out_2 + proj_output)\n", " \n", " def get_causal_attention_mask(self, inputs):\n", " input_shape = tf.shape(inputs)\n", " batch_size, sequence_length = input_shape[0], input_shape[1]\n", " i = tf.range(sequence_length)[:, None]\n", " j = tf.range(sequence_length)\n", " mask = tf.cast(i >= j, tf.int32)\n", " mask = tf.reshape(mask,(1, input_shape[1], input_shape[1]))\n", " mult = tf.concat(\n", " [\n", " tf.expand_dims(batch_size, -1),\n", " tf.convert_to_tensor([1, 1]),\n", " ],\n", " axis = 0,\n", " )\n", " return tf.tile(mask, mult)\n", " \n", " def get_config(self):\n", " config = super().get_config()\n", " config.update({\n", " \"embed_dim\": self.embed_dim,\n", " \"latent_dim\": self.latent_dim,\n", " \"num_heads\": self.num_heads,\n", " })\n", " return config\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# define emmbedding dimensions, latent dimensions, and number of heads\n", "embed_dim = 256\n", "latent_dim = 2048\n", "num_heads = 8\n", "\n", "#Encoder\n", "encoder_inputs = keras.Input(shape = (None,), dtype = \"int64\", name = \"encoder_inputs\")\n", "\n", "x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)\n", "\n", "encoder_outputs = TransformerEncoder(embed_dim, latent_dim, num_heads)(x)\n", "\n", "encoder = keras.Model(encoder_inputs, encoder_outputs, name = \"encoder\")\n", "\n", "#Decoder\n", "decoder_inputs = keras.Input(shape = (None,), dtype = \"int64\", name = \"decoder_inputs\")\n", "encoder_seq_inputs = keras.Input(shape = (None, embed_dim), name = \"encoder_seq_inputs\")\n", "\n", "x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)\n", "\n", "x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, encoder_seq_inputs)\n", "\n", "x = layers.Dropout(0.5)(x)\n", "\n", "decoder_outputs = layers.Dense(vocab_size, activation = \"softmax\")(x)\n", "\n", "decoder = keras.Model([decoder_inputs, encoder_seq_inputs], decoder_outputs, name = \"decoder\")\n", "\n", "# Define the final model\n", "decoder_outputs = decoder([decoder_inputs, encoder_outputs])\n", "\n", "transformer = keras.Model(\n", " [encoder_inputs, decoder_inputs], decoder_outputs, name = \"transformer\"\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"transformer\"\n", "__________________________________________________________________________________________________\n", " Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", " encoder_inputs (InputLayer) [(None, None)] 0 [] \n", " \n", " positional_embedding (Position (None, None, 256) 3845120 ['encoder_inputs[0][0]'] \n", " alEmbedding) \n", " \n", " decoder_inputs (InputLayer) [(None, None)] 0 [] \n", " \n", " transformer_encoder (Transform (None, None, 256) 3155456 ['positional_embedding[0][0]'] \n", " erEncoder) \n", " \n", " decoder (Functional) (None, None, 15000) 12959640 ['decoder_inputs[0][0]', \n", " 'transformer_encoder[0][0]'] \n", " \n", "==================================================================================================\n", "Total params: 19,960,216\n", "Trainable params: 19,960,216\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n", "Epoch 1/20\n", "1302/1302 [==============================] - 186s 138ms/step - loss: 1.9811 - accuracy: 0.7206 - val_loss: 1.6848 - val_accuracy: 0.7416\n", "Epoch 2/20\n", "1302/1302 [==============================] - 179s 137ms/step - loss: 1.7133 - accuracy: 0.7508 - val_loss: 1.5721 - val_accuracy: 0.7638\n", "Epoch 3/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 1.5370 - accuracy: 0.7759 - val_loss: 1.3673 - val_accuracy: 0.7908\n", "Epoch 4/20\n", "1302/1302 [==============================] - 178s 137ms/step - loss: 1.3938 - accuracy: 0.7957 - val_loss: 1.2725 - val_accuracy: 0.8077\n", "Epoch 5/20\n", "1302/1302 [==============================] - 177s 136ms/step - loss: 1.2863 - accuracy: 0.8128 - val_loss: 1.2105 - val_accuracy: 0.8216\n", "Epoch 6/20\n", "1302/1302 [==============================] - 178s 137ms/step - loss: 1.2217 - accuracy: 0.8268 - val_loss: 1.1603 - val_accuracy: 0.8325\n", "Epoch 7/20\n", "1302/1302 [==============================] - 178s 137ms/step - loss: 1.1745 - accuracy: 0.8369 - val_loss: 1.1168 - val_accuracy: 0.8407\n", "Epoch 8/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 1.1379 - accuracy: 0.8448 - val_loss: 1.1005 - val_accuracy: 0.8429\n", "Epoch 9/20\n", "1302/1302 [==============================] - 177s 136ms/step - loss: 1.1092 - accuracy: 0.8506 - val_loss: 1.0816 - val_accuracy: 0.8485\n", "Epoch 10/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 1.0849 - accuracy: 0.8552 - val_loss: 1.0628 - val_accuracy: 0.8515\n", "Epoch 11/20\n", "1302/1302 [==============================] - 177s 136ms/step - loss: 1.0652 - accuracy: 0.8590 - val_loss: 1.0541 - val_accuracy: 0.8510\n", "Epoch 12/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 1.0469 - accuracy: 0.8622 - val_loss: 1.0370 - val_accuracy: 0.8556\n", "Epoch 13/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 1.0317 - accuracy: 0.8649 - val_loss: 1.0275 - val_accuracy: 0.8577\n", "Epoch 14/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 1.0163 - accuracy: 0.8674 - val_loss: 1.0242 - val_accuracy: 0.8584\n", "Epoch 15/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 1.0016 - accuracy: 0.8696 - val_loss: 1.0176 - val_accuracy: 0.8602\n", "Epoch 16/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 0.9884 - accuracy: 0.8717 - val_loss: 1.0102 - val_accuracy: 0.8606\n", "Epoch 17/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 0.9767 - accuracy: 0.8736 - val_loss: 1.0234 - val_accuracy: 0.8591\n", "Epoch 18/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 0.9665 - accuracy: 0.8752 - val_loss: 0.9950 - val_accuracy: 0.8634\n", "Epoch 19/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 0.9549 - accuracy: 0.8771 - val_loss: 1.0000 - val_accuracy: 0.8621\n", "Epoch 20/20\n", "1302/1302 [==============================] - 178s 136ms/step - loss: 0.9456 - accuracy: 0.8783 - val_loss: 1.0106 - val_accuracy: 0.8617\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "epochs = 20\n", "\n", "transformer.summary()\n", "\n", "transformer.compile(\n", " \"rmsprop\", loss = \"sparse_categorical_crossentropy\", metrics = [\"accuracy\"]\n", ")\n", "\n", "transformer.fit(train_ds, epochs = epochs, validation_data = val_ds)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:absl:Found untraced functions such as embedding_layer_call_fn, embedding_layer_call_and_return_conditional_losses, embedding_1_layer_call_fn, embedding_1_layer_call_and_return_conditional_losses, multi_head_attention_layer_call_fn while saving (showing 5 of 60). These functions will not be directly callable after loading.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer_model\\assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: transformer_model\\assets\n" ] } ], "source": [ "transformer.save(\"transformer_model\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "input: her days are numbered\n", "translated: [start] sus días están en el [UNK] [end]\n", "\n", "input: i heard the door close\n", "translated: [start] he oído la puerta [end]\n", "\n", "input: you go first\n", "translated: [start] vas al primero [end]\n", "\n", "input: you used to look up to your father\n", "translated: [start] te [UNK] a tu padre [end]\n", "\n", "input: i am acquainted with the chairman of the committee\n", "translated: [start] estoy en el [UNK] de la [UNK] [end]\n", "\n" ] } ], "source": [ "spa_vocab = spa_vectorization.get_vocabulary()\n", "spa_index_lookup = dict(zip(range(len(spa_vocab)), spa_vocab))\n", "max_decoded_sentence_length = sequence_length\n", "\n", "def decode_sentence(input_sentence):\n", " tokenized_input_sentence = eng_vectorization([input_sentence])\n", " decoded_sentence = \"[start]\"\n", " for i in range(max_decoded_sentence_length):\n", " tokenized_target_sentence = spa_vectorization([decoded_sentence])[:, :-1]\n", " predictions = transformer([tokenized_input_sentence, tokenized_target_sentence])\n", " sampled_token_index = tf.argmax(predictions[0, i, :]).numpy().item(0)\n", " sampled_token = spa_index_lookup[sampled_token_index]\n", " decoded_sentence += \" \" + sampled_token\n", " if sampled_token == \"[end]\":\n", " break\n", " return decoded_sentence\n", "\n", "test_eng_texts = [pair[0] for pair in test_pairs]\n", "for _ in range(5):\n", " input_sentence = random.choice(test_eng_texts)\n", " input_sentence = input_sentence.lower()\n", " input_sentence = input_sentence.translate(str.maketrans('', '', strip_chars))\n", " translated = decode_sentence(input_sentence)\n", " print(f\"input: {input_sentence}\")\n", " print(f\"translated: {translated}\")\n", " print()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 2 }