{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": { "provenance": [] },
    "kernelspec": { "name": "python3", "display_name": "Python 3" },
    "language_info": { "name": "python" }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "# Load the tokens into the colab\n",
        "import os\n",
        "\n",
        "import torch\n",
        "from torch import linalg as LA\n",
        "\n",
        "# Clone only once, into an explicit target directory. The %cd below leaves the\n",
        "# working directory inside the repository, so re-running a plain relative\n",
        "# `git clone` would create a nested second copy of the dataset.\n",
        "if not os.path.isdir('/content/sd_tokens'):\n",
        "    !git clone https://huggingface.co/datasets/codeShare/sd_tokens /content/sd_tokens\n",
        "\n",
        "%cd /content/sd_tokens\n",
        "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
        "# weights_only=True restricts unpickling to tensor data instead of arbitrary objects.\n",
        "token = torch.load('sd15_tensors.pt', map_location=device, weights_only=True)"
      ],
      "metadata": { "id": "Ch9puvwKH1s3" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(token[100].shape)  # dimension of the tokens"
      ],
      "metadata": { "id": "S_Yh9gH_OUA1" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def absolute_value(x):\n",
        "    \"\"\"Return the magnitude of the real number x.\"\"\"\n",
        "    # abs() also maps -0.0 to 0.0, which max(x, -x) does not.\n",
        "    return abs(x)\n",
        "\n",
        "\n",
        "def _validated_id(token_id, name):\n",
        "    \"\"\"Return token_id as an int index into `token`.\n",
        "\n",
        "    Whole-number floats (e.g. 4567.0) are accepted and converted.\n",
        "\n",
        "    Raises:\n",
        "        TypeError: if token_id is not a whole number.\n",
        "        IndexError: if token_id is outside 0 .. len(token) - 1.\n",
        "    \"\"\"\n",
        "    index = int(token_id)\n",
        "    if index != token_id:\n",
        "        raise TypeError(f'{name} must be a whole number, got {token_id!r}')\n",
        "    # Reject negative IDs explicitly: on a tensor or list they would wrap\n",
        "    # around and silently select a different token than the one requested.\n",
        "    if not 0 <= index < len(token):\n",
        "        raise IndexError(f'{name} must be between 0 and {len(token) - 1}, got {index}')\n",
        "    return index\n",
        "\n",
        "\n",
        "def similarity(id_A, id_B):\n",
        "    \"\"\"Return the cosine similarity of two token vectors as a percentage string.\n",
        "\n",
        "    Args:\n",
        "        id_A: ID (index into the global `token`) of the first token.\n",
        "        id_B: ID (index into the global `token`) of the second token.\n",
        "\n",
        "    Returns:\n",
        "        A string such as '12.345 %': the absolute value of the cosine\n",
        "        similarity times 100, rounded to 3 decimals. The sign is discarded,\n",
        "        so opposite vectors score the same as identical ones. If either\n",
        "        vector has zero length the result is '0.0 %'.\n",
        "\n",
        "    Raises:\n",
        "        TypeError: if an ID is not a whole number.\n",
        "        IndexError: if an ID is outside the valid range.\n",
        "    \"\"\"\n",
        "    # Tensors\n",
        "    A = token[_validated_id(id_A, 'id_A')]\n",
        "    B = token[_validated_id(id_B, 'id_B')]\n",
        "\n",
        "    # Tensor vector length (2nd order, i.e. (a^2 + b^2 + ...)^(1/2))\n",
        "    norm_A = LA.vector_norm(A, ord=2)\n",
        "    norm_B = LA.vector_norm(B, ord=2)\n",
        "\n",
        "    denominator = norm_A * norm_B\n",
        "    if denominator.item() == 0:\n",
        "        # Cosine similarity is undefined for a zero-length vector;\n",
        "        # report 0 instead of letting the division produce 'nan %'.\n",
        "        return '0.0 %'\n",
        "\n",
        "    cosine = torch.dot(A, B) / denominator\n",
        "    similarity_pcnt = absolute_value(cosine.item() * 100)\n",
        "    similarity_pcnt_approx = round(similarity_pcnt, 3)\n",
        "\n",
        "    return f'{similarity_pcnt_approx} %'"
      ],
      "metadata": { "id": "fxquCxFaUxAZ" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Valid IDs for `id_for_token_A` and `id_for_token_B` range from 0 to 49407."
      ],
      "metadata": { "id": "kX72bAuhOtlT" }
    },
    {
      "cell_type": "code",
      "source": [
        "id_for_token_A = 4567 # @param {type:'integer'}\n",
        "id_for_token_B = 4343 # @param {type:'integer'}\n",
        "\n",
        "similarity_str = 'The similarity between tokens A and B is ' + similarity(id_for_token_A, id_for_token_B)\n",
        "\n",
        "print(similarity_str)"
      ],
      "metadata": { "id": "MwmOdC9cNZty" },
      "execution_count": null,
"outputs": [] } ] }