{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4", "authorship_tag": "ABX9TyO5MrQiVjuL4OLj45xQoPv8", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8Bf8SH8NYnz1", "outputId": "61d6ba24-b65f-4233-a65b-a3d7c391fb71" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mon Jun 5 07:10:02 2023 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", "| N/A 65C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", " \n", "+-----------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=============================================================================|\n", "| No running processes found |\n", "+-----------------------------------------------------------------------------+\n" ] } ], "source": [ "!nvidia-smi" ] }, { "cell_type": "code", "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "A-kDhy1FZC5w", "outputId": "944edefa-378c-4337-bc3b-68053e7769e8" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Defining the path for the trained and saved SentenceTransformer model to produce encodings of the scraped dataset." ], "metadata": { "id": "rLy_4gxPZbPa" } }, { "cell_type": "code", "source": [ "model_path = '/content/drive/My Drive/trained_Model/sentence_transformer_nepali' \n", "csv_path = '/content/drive/MyDrive/Datasets/3k_News.csv'" ], "metadata": { "id": "DAWpVJCkZUya" }, "execution_count": 3, "outputs": [] }, { "cell_type": "code", "source": [ "import pandas as pd" ], "metadata": { "id": "7IQXU3luZ-9w" }, "execution_count": 4, "outputs": [] }, { "cell_type": "code", "source": [ "df = pd.read_csv(csv_path, index_col=0)\n", "df" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "pXORmeY3aBfV", "outputId": "38cc5eb0-d642-4acc-a696-1159d4daedda" }, "execution_count": 19, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id title \\\n", "0 0 काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ? \n", "1 1 थाइराइडका बिरामीले के खाने, के नखाने ? \n", "2 2 बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के... \n", "3 3 फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु \n", "4 4 जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ... \n", "... ... ... \n", "3852 3852 ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्... \n", "3853 3853 गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस... \n", "3854 3854 कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी \n", "3855 3855 प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने \n", "3856 3856 सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा... \n", "\n", " article \\\n", "0 चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ... \n", "1 काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न... \n", "2 सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्... \n", "3 १२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह... \n", "4 १२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ... \n", "... ... \n", "3852 १५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल... \n", "3853 १५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्... \n", "3854 १५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब... \n", "3855 १५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान... \n", "3856 १५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ... \n", "\n", " link \n", "0 https://www.onlinekhabar.com/2023/05/1312396 \n", "1 https://www.onlinekhabar.com/2023/05/1312323 \n", "2 https://www.onlinekhabar.com/2023/05/1312266 \n", "3 https://www.onlinekhabar.com/2023/05/1312637 \n", "4 https://www.onlinekhabar.com/2023/05/1312564 \n", "... ... \n", "3852 https://www.onlinekhabar.com/2023/02/1269914 \n", "3853 https://www.onlinekhabar.com/2023/02/1269908 \n", "3854 https://www.onlinekhabar.com/2023/02/1269895 \n", "3855 https://www.onlinekhabar.com/2023/02/1269881 \n", "3856 https://www.onlinekhabar.com/2023/02/1269863 \n", "\n", "[3857 rows x 4 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtitlearticlelink
00काँक्राका फाइदै-फाइदा, कुन समयमा खानु राम्रो ?चर्को गर्मीमा काँक्रा खानुको मज्जा नै बेग्लै ...https://www.onlinekhabar.com/2023/05/1312396
11थाइराइडका बिरामीले के खाने, के नखाने ?काठमाडौं । शरीरलाई राम्रोसँग काम गर्न विभिन्न...https://www.onlinekhabar.com/2023/05/1312323
22बच्चामा अन्धोपनको जोखिम बढाउने आरओपी समस्या के...सामान्य बच्चाको तुलनामा समयअगावै जन्मिएका बच्...https://www.onlinekhabar.com/2023/05/1312266
33फुङलिङमा सवारी दुर्घटनामा परी एक बालककाे मृत्यु१२ जेठ, ताप्लेजुङ। स्कार्पियाे दुर्घटनामा बिह...https://www.onlinekhabar.com/2023/05/1312637
44जोर्डनका युवराज र साउदी युवतीबीचको विवाह किन छ...१२ जेठ, काठमाडौं । जोर्डनका युवराज हुसेन बिन ...https://www.onlinekhabar.com/2023/05/1312564
...............
38523852ज्येष्ठ सदस्य जबरासहित ११ सांसदले बुझाएनन् सम्...१५ फागुन, काठमाडौं । प्रतिनिधिसभाका ११ सदस्यल...https://www.onlinekhabar.com/2023/02/1269914
38533853गुल्मीमा बिभिन्न कार्यक्रम गरेर ४१ औं मगर दिवस...१५ फागुन, गुल्मी । गुल्मी जिल्ला सदरमुकाम तम्...https://www.onlinekhabar.com/2023/02/1269908
38543854कास्कीमा ६ महिनामै बलात्कारका ३५ उजुरी१५ फागुन, पोखरा । पोखराको लेकसाइड, शान्तिनगरब...https://www.onlinekhabar.com/2023/02/1269895
38553855प्रज्ञा प्रतिष्ठानका सदस्यले दोहोरो सुविधा नपाउने१५ फागुन, काठमाडौं । नेपाल प्रज्ञा प्रतिष्ठान...https://www.onlinekhabar.com/2023/02/1269881
38563856सिसडोलमा फोहोर फाल्ने स्वास्थ्य संस्थालाई महा...१५ फागुन, काठमाडौं । काठमाडौं महानगरपालिकाले ...https://www.onlinekhabar.com/2023/02/1269863
\n", "

3857 rows × 4 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 19 } ] }, { "cell_type": "code", "source": [ "import torch \n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "device" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aTtnTynzaGFc", "outputId": "30174b8d-bb3e-4408-d248-9a68f016ea9d" }, "execution_count": 6, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "device(type='cuda')" ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "source": [ "!pip install sentence_transformers" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xonIK3n0asKX", "outputId": "31df9443-9ae7-4c41-f9c7-36a883018ef7" }, "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting sentence_transformers\n", " Downloading sentence-transformers-2.2.2.tar.gz (85 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.0/86.0 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting transformers<5.0.0,>=4.6.0 (from sentence_transformers)\n", " Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.1/7.1 MB\u001b[0m \u001b[31m85.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.65.0)\n", "Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (2.0.1+cu118)\n", "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.15.2+cu118)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.22.4)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.2.2)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.10.1)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (3.8.1)\n", "Collecting sentencepiece (from sentence_transformers)\n", " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m87.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting huggingface-hub>=0.4.0 (from sentence_transformers)\n", " Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (3.12.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2023.4.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2.27.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (4.5.0)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (23.1)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (1.11.1)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.1.2)\n", "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (2.0.0)\n", "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence_transformers) (3.25.2)\n", "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence_transformers) (16.0.5)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (2022.10.31)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers<5.0.0,>=4.6.0->sentence_transformers)\n", " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m118.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (8.1.3)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (1.2.0)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (3.1.0)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->sentence_transformers) (8.4.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->sentence_transformers) (2.1.2)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2022.12.7)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.4)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->sentence_transformers) (1.3.0)\n", "Building wheels for collected packages: sentence_transformers\n", " Building wheel for sentence_transformers (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for sentence_transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125926 sha256=1e2782dfecea84ab161a69e6c27c2aa8d5f446786217c3d9a8bdb35bece51686\n", " Stored in directory: /root/.cache/pip/wheels/62/f2/10/1e606fd5f02395388f74e7462910fe851042f97238cbbd902f\n", "Successfully built sentence_transformers\n", "Installing collected packages: tokenizers, sentencepiece, huggingface-hub, transformers, sentence_transformers\n", "Successfully installed huggingface-hub-0.15.1 sentence_transformers-2.2.2 sentencepiece-0.1.99 tokenizers-0.13.3 transformers-4.29.2\n" ] } ] }, { "cell_type": "code", "source": [ "from sentence_transformers import SentenceTransformer\n", "\n", "model = SentenceTransformer(model_path)\n", "model.to(device)\n", "model" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uy5VBTrzaxur", "outputId": "d2db31d5-50fd-44d7-d346-7da6ec8653a9" }, "execution_count": 8, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "SentenceTransformer(\n", " (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n", " (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n", ")" ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "source": [ "embeddings = model.encode(df['article'])\n", "embeddings.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "URFuE0uHa7SV", "outputId": "e1f786a6-a442-4a76-8d5d-8efc47a730d5" }, "execution_count": 20, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(3857, 768)" ] }, "metadata": {}, "execution_count": 20 } ] }, { "cell_type": "code", "source": [ "from sklearn.metrics.pairwise import cosine_similarity\n", "\n", "cosine_similarities = cosine_similarity(embeddings, embeddings)\n", "\n", "cosine_similarities.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MRuowI6xb9fS", "outputId": "25c16273-62c6-4cd7-afdb-c12cea2df943" }, "execution_count": 21, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(3857, 3857)" ] }, "metadata": {}, "execution_count": 21 } ] }, { "cell_type": "code", "source": [ "results = {}\n", "for idx, row in df.iterrows():\n", " similar_indices = cosine_similarities[idx].argsort()[:-100:-1]\n", " similar_items = [(cosine_similarities[idx][i], df['id'][i]) for i in similar_indices]\n", " results[row['id']] = similar_items[1:]\n", "print('done!')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2J0sw4Qmbmqg", "outputId": "592d7dd3-56f3-4ccb-fb00-3c426c98a05e" }, "execution_count": 22, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "done!\n" ] } ] }, { "cell_type": "code", "source": [ "similar_indices[:10]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lhWYDqOGbuXe", "outputId": "f46d88c3-54f2-45df-a936-4fd9ba39ad8f" }, "execution_count": 23, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([3856, 2090, 970, 998, 2451, 3485, 76, 667, 1058, 1663])" ] }, "metadata": {}, "execution_count": 23 } ] }, { "cell_type": "code", "source": [ "def item(id):\n", " return df.loc[df['id'] == id]['title'].tolist()[0].split(' - ')[0]\n", "\n", "# Just reads the results out of the dictionary.\n", "def recommend(item_id, num):\n", " print(\"Recommending \" + str(num) + \" products similar to \" + item(item_id) + \"...\")\n", " print(\"-------\")\n", " recs = results[item_id][:num]\n", " for rec in recs:\n", " print(\"Recommended : \" + item(rec[1]) + \" (score:\" + str(rec[0]) + \")\",end='\\n\\n')\n", "\n", "recommend(item_id=10, num=10)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "t_FFV5w8cPd0", "outputId": "fd95c622-2ebd-46cd-a933-e7e6d37e6a2d" }, "execution_count": 24, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Recommending 10 products similar to जनकपुर ११ का वडाध्यक्ष र वडासचिवविरुद्ध भ्रष्टाचारको मुद्दा...\n", "-------\n", "Recommended : शुद्धोधन-७ का वडा अध्यक्ष ५० हजार घुससहित पक्राउ (score:0.8842877)\n", "\n", "Recommended : नक्कली परीक्षार्थी राखेर एसईई दिएको आरोपमा मेयरविरुद्ध पक्राउ पुर्जी (score:0.88426626)\n", "\n", "Recommended : वृद्धभत्ताको रकम हिनामिनाको आरोपमा वडासचिव विरुद्ध भ्रष्टाचार मुद्दा (score:0.87886274)\n", "\n", "Recommended : सप्तरीको शम्भुनाथ नगरपालिकाका मेयरविरुद्ध भ्रष्टाचार मुद्दा (score:0.8761473)\n", "\n", "Recommended : नक्कली भुटानी शरणार्थी प्रकरणको अनुसन्धान प्रतिवेदन आज सरकारी वकिललाई बुझाइँदै (score:0.8713335)\n", "\n", "Recommended : १५ हजार घुस लिँदै गर्दा मालपोत बाराका खरदार र लेखापढी व्यवसायी पक्राउ (score:0.8681655)\n", "\n", "Recommended : बालिका बलात्कार अभियोग लागेका अनाथालय प्रमुख थुनामा पठाइए (score:0.8657491)\n", "\n", "Recommended : ढोरपाटनका मेयरलाई एमालेले गर्‍यो प्रदेश कमिटीबाट निलम्बन (score:0.86117494)\n", "\n", "Recommended : सिम्रौनगढका तत्कालीन प्रमुख प्रशासकीय अधिकृतसहित ३ जनाविरुद्ध भ्रष्टाचार मुद्दा दायर (score:0.86113524)\n", "\n", "Recommended : विश्व खाद्यले वितरण गर्ने खाद्यान्नको पूर्व परीक्षण अनिवार्य गर्न इन्सेकको माग (score:0.85944504)\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "def recomendation(idx,no_of_news_article):\n", " #get similarity values with other articles\n", " similarity_score = list(enumerate(cosine_similarities[idx]))\n", " similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)\n", " # Get the scores of the n most similar news articles. Ignore the first movie.\n", " similarity_score = similarity_score[1:no_of_news_article+1]\n", " \n", " print(\"Article Read -- \" + df['title'].iloc[idx] +\" link --\"+ df['link'].iloc[idx])\n", " print(\" ---------------------------------------------------------- \")\n", " news_indices = [i[0] for i in similarity_score]\n", " for i in range(len(news_indices)):\n", " print(\"Recomendation \"+ str(i+1)+\" --- \" +str(news_indices[i])+\"(IDX) \"+ df['title'].iloc[news_indices[i]] +\" || Link --\"+ df['link'].iloc[news_indices[i]] +\" score -- \"+ str(similarity_score[i][1]))\n", " print()" ], "metadata": { "id": "V79F2gOBcUGf" }, "execution_count": 25, "outputs": [] }, { "cell_type": "code", "source": [ "idx=10\n", "no_of_news_article=5\n", "recomendation(idx,no_of_news_article)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wJuC2D-acf0E", "outputId": "0a01f856-e6a8-429c-f011-1ec1354e053a" }, "execution_count": 27, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Article Read -- जनकपुर ११ का वडाध्यक्ष र वडासचिवविरुद्ध भ्रष्टाचारको मुद्दा link --https://www.onlinekhabar.com/2023/05/1312674\n", " ---------------------------------------------------------- \n", "Recomendation 1 --- 2368(IDX) शुद्धोधन-७ का वडा अध्यक्ष ५० हजार घुससहित पक्राउ || Link --https://www.onlinekhabar.com/2023/03/1284781 score -- 0.8842877\n", "\n", "Recomendation 2 --- 1982(IDX) नक्कली परीक्षार्थी राखेर एसईई दिएको आरोपमा मेयरविरुद्ध पक्राउ पुर्जी || Link --https://www.onlinekhabar.com/2023/04/1289031 score -- 0.88426626\n", "\n", "Recomendation 3 --- 2169(IDX) वृद्धभत्ताको रकम हिनामिनाको आरोपमा वडासचिव विरुद्ध भ्रष्टाचार मुद्दा || Link --https://www.onlinekhabar.com/2023/04/1286979 score -- 0.87886274\n", "\n", "Recomendation 4 --- 3779(IDX) सप्तरीको शम्भुनाथ नगरपालिकाका मेयरविरुद्ध भ्रष्टाचार मुद्दा || Link --https://www.onlinekhabar.com/2023/03/1270715 score -- 0.8761473\n", "\n", "Recomendation 5 --- 235(IDX) नक्कली भुटानी शरणार्थी प्रकरणको अनुसन्धान प्रतिवेदन आज सरकारी वकिललाई बुझाइँदै || Link --https://www.onlinekhabar.com/2023/05/1310396 score -- 0.8713335\n", "\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "jkbwgwJWckj-" }, "execution_count": null, "outputs": [] } ] }