{
"cells": [
{
"cell_type": "markdown",
"id": "1b95ba48",
"metadata": {
"id": "1b95ba48"
},
"source": [
"# Responsible Prompting\n",
"\n",
"## Recipe: Visualize Embeddings\n"
]
},
{
"cell_type": "markdown",
"id": "342f3b42-7d2b-4914-ac48-e01132744279",
"metadata": {
"id": "342f3b42-7d2b-4914-ac48-e01132744279"
},
"source": [
"### Imports"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "c5498911",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"id": "c5498911",
"outputId": "9de2f7cc-3ea5-409c-de9d-46fd499289a9"
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"import os.path\n",
"import json\n",
"import re\n",
"import requests\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from sklearn.manifold import TSNE\n",
"from sklearn.metrics.pairwise import cosine_similarity\n",
"# from umap import UMAP\n",
"# import tensorflow as tf\n",
"# from umap.parametric_umap import ParametricUMAP, load_ParametricUMAP\n",
"\n",
"import plotly.express as px\n",
"import plotly.offline as pyo\n",
"import plotly.io as pio\n",
"\n",
"pyo.init_notebook_mode(connected=True)\n",
"pio.templates.default = \"plotly\"\n",
"# pio.templates.default = \"plotly_white\"\n",
"# pio.templates.default = \"ggplot\"\n",
"# pio.templates.default = \"seaborn\""
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "kc4Emo9JB1VF",
"metadata": {
"id": "kc4Emo9JB1VF"
},
"outputs": [],
"source": [
"# Detect whether this notebook runs inside Google Colab (Colab sets the\n",
"# COLAB_RELEASE_TAG environment variable) and select the Colab plotly\n",
"# renderer there; the COLAB flag is reused below to pick the JSON source.\n",
"if os.getenv(\"COLAB_RELEASE_TAG\"):\n",
"    COLAB = True\n",
"    pio.renderers.default = 'colab'\n",
"else:\n",
"    COLAB = False"
]
},
{
"cell_type": "markdown",
"id": "63d7cb62-3825-4ca9-be99-c94c2cf34127",
"metadata": {
"id": "63d7cb62-3825-4ca9-be99-c94c2cf34127"
},
"source": [
"### Sentence transformer model ids (from hugging face)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "95fb523c",
"metadata": {
"id": "95fb523c"
},
"outputs": [],
"source": [
"# Models with existing json sentences output files\n",
"# (Hugging Face sentence-transformer ids; presumably consumed later via\n",
"# model_id_to_filename to locate per-model JSON files — confirm in later cells)\n",
"model_ids = [\n",
"    \"sentence-transformers/all-MiniLM-L6-v2\",\n",
"    \"BAAI/bge-large-en-v1.5\",\n",
"    \"intfloat/multilingual-e5-large\"\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "0f11d170",
"metadata": {
"id": "0f11d170"
},
"source": [
"### Functions"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "ec527bce-27c3-4faf-99fd-b381ad3fbb15",
"metadata": {
"id": "ec527bce-27c3-4faf-99fd-b381ad3fbb15"
},
"outputs": [],
"source": [
"# Converts a model_id (e.g. 'org/model-name') into a lowercase,\n",
"# filename-friendly string by taking the segment after the last '/'.\n",
"# Using [-1] instead of [1] also handles ids without an org prefix\n",
"# (e.g. 'bert-base-uncased'), which would raise IndexError before.\n",
"def model_id_to_filename( model_id ):\n",
"    return model_id.split('/')[-1].lower()\n",
"\n",
"# Reduces an embeddings data frame to 2 (or 3) dimensions with t-SNE and\n",
"# returns a data frame whose columns name the semantic coordinates.\n",
"def perform_tsne( embeddings_df, n_components=2, columns=['embedding_x', 'embedding_y']):\n",
"    reducer = TSNE(n_components, random_state=13, init=\"pca\", learning_rate=\"auto\")\n",
"    reduced = reducer.fit_transform(embeddings_df)\n",
"    # A 3D projection needs a third coordinate column\n",
"    if( n_components == 3 ):\n",
"        columns = ['embedding_x', 'embedding_y', 'embedding_z']\n",
"    return pd.DataFrame(reduced, columns=columns)\n",
"\n",
"# Performs parametric UMAP for a given embeddings data frame.\n",
"# NOTE(review): this requires the commented-out imports at the top of the\n",
"# notebook (tensorflow and umap.parametric_umap) to be enabled; otherwise\n",
"# tf / ParametricUMAP are undefined and calling this raises NameError.\n",
"def perform_umap(embeddings_df, n_components=2, dimensions=384, columns=['embedding_x', 'embedding_y'], file_name=''):\n",
"    dims = (dimensions,)\n",
"    encoder = tf.keras.Sequential([\n",
"        tf.keras.layers.Input(shape=(dimensions,)),\n",
"        tf.keras.layers.Dense(256, activation='relu'),\n",
"        tf.keras.layers.BatchNormalization(),\n",
"        tf.keras.layers.Dense(128, activation='relu'),\n",
"        tf.keras.layers.BatchNormalization(),\n",
"        tf.keras.layers.Dense(64, activation='relu'),\n",
"        tf.keras.layers.BatchNormalization(),\n",
"        # Output size must match the requested number of components\n",
"        # (was hard-coded to 2, which broke n_components=3).\n",
"        tf.keras.layers.Dense(n_components, activation=None) # No activation for UMAP compatibility\n",
"    ])\n",
"    encoder.summary()\n",
"    umap_model = ParametricUMAP(encoder=encoder, dims=dims, n_components=n_components) # Parametric UMAP allowing to add new data points\n",
"    embeddings_umap = umap_model.fit_transform(embeddings_df)\n",
"    if( n_components == 3 ):\n",
"        columns = ['embedding_x', 'embedding_y', 'embedding_z']\n",
"    embeddings_df_umap = pd.DataFrame(embeddings_umap, columns=columns)\n",
"    # Saves model if a file name is provided\n",
"    if( file_name != ''):\n",
"        umap_model.save( file_name )\n",
"\n",
"    return embeddings_df_umap\n",
"\n",
"# Renders an interactive 2D scatter of sentence embeddings, colored by the\n",
"# 'label' column, with 'prompt_sentence' hover text supplied via 'texts'.\n",
"# 'colors' and 'labels' are accepted for interface compatibility but are\n",
"# not used by the plot itself.\n",
"def plot_embedding_2d_interactive(embeddings_df, texts = None, colors = None, labels = None ):\n",
"    # Human-readable axis/legend titles for the figure\n",
"    axis_titles = {\n",
"        \"embedding_x\": \"Semantic Dimension 1\",\n",
"        \"embedding_y\": \"Semantic Dimension 2\",\n",
"        \"label\": \"Values\"\n",
"    }\n",
"    # px.line is used for its per-label coloring; the trace mode is switched\n",
"    # to 'markers' below, turning the line plot into a scatter plot.\n",
"    fig = px.line(\n",
"        embeddings_df,\n",
"        x=\"embedding_x\",\n",
"        y=\"embedding_y\",\n",
"        color=\"label\",\n",
"        text=texts,\n",
"        labels=axis_titles,\n",
"        width=1200, height=800,\n",
"        title=\"Comparing sentences' embeddings\")\n",
"\n",
"    # Show points only (no connecting lines)\n",
"    fig.update_traces(mode=\"markers\")\n",
"\n",
"    # Display the plot\n",
"    fig.show()\n",
"\n",
"# Compares two sets of prompts by merging their sentences and embeddings,\n",
"# reducing the embeddings to 2D, and plotting the resulting comparison.\n",
"# Set 1 is colored as red and set 2 as green.\n",
"def compare_prompts_json( s1, s2, method='tsne', labels = None ):\n",
"    # Collects non-empty sentences from one set of values, appending the\n",
"    # matching embeddings and value labels to the shared accumulators;\n",
"    # returns the collected sentence texts.\n",
"    def _collect( value_set, all_embeddings, values ):\n",
"        texts = []\n",
"        for value in value_set:\n",
"            for prompt in value['prompts']:\n",
"                if( prompt['text'] != '' and prompt['embedding'] != [] ):\n",
"                    texts.append( prompt['text'] )\n",
"                    all_embeddings.append( prompt['embedding'] )\n",
"                    values.append( value['label'] )\n",
"        return texts\n",
"\n",
"    # Merging the prompts\n",
"    all_embeddings = []\n",
"    values = []\n",
"    p1 = _collect( s1, all_embeddings, values )\n",
"    p2 = _collect( s2, all_embeddings, values )\n",
"    texts = p1 + p2\n",
"\n",
"    # Defining color values for different prompts\n",
"    # For cmap='RdYlGn', p1 (negative value) can be considered the harmful/bad ones\n",
"    colors = [-1] * len( p1 ) + [1] * len( p2 )\n",
"\n",
"    # Data frame\n",
"    embeddings = pd.DataFrame(all_embeddings)\n",
"\n",
"    # Visualizing sentences\n",
"    # Dimensionality reduction\n",
"    if( method=='umap' ):\n",
"        embeddings_df_2d = perform_umap(embeddings, dimensions=embeddings.shape[1] )\n",
"    else:\n",
"        embeddings_df_2d = perform_tsne(embeddings)\n",
"\n",
"    embeddings_df_2d['label'] = values\n",
"    plot_embedding_2d_interactive(embeddings_df_2d, texts, colors, labels)\n"
]
},
{
"cell_type": "markdown",
"id": "c39191c3",
"metadata": {
"id": "c39191c3"
},
"source": [
"### Opening Input File"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "87316fa4-1fcf-41c4-9913-bc5704b25ea2",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "87316fa4-1fcf-41c4-9913-bc5704b25ea2",
"outputId": "fe03c906-3fe4-4665-9b94-227aadc75be2"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Opening existing file locally: ../prompt-sentences-main/prompt_sentences.json\n"
]
}
],
"source": [
"# JSON folder: read from the GitHub repo when on Colab, from the local\n",
"# repository checkout otherwise.\n",
"if( COLAB ):\n",
"    json_folder = 'https://raw.githubusercontent.com/IBM/responsible-prompting-api/refs/heads/main/prompt-sentences-main/'\n",
"else:\n",
"    json_folder = '../prompt-sentences-main/'\n",
"\n",
"# INPUT FILE\n",
"# Default file with empty embeddings\n",
"json_in_file = json_folder + 'prompt_sentences.json'\n",
"\n",
"if( COLAB ):\n",
"    prompt_json_in = requests.get( json_in_file ).json()\n",
"    print( 'Opening file from GitHub repo: ', json_in_file )\n",
"else:\n",
"    if( os.path.isfile( json_in_file ) ):\n",
"        # Context manager ensures the file handle is closed promptly\n",
"        with open( json_in_file ) as f_in:\n",
"            prompt_json_in = json.load( f_in )\n",
"        print( 'Opening existing file locally: ', json_in_file )\n",
"    else:\n",
"        # Fail fast with a clear message instead of silently leaving\n",
"        # prompt_json_in undefined for the following cells.\n",
"        raise FileNotFoundError( 'Input file not found: ' + json_in_file )\n"
]
},
{
"cell_type": "markdown",
"id": "1fa8e5d2-bee1-4aac-b11f-568d4d792ce2",
"metadata": {
"id": "1fa8e5d2-bee1-4aac-b11f-568d4d792ce2"
},
"source": [
"### Verifying the number of sentences by positive values"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "80a0151c-1ad3-42d5-a25a-0e6459e66a1c",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "80a0151c-1ad3-42d5-a25a-0e6459e66a1c",
"outputId": "eb5bc42b-1416-4184-b8d2-7556dae24654"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"11; accountability\n",
"12; accuracy\n",
"9; advice\n",
"14; agreement\n",
"10; appropriate\n",
"14; awareness\n",
"10; collaboration\n",
"8; commitment\n",
"26; community and stakeholders\n",
"4; compliance\n",
"3; control\n",
"31; copyright, right to ownership\n",
"5; dedication\n",
"7; duty\n",
"25; education\n",
"15; effective and efficiency\n",
"9; expertise\n",
"30; explainability\n",
"17; fairness\n",
"9; family\n",
"9; flexible\n",
"19; forthright and honesty\n",
"24; impact\n",
"34; inclusion and diversity\n",
"8; indelible\n",
"8; integrity\n",
"32; integrity, compliance, trust, ethics, and dedication\n",
"7; leadership\n",
"15; measurability\n",
"8; money\n",
"10; moral\n",
"9; openness\n",
"21; participation\n",
"10; positivity\n",
"5; power\n",
"34; privacy\n",
"14; proactive\n",
"1; productivity\n",
"10; professional\n",
"12; progress\n",
"1; reliability\n",
"11; reputation\n",
"11; resolution\n",
"13; respect and social norms\n",
"22; responsibility\n",
"14; robustness\n",
"19; safety\n",
"14; scale\n",
"10; security\n",
"14; success\n",
"10; sustainability\n",
"11; transformation\n",
"17; transparency\n",
"12; trust\n",
"11; trust, compliance, and integrity\n",
"9; uniformity and indivisibility\n",
"10; universal\n",
"768 \t TOTAL\n"
]
}
],
"source": [
"# Tally non-empty sentences per positive value, printing one line per\n",
"# value followed by the grand total.\n",
"total_sentences = 0\n",
"for value in prompt_json_in['positive_values']:\n",
"    non_empty = sum( 1 for p in value['prompts'] if p['text'] != '' )\n",
"    total_sentences += non_empty\n",
"    print( \"{0}; {1}\".format( non_empty, value['label'] ) )\n",
"print( \"{0} \\t TOTAL\".format( total_sentences ) )"
]
},
{
"cell_type": "markdown",
"id": "9e169863-c951-4df7-bb4f-9689834a9ee2",
"metadata": {
"id": "9e169863-c951-4df7-bb4f-9689834a9ee2"
},
"source": [
"### Verifying the number of sentences by negative values"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "91b541b8-69bc-4acf-a2e7-f9c0f48f29a3",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "91b541b8-69bc-4acf-a2e7-f9c0f48f29a3",
"outputId": "6ebe1977-2c1e-4779-9f22-a304bf85508d"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"250; violent crimes\n",
"673; non-violent crimes\n",
"9; sex-related crimes\n",
"5; child sexual exploitation\n",
"3; circumvent intellectual property\n",
"117; indiscriminate weapons\n",
"40; hate\n",
"21; suicide and self-harm\n",
"330; misinformation and deception\n",
"1; immorality\n",
"1449 \t TOTAL\n"
]
}
],
"source": [
"# Tally non-empty sentences per negative value, printing one line per\n",
"# value followed by the grand total.\n",
"total_sentences = 0\n",
"for value in prompt_json_in['negative_values']:\n",
"    non_empty = sum( 1 for p in value['prompts'] if p['text'] != '' )\n",
"    total_sentences += non_empty\n",
"    print( \"{0}; {1}\".format( non_empty, value['label'] ) )\n",
"print( \"{0} \\t TOTAL\".format( total_sentences ) )\n"
]
},
{
"cell_type": "markdown",
"id": "cc8d428c-4e69-4718-a3b9-65d8a2251de5",
"metadata": {
"id": "cc8d428c-4e69-4718-a3b9-65d8a2251de5"
},
"source": [
"### Listing all references in the document"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "eb4dcb39-68b5-418c-991d-a1d3b0265499",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eb4dcb39-68b5-418c-991d-a1d3b0265499",
"outputId": "13cad789-8e12-4f91-af67-df192edd4eb8"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"IBM Responsible Prompting Course templates\n",
"Jailbreak Chat\n",
"IBM Business Conduct Guidelines\n",
"IBM Trust and Compliance\n",
"IBM AttaQ Dataset Card\n",
"mistral-7b-instruct-v0-2_Temp0.7_p1_K50_nullseed_1.2RP_400mxt\n",
"Responsible And Inclusive Tech Framework\n",
"Society-Centered-Design-Principles\n",
"Datathon - Participatory practice with Responsible and Inclusive Tech Team\n",
"mistralai/mixtral-8x7b-instruct-v01_Temp0.7_p1-K500_nullseed_1.2RP_1000mxt\n",
"LLM Attacks - AdvBench\n",
"End-User Development of Automations for Explainable Green Smart Homes\n"
]
}
],
"source": [
"# Print the name of every reference recorded in the input JSON.\n",
"for reference in prompt_json_in['references']:\n",
"    print( reference['name'] )"
]
},
{
"cell_type": "markdown",
"id": "6315c838-436b-4eb3-b3aa-f0faba1cfcab",
"metadata": {
"id": "6315c838-436b-4eb3-b3aa-f0faba1cfcab"
},
"source": [
"### Visualizing embeddings"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "3ca73fb3",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "3ca73fb3",
"outputId": "bbd793f3-390b-4f14-d870-61478d2af9c9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Opening existing file locally: ../prompt-sentences-main/prompt_sentences-all-minilm-l6-v2.json\n"
]
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"hovertemplate": "Values=violent crimes
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}
Semantic Dimension 1=%{x}
Semantic Dimension 2=%{y}
text=%{text}