# Responsible Prompting

## Recipe: Visualize Embeddings


### Imports

In [44]:
import os
import os.path
import json
import re
import requests
import pandas as pd
import numpy as np

from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
# from umap import UMAP
# import tensorflow as tf
# from umap.parametric_umap import ParametricUMAP, load_ParametricUMAP

import plotly.express as px
import plotly.offline as pyo
import plotly.io as pio

# Initialise plotly's offline notebook mode so figures can be shown inline.
pyo.init_notebook_mode(connected=True)
# Default template applied to every figure created below.
pio.templates.default = "plotly"
# Alternative templates, kept for quick switching:
# pio.templates.default = "plotly_white"
# pio.templates.default = "ggplot"
# pio.templates.default = "seaborn"

In [45]:
# COLAB is True only when the COLAB_RELEASE_TAG environment variable is set
# to a non-empty value; in that case plotly is switched to its 'colab' renderer.
COLAB = bool(os.getenv("COLAB_RELEASE_TAG"))
if COLAB:
    pio.renderers.default = 'colab'

### Sentence transformer model IDs (from Hugging Face)

In [46]:
# Models with existing json sentences output files.
# Each id is turned into a file-name suffix by model_id_to_filename() and used
# to build "prompt_sentences-<suffix>.json" in the visualization loop below.
model_ids = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "BAAI/bge-large-en-v1.5",
    "intfloat/multilingual-e5-large"
]

### Functions

In [47]:
# Converts model_id into filenames
def model_id_to_filename( model_id ):
    """Return the lowercase model name used as a file-name suffix.

    For an id of the form "<organisation>/<model>" the segment after the
    first '/' is returned in lowercase. An id without any '/' (no
    organisation prefix) is returned in lowercase as a whole instead of
    raising IndexError.

    Parameters
    ----------
    model_id : str
        Model identifier, e.g. "BAAI/bge-large-en-v1.5".

    Returns
    -------
    str
        Lowercase model name, e.g. "bge-large-en-v1.5".
    """
    parts = model_id.split('/')
    # Keep the historical choice of the second segment; fall back to the only
    # segment when the id has no organisation prefix.
    name = parts[1] if len( parts ) > 1 else parts[0]
    return name.lower()

# Performs TSNE for a given embeddings data frame
def perform_tsne( embeddings_df, n_components=2, columns=['embedding_x', 'embedding_y']):
    """Reduce embeddings to `n_components` dimensions with t-SNE.

    The reducer is created with a fixed random_state (13), PCA initialisation
    and an automatic learning rate.

    Parameters
    ----------
    embeddings_df : data accepted by TSNE.fit_transform (one row per sentence)
    n_components : number of output dimensions
    columns : column names of the returned frame; replaced by the x/y/z names
        whenever n_components == 3

    Returns
    -------
    pandas.DataFrame holding the reduced coordinates.
    """
    # Three-dimensional output always uses the fixed x/y/z column names.
    if n_components == 3:
        columns = ['embedding_x', 'embedding_y', 'embedding_z']

    reducer = TSNE(n_components, random_state=13, init="pca", learning_rate="auto")
    projected = reducer.fit_transform(embeddings_df)
    return pd.DataFrame(projected, columns=columns)

# Performs UMAP for a given embeddings data frame
def perform_umap(embeddings_df, n_components=2, dimensions=384, columns=['embedding_x', 'embedding_y'], file_name=''):
    """Reduce embeddings to `n_components` dimensions with Parametric UMAP.

    A small dense Keras encoder maps the `dimensions`-wide input to
    `n_components` outputs and is handed to ParametricUMAP, which is then
    fitted on `embeddings_df`.

    NOTE: this function needs `tf` (tensorflow) and `ParametricUMAP` in scope;
    the corresponding imports in the notebook's import cell are commented out
    and must be enabled before calling it.

    Parameters
    ----------
    embeddings_df : data passed to ParametricUMAP.fit_transform
    n_components : number of output dimensions (size of the encoder's last layer)
    dimensions : width of the input embeddings
    columns : column names of the returned frame; replaced by the x/y/z names
        whenever n_components == 3
    file_name : when not empty, the fitted model is saved under this name

    Returns
    -------
    pandas.DataFrame holding the reduced coordinates.
    """
    dims = (dimensions,)
    encoder = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(dimensions,)),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        # Output width follows n_components (previously fixed to 2, which broke
        # n_components=3); no activation for UMAP compatibility
        tf.keras.layers.Dense(n_components, activation=None)
    ])
    encoder.summary()
    # Parametric UMAP allowing to add new data points; n_components is passed
    # so the reducer and the encoder agree on the output size
    umap_model = ParametricUMAP(encoder=encoder, dims=dims, n_components=n_components)
    embeddings_umap = umap_model.fit_transform(embeddings_df)
    if( n_components == 3 ):
        columns = ['embedding_x', 'embedding_y', 'embedding_z']
    embeddings_df_umap = pd.DataFrame(embeddings_umap, columns=columns)
    # Saves model if a file name is provided
    if( file_name != ''):
        umap_model.save( file_name )

    return embeddings_df_umap

# Create a 2d plot for a given embedding dataframe
def plot_embedding_2d_interactive(embeddings_df, texts = None, colors = None, labels = None ):
    """Show an interactive 2D scatter plot of reduced sentence embeddings.

    Each row of `embeddings_df` becomes one marker positioned at
    ('embedding_x', 'embedding_y') and coloured by its 'label' column.

    Parameters
    ----------
    embeddings_df : DataFrame with 'embedding_x', 'embedding_y' and 'label' columns
    texts : optional sequence with one entry per row, attached to each point as its text
    colors : accepted for call compatibility; currently not used (points are
        coloured by the 'label' column)
    labels : accepted for call compatibility; currently not used (axis and
        legend titles are the fixed ones below)

    Returns
    -------
    None. The figure is displayed with fig.show().
    """
    # Scatter plot: the points are independent sentences, so no connecting
    # lines are wanted (previously a line plot forced into marker-only mode).
    fig = px.scatter(
        embeddings_df,
        x="embedding_x",
        y="embedding_y",
        color="label",
        text=texts,
        labels={
            "embedding_x": "Semantic Dimension 1",
            "embedding_y": "Semantic Dimension 2",
            "label": "Values"
        },
        width=1200, height=800,
        title="Comparing sentences' embeddings")

    # Keep the traces marker-only so the per-point text is not drawn on the canvas
    fig.update_traces(mode="markers")

    # Display the plot
    fig.show()

# Compares two sets of prompts by:
# collecting their sentences and embeddings, reducing the embeddings to 2D,
# and then plotting the resulting embedding comparison.
def compare_prompts_json( s1, s2, method='tsne', labels = None ):
    """Plot the prompts of two value sets in one shared 2D embedding space.

    Every prompt whose 'text' is not '' and whose 'embedding' is not [] is
    kept. Prompts of `s1` come first, followed by those of `s2`.

    Parameters
    ----------
    s1, s2 : iterables of values; each value has a 'label' and a list of
        'prompts', and each prompt has 'text' and 'embedding'
    method : 'umap' selects perform_umap, anything else selects perform_tsne
    labels : forwarded unchanged to plot_embedding_2d_interactive

    Returns
    -------
    None. The plot is shown by plot_embedding_2d_interactive.
    """
    sentences_per_set = ( [], [] )
    vectors = []
    value_names = []

    # Single pass over both sets, s1 first, so all lists share the same order
    for sentences, value_set in zip( sentences_per_set, ( s1, s2 ) ):
        for entry in value_set:
            for item in entry['prompts']:
                # Skip prompts with no text or no embedding
                if( item['text'] == '' or item['embedding'] == [] ):
                    continue
                sentences.append( item['text'] )
                vectors.append( item['embedding'] )
                value_names.append( entry['label'] )

    first_sentences, second_sentences = sentences_per_set
    texts = first_sentences + second_sentences

    # Color values per prompt: -1 for the first set, 1 for the second set
    # (with a diverging map such as 'RdYlGn' the first set is the harmful one)
    colors = [-1] * len( first_sentences ) + [1] * len( second_sentences )

    # One row per kept prompt
    embeddings = pd.DataFrame( vectors )

    # Dimensionality reduction
    if( method != 'umap' ):
        reduced = perform_tsne( embeddings )
    else:
        reduced = perform_umap( embeddings, dimensions=embeddings.shape[1] )

    # Attach the value label of each prompt and show the plot
    reduced['label'] = value_names
    plot_embedding_2d_interactive( reduced, texts, colors, labels )


### Opening Input File

In [48]:
# JSON folder: GitHub raw URL when running on Colab, local checkout otherwise
if( COLAB ):
    json_folder = 'https://raw.githubusercontent.com/IBM/responsible-prompting-api/refs/heads/main/prompt-sentences-main/'
else:
    json_folder = '../prompt-sentences-main/'

# INPUT FILE
# Default file with empty embeddings
json_in_file = json_folder + 'prompt_sentences.json'

if( COLAB ):
    response = requests.get( json_in_file )
    # Fail on an HTTP error here instead of on a confusing JSON decoding error
    response.raise_for_status()
    prompt_json_in = response.json()
    print( 'Opening file from GitHub repo: ', json_in_file )
else:
    # Stop now if the file is missing; otherwise prompt_json_in would stay
    # undefined (or keep a stale value from an earlier run) for the cells below
    if( not os.path.isfile( json_in_file ) ):
        raise FileNotFoundError( 'Input file not found: ' + json_in_file )
    # Context manager closes the file handle; encoding made explicit so the
    # result does not depend on the platform default
    with open( json_in_file, encoding='utf-8' ) as json_file:
        prompt_json_in = json.load( json_file )
    print( 'Opening existing file locally: ', json_in_file )


Opening existing file locally:  ../prompt-sentences-main/prompt_sentences.json


### Verifying the number of sentences by positive values

In [49]:
# Print, for every positive value, how many of its prompts have a non-empty
# text, followed by the overall total.
total_sentences = 0
for value in prompt_json_in['positive_values']:
    prompt_count_in = sum( 1 for prompt in value['prompts'] if prompt['text'] != '' )
    total_sentences += prompt_count_in
    print( "{0}; {1}".format( prompt_count_in, value['label'] ) )
print( "{0} \t TOTAL".format( total_sentences ) )

11; accountability
12; accuracy
9; advice
14; agreement
10; appropriate
14; awareness
10; collaboration
8; commitment
26; community and stakeholders
4; compliance
3; control
31; copyright, right to ownership
5; dedication
7; duty
25; education
15; effective and efficiency
9; expertise
30; explainability
17; fairness
9; family
9; flexible
19; forthright and honesty
24; impact
34; inclusion and diversity
8; indelible
8; integrity
32; integrity, compliance, trust, ethics, and dedication
7; leadership
15; measurability
8; money
10; moral
9; openness
21; participation
10; positivity
5; power
34; privacy
14; proactive
1; productivity
10; professional
12; progress
1; reliability
11; reputation
11; resolution
13; respect and social norms
22; responsibility
14; robustness
19; safety
14; scale
10; security
14; success
10; sustainability
11; transformation
17; transparency
12; trust
11; trust, compliance, and integrity
9; uniformity and indivisibility
10; universal
768 	 TOTAL


### Verifying the number of sentences by negative values

In [50]:
# Print, for every negative value, how many of its prompts have a non-empty
# text, followed by the overall total.
total_sentences = 0
for value in prompt_json_in['negative_values']:
    prompt_count_in = sum( 1 for prompt in value['prompts'] if prompt['text'] != '' )
    total_sentences += prompt_count_in
    print( "{0}; {1}".format( prompt_count_in, value['label'] ) )
print( "{0} \t TOTAL".format( total_sentences ) )


250; violent crimes
673; non-violent crimes
9; sex-related crimes
5; child sexual exploitation
3; circumvent intellectual property
117; indiscriminate weapons
40; hate
21; suicide and self-harm
330; misinformation and deception
1; immorality
1449 	 TOTAL


### Listing all references in the document

In [51]:
# Print the name of every reference listed in the input file, one per line
for reference_name in ( reference['name'] for reference in prompt_json_in['references'] ):
    print( reference_name )

IBM Responsible Prompting Course templates
Jailbreak Chat
IBM Business Conduct Guidelines
IBM Trust and Compliance
IBM AttaQ Dataset Card
mistral-7b-instruct-v0-2_Temp0.7_p1_K50_nullseed_1.2RP_400mxt
Responsible And Inclusive Tech Framework
Society-Centered-Design-Principles
Datathon - Participatory practice with Responsible and Inclusive Tech Team
mistralai/mixtral-8x7b-instruct-v01_Temp0.7_p1-K500_nullseed_1.2RP_1000mxt
LLM Attacks - AdvBench
End-User Development of Automations for Explainable Green Smart Homes


### Visualizing embeddings

In [52]:
# For every model: load its sentences file (with embeddings) and plot the
# negative values against the positive values.
for model_id in model_ids:
    # OUTPUT FILE
    json_out_file_suffix = model_id_to_filename( model_id )
    json_out_file = f"{json_folder}prompt_sentences-{json_out_file_suffix}.json"

    if( COLAB ):
        response = requests.get( json_out_file )
        # Fail on an HTTP error here instead of on a confusing JSON decoding error
        response.raise_for_status()
        prompt_json_out = response.json()
        print( 'Opening file from GitHub repo: ', json_out_file )
    else:
        # Skip models without a local file; otherwise prompt_json_out would
        # still hold the previous model's data and be plotted under this model
        if( not os.path.isfile( json_out_file ) ):
            print( 'Skipping model, file not found locally: ', json_out_file )
            continue
        # Context manager closes the file handle; encoding made explicit so
        # the result does not depend on the platform default
        with open( json_out_file, encoding='utf-8' ) as json_file:
            prompt_json_out = json.load( json_file )
        print( 'Opening existing file locally: ', json_out_file )

    # Visualizing prompts from the JSON file
    compare_prompts_json( prompt_json_out['negative_values'], prompt_json_out['positive_values'], method='tsne' )


Opening existing file locally:  ../prompt-sentences-main/prompt_sentences-all-minilm-l6-v2.json


Opening existing file locally:  ../prompt-sentences-main/prompt_sentences-bge-large-en-v1.5.json


Opening existing file locally:  ../prompt-sentences-main/prompt_sentences-multilingual-e5-large.json
