File size: 4,393 Bytes
2915cca
2d261bf
3117925
 
 
 
 
 
 
 
 
 
 
f2133c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3117925
f2133c2
 
 
 
 
3117925
f2133c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3117925
 
f2133c2
 
 
 
 
 
 
 
 
 
 
 
 
3117925
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2133c2
11d98eb
 
f2133c2
11d98eb
802e398
 
 
 
 
 
8685c4f
2f6d0e9
8685c4f
 
 
 
 
 
 
2f6d0e9
f2133c2
802e398
2f6d0e9
8685c4f
 
 
 
 
11d98eb
8685c4f
f2133c2
11d98eb
8685c4f
 
51c75c3
 
11d98eb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Hosted in https://huggingface.co/spaces/ArturoNereu/emojilator
import gradio as gr
import spacy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load both models once at import time:
# - `nlp`: spaCy English pipeline; its part-of-speech tags are used to pick the
#   nouns and verbs of the input (the "en_core_web_sm" package presumably has to
#   be installed beforehand -- confirm against the Space's requirements).
# - `embedder`: sentence-transformers model used to embed both the emoji names
#   and the keywords extracted from the input text.
nlp = spacy.load("en_core_web_sm")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Basic emoji dictionary: maps a lowercase English name to the emoji that
# represents it. The names (keys) are what gets embedded and compared against
# the words of the input text; the values are what is inserted into the output.
emoji_map = {
    # Food
    "pizza": "🍕",
    "burger": "🍔",
    "fries": "🍟",
    "sushi": "🍣",
    "coffee": "☕",
    "ice cream": "🍦",
    "cake": "🎂",
    "beer": "🍺",
    "wine": "🍷",
    "apple": "🍎",
    "banana": "🍌",
    "taco": "🌮",
    "chocolate": "🍫",

    # Animals
    "dog": "🐶",
    "cat": "🐱",
    "lion": "🦁",
    "monkey": "🐒",
    "panda": "🐼",
    "elephant": "🐘",
    "fish": "🐟",
    "bird": "🐦",
    "horse": "🐴",

    # Places & activities
    "beach": "🏖️",
    "mountain": "⛰️",
    "city": "🏙️",
    "school": "🏫",
    "work": "💼",
    "party": "🎉",
    "swim": "🏊",
    "run": "🏃",
    "travel": "✈️",
    "camping": "🏕️",
    "movie": "🎬",

    # Objects
    "rocket": "🚀",
    "book": "📚",
    "phone": "📱",
    "computer": "💻",
    "car": "🚗",
    "bicycle": "🚲",
    "guitar": "🎸",
    "music": "🎵",

    # Emotions
    "happy": "😄",
    "sad": "😢",
    "angry": "😡",
    "love": "❤️",
    "laugh": "😂",
    "sleep": "😴",
}

# Predefined sample sentences; these are the choices offered by the phrase
# dropdown in the UI, and the first one is its initial value.
sentences = [
    "I love pizza and burgers",
    "My dog and cat are so cute",
    "Let's go swim at the beach",
    "The rocket launch was amazing",
    "I am reading a good book",
    "We are going camping in the mountains",
    "Happy birthday! I brought cake",
    "I need coffee and chocolate",
    "I want to travel and see new cities"
]

# Embed every emoji name once at import time, so that translating a sentence
# only has to embed that sentence's own keywords. `emoji_names[i]` is the name
# whose embedding is `emoji_embeddings[i]`.
emoji_names = list(emoji_map)
emoji_embeddings = embedder.encode(emoji_names)

def translate_to_emoji(text, threshold=0.5):
    """Append a semantically matching emoji after each noun/verb in *text*.

    Every token tagged NOUN or VERB by spaCy is embedded and compared (cosine
    similarity) against the embeddings of the known emoji names. When the best
    match scores above *threshold*, that emoji is inserted right after the
    token; all other text is left exactly as it was.

    Args:
        text: Sentence to decorate. ``None`` or an empty string yields ``""``.
        threshold: Minimum cosine similarity (exclusive) for an emoji to be
            inserted. Defaults to 0.5, which filters out weak matches.

    Returns:
        The input text with emojis inserted after the matching words.
    """
    # The dropdown can hand over None when it is cleared; nlp() rejects that.
    if not text:
        return ""

    doc = nlp(text)
    targets = [token for token in doc if token.pos_ in ("NOUN", "VERB")]
    if not targets:
        return text

    # One batched call: row i holds the similarity of keyword i to every emoji name.
    keyword_embeddings = embedder.encode([token.text for token in targets])
    scores = cosine_similarity(keyword_embeddings, emoji_embeddings)
    best_columns = np.argmax(scores, axis=1)

    # Map token position -> emoji. Keying by position (instead of calling
    # str.replace on the word) keeps repeated words from receiving several
    # emojis and keeps substrings of longer words (e.g. "run" in "brunch")
    # from being touched.
    emoji_after = {}
    for row, token in enumerate(targets):
        column = best_columns[row]
        if scores[row, column] > threshold:
            emoji_after[token.i] = emoji_map[emoji_names[column]]

    # Rebuild the text token by token; token text plus its trailing whitespace
    # reproduces the original string, so only the emojis are new.
    pieces = []
    for token in doc:
        pieces.append(token.text)
        if token.i in emoji_after:
            pieces.append(f" {emoji_after[token.i]}")
        pieces.append(token.whitespace_)

    return "".join(pieces)

# Gradio UI: a mascot image next to an explanation of embeddings, a dropdown of
# sample phrases, and a textbox showing the phrase decorated with emojis.
with gr.Blocks(theme="soft") as app:
    gr.Markdown("# Emojilator 🦖")
    gr.Markdown("AI-powered emoji translator: make your text expressive with semantic emoji matching!")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Image("Frida_Dino.png", elem_id="mascot", show_label=False)
        with gr.Column(scale=2):
            gr.Markdown("""
            ### About this Space 🧠
            This is a simple Space to help build intuition around the concept of embeddings.

            Most of the magic behind LLMs for developers revolves around understanding this concept: converting data into a numeric representation, or a vector.

            For example, the word apple could become [8348832, 3423943, 923493294]. If we use the 🍎 emoji, its embedding might be [432432, 43423, 3423423]. When we calculate the distance between them, they're close!

            In this demo, we use that distance to replace words with relevant emojis. What's interesting is that it's not only about exact matches—if you write "I want ICE CREAM", you might get 🍦, not 🧊 + 🧴.

            This means we're capturing meaning (semantics), not just character-to-character equality.

            This is why "ice cream" gives 🍦 instead of 🧊 + 🧴.
            """)

            # Dropdown instead of textbox
            phrase_selector = gr.Dropdown(
                label="Select a phrase",
                choices=sentences,
                value=sentences[0]
            )

            output_box = gr.Textbox(label="Text with emojis", lines=3)

    # Automatically translate when a phrase is selected
    phrase_selector.change(translate_to_emoji, inputs=phrase_selector, outputs=output_box)

    # The change event does not fire for the preselected default phrase, so
    # translate once on page load as well; otherwise the output starts empty.
    app.load(translate_to_emoji, inputs=phrase_selector, outputs=output_box)

# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    app.launch()