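# Page header captured together with the source when it was copied from the
# Hugging Face Spaces website (Space status: Sleeping). Not part of the program.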
# Hosted at https://huggingface.co/spaces/ArturoNereu/emojilator
import gradio as gr
import numpy as np
import spacy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Load the models once at import time so every request reuses them.
# The spaCy pipeline is used for part-of-speech tagging; per the original note
# it is "downloaded via python" (presumably
# `python -m spacy download en_core_web_sm` -- TODO confirm for this deployment).
nlp = spacy.load("en_core_web_sm")
# Sentence-embedding model used to compare input words with emoji names.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Basic emoji dictionary: maps an English name to the emoji shown for it.
# The keys (not the glyphs) are what gets embedded and matched against.
# NOTE(review): the glyphs were restored from mis-decoded bytes. A few could
# not be recovered uniquely (apple, book, happy, laugh, fish) and use the most
# common candidate -- confirm against the deployed Space.
emoji_map = {
    # Food
    "pizza": "🍕",
    "burger": "🍔",
    "fries": "🍟",
    "sushi": "🍣",
    "coffee": "☕",
    "ice cream": "🍦",
    "cake": "🎂",
    "beer": "🍺",
    "wine": "🍷",
    "apple": "🍎",
    "banana": "🍌",
    "taco": "🌮",
    "chocolate": "🍫",
    # Animals
    "dog": "🐶",
    "cat": "🐱",
    "lion": "🦁",
    "monkey": "🐒",
    "panda": "🐼",
    "elephant": "🐘",
    "fish": "🐟",
    "bird": "🐦",
    "horse": "🐴",
    # Places & activities
    "beach": "🏖️",
    "mountain": "⛰️",
    "city": "🏙️",
    "school": "🏫",
    "work": "💼",
    "party": "🎉",
    "swim": "🏊",
    "run": "🏃",
    "travel": "✈️",
    "camping": "🏕️",
    "movie": "🎬",
    # Objects
    "rocket": "🚀",
    "book": "📖",
    "phone": "📱",
    "computer": "💻",
    "car": "🚗",
    "bicycle": "🚲",
    "guitar": "🎸",
    "music": "🎵",
    # Emotions
    "happy": "😊",
    "sad": "😢",
    "angry": "😡",
    "love": "❤️",
    "laugh": "😂",
    "sleep": "😴",
}
# Predefined example sentences offered to the user; each one is run through
# translate_to_emoji when selected.
sentences = [
    "I love pizza and burgers",
    "My dog and cat are so cute",
    "Let's go swim at the beach",
    "The rocket launch was amazing",
    "I am reading a good book",
    "We are going camping in the mountains",
    "Happy birthday! I brought cake",
    "I need coffee and chocolate",
    "I want to travel and see new cities",
]
# Embed the emoji *names* (the dictionary keys, not the glyphs) once at
# start-up, so each translation only has to embed the words of the input text.
# emoji_names[i] is the name whose embedding is emoji_embeddings[i].
emoji_names = list(emoji_map)
emoji_embeddings = embedder.encode(emoji_names)
def translate_to_emoji(text, threshold=0.5):
    """Insert a semantically matching emoji after each noun or verb in ``text``.

    Every token that spaCy tags as ``NOUN`` or ``VERB`` is embedded and
    compared, by cosine similarity, with the embeddings of the ``emoji_map``
    names. When the best similarity is strictly greater than ``threshold``,
    the corresponding emoji is inserted (preceded by a space) right after
    that token.

    Args:
        text: The sentence to decorate. ``None`` or an empty string yields
            an empty string.
        threshold: Exclusive lower bound on the cosine similarity required
            for a replacement, so that matches make some sense. Defaults to
            0.5, the value that was previously hard-coded.

    Returns:
        The input text with emojis inserted after the matched tokens. All
        other characters, including whitespace, are kept as they were.
    """
    if not text:
        return ""

    doc = nlp(text)
    keyword_tokens = [token for token in doc if token.pos_ in ("NOUN", "VERB")]
    if not keyword_tokens:
        # Nothing to match; also avoids encoding an empty batch.
        return text

    # One similarity matrix for all keywords: row = keyword, column = emoji name.
    word_embeddings = embedder.encode([token.text for token in keyword_tokens])
    similarities = cosine_similarity(word_embeddings, emoji_embeddings)
    best_indices = np.argmax(similarities, axis=1)

    # Map token position -> emoji for every keyword that clears the threshold.
    emoji_after = {}
    for row, token in enumerate(keyword_tokens):
        best_idx = best_indices[row]
        if similarities[row, best_idx] > threshold:
            emoji_after[token.i] = emoji_map[emoji_names[best_idx]]

    # Rebuild the text token by token instead of using str.replace, which
    # would also hit substrings of other words ("cat" in "vacation") and would
    # add the emoji several times when the same keyword occurs more than once.
    pieces = []
    for token in doc:
        pieces.append(token.text)
        if token.i in emoji_after:
            pieces.append(f" {emoji_after[token.i]}")
        pieces.append(token.whitespace_)
    return "".join(pieces)
# Gradio UI
# Text of the "About" panel, kept flush-left so the Markdown renders the same
# regardless of how deeply the component is nested below.
ABOUT_MARKDOWN = """
### About this Space 🧠
This is a simple Space to help build intuition around the concept of embeddings.
Most of the magic behind LLMs for developers revolves around understanding this concept: converting data into a numeric representation, or a vector.
For example, the word apple could become [8348832, 3423943, 923493294]. If we use the 🍎 emoji, its embedding might be [432432, 43423, 3423423]. When we calculate the distance between them, they're close!
In this demo, we use that distance to replace words with relevant emojis. What's interesting is that it's not only about exact matches—if you write "I want ICE CREAM", you might get 🍦, not 🧊 + 🧴.
This means we're capturing meaning (semantics), not just character-to-character equality.
This is why "ice cream" gives 🍦 instead of 🧊 + 🧴.
"""

# NOTE(review): the original indentation was lost; the nesting below (mascot
# and About text side by side, selector and output underneath) is the most
# plausible reading -- confirm against the deployed Space. The title emoji was
# also restored from garbled bytes (a dinosaur, matching the mascot image).
with gr.Blocks(theme="soft") as app:
    gr.Markdown("# Emojilator 🦖")
    gr.Markdown("AI-powered emoji translator: make your text expressive with semantic emoji matching!")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Image("Frida_Dino.png", elem_id="mascot", show_label=False)
        with gr.Column(scale=2):
            gr.Markdown(ABOUT_MARKDOWN)

    # Dropdown instead of textbox: the user picks one of the predefined phrases
    phrase_selector = gr.Dropdown(
        label="Select a phrase",
        choices=sentences,
        value=sentences[0],
    )
    output_box = gr.Textbox(label="Text with emojis", lines=3)

    # Automatically translate when a phrase is selected
    phrase_selector.change(translate_to_emoji, inputs=phrase_selector, outputs=output_box)
    # Also translate the pre-selected phrase when the page loads; otherwise the
    # output box stays empty until the selection is changed.
    app.load(translate_to_emoji, inputs=phrase_selector, outputs=output_box)

if __name__ == "__main__":
    app.launch()