Noisify without remasking
Files changed:
- .gitignore +4 -1
- app.py +57 -68
- infer.py +65 -0
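
In short: app.py gains a third noising mode, "permanent unmasking". The inference loop keeps an `unmasked_mask` over all positions; any answer position that survives a noising step is recorded there and is never remasked in later iterations. `confidence_guided_noising` moves from app.py into infer.py (now taking the `tokenizer` so it can derive the mask and EOS token ids itself), the new `noisify_answer_without_remasking` lands next to it, `highlight_tokens` is reworked to take raw token ids plus absolute changed indices, and checkpoint loading now branches on whether the app runs on Spaces (`SPACE_ID` set) or locally.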
.gitignore CHANGED

@@ -83,4 +83,7 @@ vendor/
 # Environment files #
 #####################
 .env
-.env.*
+.env.*
+
+*.pem
+*.pth
app.py CHANGED

@@ -6,7 +6,9 @@ import time
 from transformers import AutoTokenizer
 import os
 import importlib
+import os
 from huggingface_hub import hf_hub_download
+
 import spaces
 from dotenv import load_dotenv
 from infer import (
@@ -15,7 +17,9 @@ from infer import (
     get_noising_schedule,
     noisify_answer,
     generate_diffusion_text,
-    filter_logits
+    filter_logits,
+    confidence_guided_noising,
+    noisify_answer_without_remasking
 )
 from models import CustomTransformerModel
 from model_config import CustomTransformerConfig
@@ -31,48 +35,6 @@ if hf_token is None:

 rng = np.random.default_rng()

-# Add new noising function
-def confidence_guided_noising(input_ids, answer_start, confidences, noise_clipping, threshold=1.0, noise_start=1.0):
-    noised = input_ids.copy()
-    answer_len = len(input_ids) - answer_start
-    num_to_noise = int(threshold * answer_len * noise_start)
-    if num_to_noise == 0:
-        return noised, []
-
-    all_indices = np.arange(answer_start, len(input_ids))
-    eos_indices = [i for i in all_indices if input_ids[i] == eos_token_id]
-    non_eos_indices = [i for i in all_indices if input_ids[i] != eos_token_id]
-
-    # Proportionally split how many to noise
-    num_non_eos_to_noise = int(num_to_noise * len(non_eos_indices) / (len(non_eos_indices) + len(eos_indices) + 1e-5))
-    num_eos_to_noise = num_to_noise - num_non_eos_to_noise
-
-    noised_indices = []
-
-    # --- Non-EOS ---
-    if non_eos_indices:
-        raw_weights = 1.0 - np.array([confidences[i - answer_start] for i in non_eos_indices])
-        raw_weights = np.clip(raw_weights, a_min=noise_clipping, a_max=None)
-        weights = raw_weights / raw_weights.sum()
-
-        chosen = rng.choice(non_eos_indices, size=min(num_non_eos_to_noise, len(non_eos_indices)), replace=False, p=weights)
-        noised_indices.extend(chosen.tolist())
-
-    # --- EOS ---
-    if eos_indices and num_eos_to_noise > 0:
-        raw_weights = 1.0 - np.array([confidences[i - answer_start] for i in eos_indices])
-        raw_weights = np.clip(raw_weights, a_min=noise_clipping, a_max=None)
-        weights = raw_weights / raw_weights.sum()
-
-        chosen = rng.choice(eos_indices, size=min(num_eos_to_noise, len(eos_indices)), replace=False, p=weights)
-        noised_indices.extend(chosen.tolist())
-
-    for idx in noised_indices:
-        noised[idx] = mask_token_id
-
-    noised_indices = sorted(noised_indices)
-    return noised, noised_indices
-
 @spaces.GPU
 def generate_diffusion_text(input_ids, top_p, top_k):
     with torch.no_grad():
@@ -104,22 +66,23 @@ def format_chat_prompt(question):
 def render_html(label, text):
     return f"<b>{label}</b><br><div style='white-space: pre-wrap; line-height:1.8'>{text}</div>"

-def highlight_tokens(tokens, changed_indices, color):
+def highlight_tokens(token_ids, answer_start, changed_indices, color):
+    tokens = tokenizer.convert_ids_to_tokens(token_ids)
     highlighted = []
     for j, tok in enumerate(tokens):
         if tokenizer.convert_tokens_to_ids(tok) == eos_token_id:
             continue
-
-        if j in changed_indices:
-            highlighted.append(f'<span style="color:{color}">{tok}</span>')
+        tok_str = tokenizer.convert_tokens_to_string([tok])
+        if (answer_start + j) in changed_indices:
+            highlighted.append(f'<span style="color:{color}">{tok_str}</span>')
         else:
-            highlighted.append(tok)
+            highlighted.append(tok_str)
     return "".join(highlighted)

-# --- Inference Wrapper ---
 def diffusion_chat(question, max_it, pause_length, sharpness,
                    clustering, noise_start, use_confidence_noising,
-                   noise_clipping, top_p, top_k):
+                   use_permanent_unmasking, noise_clipping, top_p,
+                   top_k):

     if question.strip() == "":
         question = "What do you know about the city of Amsterdam?"
@@ -134,53 +97,69 @@ def diffusion_chat(question, max_it, pause_length, sharpness,
     input_ids = (input_ids + [mask_token_id] * (256 - len(input_ids)))[:256]
     ori_input_tokens = input_ids

+    # Initial noising
     current_tokens, just_noised_indices = noisify_answer(
         input_ids, answer_start, tokenizer, threshold=1.0, clustering=clustering, noise_start=1.0
     )
     yield render_html("Iteration 0 (initial noise)",
-                      …
+                      highlight_tokens(current_tokens[answer_start:], answer_start, just_noised_indices, color="red"))
     time.sleep(pause_length)

     last_tokens = []
-    …
+    prev_decoded = []
+
+    unmasked_mask = [False] * len(current_tokens)

     for i in range(max_it):
         generated_tokens, confidences = generate_diffusion_text(current_tokens, top_p, top_k)
         current_tokens = ori_input_tokens[:answer_start] + generated_tokens[answer_start:]

-        …
-        …
-        …
+        # GREEN highlighting: compare to previous tokens
+        new_decoded = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
+        diff_indices = {
+            answer_start + j for j, tok in enumerate(new_decoded)
+            if j >= len(prev_decoded) or tok != prev_decoded[j]
+        }
+        prev_decoded = new_decoded

         yield render_html(f"Iteration {i+1}/{max_it} (after generation)",
-                          highlight_tokens(…
+                          highlight_tokens(current_tokens[answer_start:], answer_start, diff_indices, color="green"))
         time.sleep(pause_length)

         # Early stopping
         last_tokens.append(current_tokens)
         if len(last_tokens) > 3:
             last_tokens.pop(0)
-        if len(last_tokens) == 3 and …
+        if len(last_tokens) == 3 and last_tokens[0] == last_tokens[1] == last_tokens[2]:
             yield render_html("Stopped early", f"After {i+1} iterations.")
             break

-        # …
+        # NOISING
         threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
         if use_confidence_noising:
             noised_answer, just_noised_indices = confidence_guided_noising(
-                current_tokens, answer_start, confidences, noise_clipping,
+                current_tokens, answer_start, tokenizer, confidences, noise_clipping,
                 threshold=threshold, noise_start=noise_start
             )
+        elif use_permanent_unmasking:
+            noised_answer, just_noised_indices = noisify_answer_without_remasking(
+                current_tokens, answer_start, tokenizer, threshold=threshold,
+                noise_start=noise_start, unmasked_mask=unmasked_mask
+            )
         else:
             noised_answer, just_noised_indices = noisify_answer(
                 current_tokens, answer_start, tokenizer,
                 threshold=threshold, clustering=clustering, noise_start=noise_start
             )
+
+        for idx in range(answer_start, len(current_tokens)):
+            if noised_answer[idx] != mask_token_id:
+                unmasked_mask[idx] = True
+
+

-        decoded = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
-        red_indices = [j for j in range(len(decoded)) if (answer_start + j) in just_noised_indices]
         yield render_html(f"Iteration {i+1}/{max_it} (before noising)",
-                          highlight_tokens(…
+                          highlight_tokens(current_tokens[answer_start:], answer_start, just_noised_indices, color="red"))

         current_tokens = ori_input_tokens[:answer_start] + noised_answer[answer_start:]

@@ -195,13 +174,22 @@ def diffusion_chat(question, max_it, pause_length, sharpness,
     yield render_html(f"Final Output ({len(final_ids)} tokens after {i+1} iterations)", final_output)


-…
+def is_running_on_spaces():
+    return os.getenv("SPACE_ID") is not None
+
 print("Loading model...")
-…
-…
-…
-…
-…
+
+if is_running_on_spaces():
+    # Load from Hugging Face Hub
+    ckpt_path = hf_hub_download(
+        repo_id="ruurd/tini_model",
+        filename="diffusion-model-8B.pth",
+        token=os.getenv("HF_TOKEN")
+    )
+else:
+    # Load from local path
+    ckpt_path = "diffusion-model-3B.pth"  # change to your actual local path
+
 model, tokenizer = load_trained_model(checkpoint_path=ckpt_path)
 print("✅ Model loaded.")

@@ -220,6 +208,7 @@ demo = gr.Interface(
         gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Clustering: ↑ = more clustered noising"),
         gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Noise start fraction: ↑ = more noise"),
         gr.Checkbox(value=False, label="Use confidence-guided noising"),
+        gr.Checkbox(value=False, label="Use permanent unmasking"),
         gr.Slider(0.01, 1.0, value=0.01, step=0.01, label="Noise clipping: ↓ = more confidence guidance"),
         gr.Slider(1, 1000, value = 100, step = 1, label = "Top-p: ↑ = more random answers"),
         gr.Slider(0.0, 1.0, value = 0.9, step = 0.01, label = "Top-k: ↑ = more random answers")
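
To make the `unmasked_mask` bookkeeping concrete, here is a minimal, self-contained sketch of the permanent-unmasking loop. This is not the Space's code: ids are toy integers, `model_fill` is a hypothetical stand-in for `generate_diffusion_text`, and the noise schedule is a bare countdown. One difference is deliberate: the sketch clamps the sample size with `min(...)`, whereas the committed `rng.choice(eligible_indices, size=num_to_noise, replace=False)` in infer.py would raise a ValueError once `num_to_noise` exceeds the shrinking eligible pool.

import numpy as np

rng = np.random.default_rng(0)
MASK = 0  # toy mask token id

def noisify_without_remasking(tokens, answer_start, num_to_noise, unmasked_mask):
    # Mask only positions that were never locked in (toy analogue of the diff's helper).
    noised = tokens.copy()
    eligible = [i for i in range(answer_start, len(tokens)) if not unmasked_mask[i]]
    if num_to_noise <= 0 or not eligible:
        return noised
    for i in rng.choice(eligible, size=min(num_to_noise, len(eligible)), replace=False):
        noised[i] = MASK
    return noised

def model_fill(tokens):
    # Hypothetical stand-in for generate_diffusion_text: fill every MASK with a random id.
    return [t if t != MASK else int(rng.integers(1, 10)) for t in tokens]

answer_start = 2
tokens = [7, 8] + [MASK] * 6          # prompt + fully masked answer
unmasked = [False] * len(tokens)

for it in range(4):
    tokens = model_fill(tokens)                                   # denoise step
    noised = noisify_without_remasking(tokens, answer_start, 4 - it, unmasked)
    for i in range(answer_start, len(tokens)):                    # lock surviving positions
        if noised[i] != MASK:
            unmasked[i] = True
    tokens = noised
    print(f"iter {it}: tokens={tokens} locked={unmasked[answer_start:]}")

Each iteration the model refills the masks, a decaying number of still-unlocked positions is remasked, and everything that survives is frozen for good, so the answer converges monotonically instead of churning.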
infer.py CHANGED

@@ -125,6 +125,71 @@ def noisify_answer(input_ids, answer_start, tokenizer, threshold=1.0, clustering

 import torch.nn.functional as F

+def noisify_answer_without_remasking(input_ids, answer_start, tokenizer, threshold=1.0, noise_start=1.0, unmasked_mask=None):
+    noised = input_ids.copy()
+    mask_token_id = tokenizer.encode('MASK', add_special_tokens=False)[0]
+
+    eligible_indices = list(range(answer_start, len(noised)))
+
+    if unmasked_mask is not None:
+        eligible_indices = [i for i in eligible_indices if not unmasked_mask[i]]
+
+    answer_len = len(noised) - answer_start
+    num_to_noise = int(threshold * answer_len * noise_start)
+
+    if num_to_noise == 0 or len(eligible_indices) == 0:
+        return noised, []
+
+    selected = rng.choice(eligible_indices, size=num_to_noise, replace=False).tolist()
+
+    for idx in selected:
+        noised[idx] = mask_token_id
+
+    return noised, selected
+
+def confidence_guided_noising(input_ids, answer_start, tokenizer, confidences, noise_clipping, threshold=1.0, noise_start=1.0):
+    noised = input_ids.copy()
+    answer_len = len(input_ids) - answer_start
+    num_to_noise = int(threshold * answer_len * noise_start)
+    mask_token_id = tokenizer.encode('MASK', add_special_tokens=False)[0]
+    eos_token_id = tokenizer.eos_token_id
+    if num_to_noise == 0:
+        return noised, []
+
+    all_indices = np.arange(answer_start, len(input_ids))
+    eos_indices = [i for i in all_indices if input_ids[i] == eos_token_id]
+    non_eos_indices = [i for i in all_indices if input_ids[i] != eos_token_id]
+
+    # Proportionally split how many to noise
+    num_non_eos_to_noise = int(num_to_noise * len(non_eos_indices) / (len(non_eos_indices) + len(eos_indices) + 1e-5))
+    num_eos_to_noise = num_to_noise - num_non_eos_to_noise
+
+    noised_indices = []
+
+    # --- Non-EOS ---
+    if non_eos_indices:
+        raw_weights = 1.0 - np.array([confidences[i - answer_start] for i in non_eos_indices])
+        raw_weights = np.clip(raw_weights, a_min=noise_clipping, a_max=None)
+        weights = raw_weights / raw_weights.sum()
+
+        chosen = rng.choice(non_eos_indices, size=min(num_non_eos_to_noise, len(non_eos_indices)), replace=False, p=weights)
+        noised_indices.extend(chosen.tolist())
+
+    # --- EOS ---
+    if eos_indices and num_eos_to_noise > 0:
+        raw_weights = 1.0 - np.array([confidences[i - answer_start] for i in eos_indices])
+        raw_weights = np.clip(raw_weights, a_min=noise_clipping, a_max=None)
+        weights = raw_weights / raw_weights.sum()
+
+        chosen = rng.choice(eos_indices, size=min(num_eos_to_noise, len(eos_indices)), replace=False, p=weights)
+        noised_indices.extend(chosen.tolist())
+
+    for idx in noised_indices:
+        noised[idx] = mask_token_id
+
+    noised_indices = sorted(noised_indices)
+    return noised, noised_indices
+
 def generate_diffusion_text(model, input_ids, answer_start, top_k=0, top_p=1.0, temperature=1.0,
                             eos_token_id=None, eos_boost=0.0):
     model.eval()
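
To see what `noise_clipping` buys in `confidence_guided_noising`, here is a standalone sketch of its selection step (fake confidences, positions only; the committed function additionally splits the masking budget proportionally between EOS and non-EOS positions before sampling). The clipping floors the raw weights so even high-confidence tokens keep a small chance of being remasked:

import numpy as np

rng = np.random.default_rng(0)

# Per-token model confidences over a toy 8-token answer region.
confidences = np.array([0.95, 0.20, 0.80, 0.10, 0.60, 0.99, 0.30, 0.70])
noise_clipping = 0.05   # lower bound on the raw weights
num_to_noise = 3

# Same weighting as the diff: low confidence -> high remasking probability.
raw_weights = np.clip(1.0 - confidences, a_min=noise_clipping, a_max=None)
weights = raw_weights / raw_weights.sum()

chosen = rng.choice(np.arange(len(confidences)), size=num_to_noise, replace=False, p=weights)
print(sorted(chosen.tolist()))  # low-confidence positions 1, 3, 6 dominate the draw

Lowering `noise_clipping` sharpens the distribution toward the least confident tokens (hence the UI label "↓ = more confidence guidance"); raising it toward 1.0 flattens the weights back to uniform sampling.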
|