Improve white space display
- app.py  +50 -76
- infer.py  +50 -0
- requirements.txt  +1 -0
app.py
CHANGED
@@ -101,131 +101,105 @@ def format_chat_prompt(question):
Before (removed lines are marked with "-"; some removed lines are truncated in the source and are left as-is):

        "<|start_header_id|>assistant<|end_header_id|>\n"
    )

# --- Inference Wrapper ---
def diffusion_chat(question, max_it, pause_length, sharpness,
                   clustering, noise_start, use_confidence_noising,
                   noise_clipping, top_p, top_k):
-
    if question.strip() == "":
-       question =

-   print('started generation')
    prompt = format_chat_prompt(question)
    input_ids = tokenizer.encode(prompt, add_special_tokens=False)
    answer_start = find_answer_start(input_ids, assistant_marker_ids)
    if answer_start is None:
-       yield "Error
        return
-
-   if len(input_ids) < 256:
-       input_ids += [mask_token_id] * (256 - len(input_ids))
-   else:
-       input_ids = input_ids[:256]

    ori_input_tokens = input_ids
    current_tokens, just_noised_indices = noisify_answer(
-
-
-   yield
    time.sleep(pause_length)
-   last_tokens = []
-   prev_decoded_tokens = []

-

    for i in range(max_it):
-       print('Generating output')
-
-       # Model step
        generated_tokens, confidences = generate_diffusion_text(current_tokens, top_p, top_k)
-
-       elapsed = time.time() - generation_start
-       remaining = pause_length - elapsed
-       if remaining > 0:
-           time.sleep(remaining)
-
-       # Save full output for noising step
        current_tokens = ori_input_tokens[:answer_start] + generated_tokens[answer_start:]

-
-
-
-
-
-               continue
-           token_str = tokenizer.convert_tokens_to_string([tok])
-           if prev_decoded_tokens and j < len(prev_decoded_tokens) and tok != prev_decoded_tokens[j]:
-               highlighted.append(f'<span style="color:green">{token_str}</span>')
-           else:
-               highlighted.append(token_str)
-
-       prev_decoded_tokens = decoded_tokens
-       yield f"<b>Iteration {i+1}/{max_it} (after generation):</b><br>" + "".join(highlighted).replace('\n', '<br>')
        time.sleep(pause_length)

-       #
        last_tokens.append(current_tokens)
        if len(last_tokens) > 3:
            last_tokens.pop(0)
-       if len(last_tokens) == 3 and last_tokens
-           yield
            break

-
-
-       # --- NOISING STEP ---
        threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
        if use_confidence_noising:
            noised_answer, just_noised_indices = confidence_guided_noising(
-               current_tokens, answer_start, confidences, noise_clipping,
            )
-           # just_noised_indices = []
        else:
            noised_answer, just_noised_indices = noisify_answer(
-               current_tokens, answer_start, tokenizer,
            )

-
-
-
-
-           tok_id = tokenizer.convert_tokens_to_ids(tok)
-           if tok_id == eos_token_id:
-               continue
-           token_str = tokenizer.convert_tokens_to_string([tok])
-           abs_idx = answer_start + j
-           if abs_idx in just_noised_indices:
-               highlighted.append(f'<span style="color:red">{token_str}</span>')
-           else:
-               highlighted.append(token_str)
-
-       # Compose full input again: prompt + noised answer
-       current_tokens = ori_input_tokens[:answer_start] + noised_answer[answer_start:]
-
-       yield f"<b>Iteration {i+1}/{max_it} (before noising):</b><br>" + "".join(highlighted).replace('\n', '<br>')
-       generation_start = time.time()

    answer_ids = current_tokens[answer_start:]
    try:
-
-       final_ids = answer_ids[:eos_index]
    except ValueError:
        final_ids = answer_ids
-
-   num_tokens = len(final_ids)
    final_output = tokenizer.decode(final_ids, skip_special_tokens=True)
-
-   print(final_output)
-   yield f"<b>Final Output ({num_tokens} tokens after {i+1} iterations):</b><br>" + final_output.replace('\n', '<br>')


# --- Gradio Interface ---
print("Loading model...")
ckpt_path = hf_hub_download(
    repo_id="ruurd/tini_model",
-   filename="diffusion-model.pth",
    token=os.getenv("HF_TOKEN")
)
model, tokenizer = load_trained_model(checkpoint_path=ckpt_path)
After (added lines are marked with "+"):

          "<|start_header_id|>assistant<|end_header_id|>\n"
      )

+ def render_html(label, text):
+     return f"<b>{label}</b><br><div style='white-space: pre-wrap; line-height:1.8'>{text}</div>"
+
+ def highlight_tokens(tokens, color_indices=None, color="green"):
+     highlighted = []
+     for j, tok in enumerate(tokens):
+         if tokenizer.convert_tokens_to_ids(tok) == eos_token_id:
+             continue
+         token_str = tokenizer.convert_tokens_to_string([tok])
+         if color_indices and j in color_indices:
+             highlighted.append(f'<span style="color:{color}">{token_str}</span>')
+         else:
+             highlighted.append(token_str)
+     return "".join(highlighted)
+
  # --- Inference Wrapper ---
  def diffusion_chat(question, max_it, pause_length, sharpness,
                     clustering, noise_start, use_confidence_noising,
                     noise_clipping, top_p, top_k):
+
      if question.strip() == "":
+         question = "What do you know about the city of Amsterdam?"

      prompt = format_chat_prompt(question)
      input_ids = tokenizer.encode(prompt, add_special_tokens=False)
      answer_start = find_answer_start(input_ids, assistant_marker_ids)
      if answer_start is None:
+         yield render_html("Error", "Could not find Assistant marker in input.")
          return

+     input_ids = (input_ids + [mask_token_id] * (256 - len(input_ids)))[:256]
      ori_input_tokens = input_ids
+
      current_tokens, just_noised_indices = noisify_answer(
+         input_ids, answer_start, tokenizer, threshold=1.0, clustering=clustering, noise_start=1.0
+     )
+     yield render_html("Iteration 0 (initial noise)",
+                       tokenizer.decode(current_tokens[answer_start:], skip_special_tokens=True))
      time.sleep(pause_length)

+     last_tokens = []
+     prev_tokens = []

      for i in range(max_it):
          generated_tokens, confidences = generate_diffusion_text(current_tokens, top_p, top_k)
          current_tokens = ori_input_tokens[:answer_start] + generated_tokens[answer_start:]

+         decoded = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
+         diff_indices = [j for j in range(len(decoded)) if j >= len(prev_tokens) or decoded[j] != prev_tokens[j]]
+         prev_tokens = decoded
+
+         yield render_html(f"Iteration {i+1}/{max_it} (after generation)",
+                           highlight_tokens(decoded, diff_indices, color="green"))
          time.sleep(pause_length)

+         # Early stopping
          last_tokens.append(current_tokens)
          if len(last_tokens) > 3:
              last_tokens.pop(0)
+         if len(last_tokens) == 3 and len(set(map(tuple, last_tokens))) == 1:
+             yield render_html("Stopped early", f"After {i+1} iterations.")
              break

+         # Noising step
          threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
          if use_confidence_noising:
              noised_answer, just_noised_indices = confidence_guided_noising(
+                 current_tokens, answer_start, confidences, noise_clipping,
+                 threshold=threshold, noise_start=noise_start
              )
          else:
              noised_answer, just_noised_indices = noisify_answer(
+                 current_tokens, answer_start, tokenizer,
+                 threshold=threshold, clustering=clustering, noise_start=noise_start
              )

+         decoded = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
+         red_indices = [j for j in range(len(decoded)) if (answer_start + j) in just_noised_indices]
+         yield render_html(f"Iteration {i+1}/{max_it} (before noising)",
+                           highlight_tokens(decoded, red_indices, color="red"))

+         current_tokens = ori_input_tokens[:answer_start] + noised_answer[answer_start:]

+     # Final output
      answer_ids = current_tokens[answer_start:]
      try:
+         final_ids = answer_ids[:answer_ids.index(eos_token_id)]
      except ValueError:
          final_ids = answer_ids
+
      final_output = tokenizer.decode(final_ids, skip_special_tokens=True)
+     yield render_html(f"Final Output ({len(final_ids)} tokens after {i+1} iterations)", final_output)


  # --- Gradio Interface ---
  print("Loading model...")
  ckpt_path = hf_hub_download(
      repo_id="ruurd/tini_model",
+     filename="diffusion-model-8B.pth",
      token=os.getenv("HF_TOKEN")
  )
  model, tokenizer = load_trained_model(checkpoint_path=ckpt_path)
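The white-space fix hinges on wrapping the streamed HTML in a `white-space: pre-wrap` container, so spaces and newlines in the decoded tokens survive Gradio's HTML rendering. A minimal, self-contained sketch of that idea (the token list and highlight indices below are made-up placeholders, not part of the commit):

def render_html(label, text):
    # Same pattern as the new app.py helper: pre-wrap preserves spaces and newlines.
    return f"<b>{label}</b><br><div style='white-space: pre-wrap; line-height:1.8'>{text}</div>"

demo_tokens = ["The", " answer", "\n", " is", " 42", "."]   # hypothetical decoded tokens
changed = {4}                                               # hypothetical indices to color green

spans = [
    f'<span style="color:green">{tok}</span>' if j in changed else tok
    for j, tok in enumerate(demo_tokens)
]
print(render_html("Iteration 1/8 (after generation)", "".join(spans)))
# Feeding this string to a gr.HTML output should show the newline and leading spaces intact.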
infer.py
CHANGED
@@ -6,6 +6,7 @@ import random
Before:

  import importlib
  import torch.nn as nn
  import os

  from transformers import AutoTokenizer

After (added line marked with "+"):

  import importlib
  import torch.nn as nn
  import os
+ from IPython.display import display, HTML, Markdown, clear_output

  from transformers import AutoTokenizer

@@ -162,6 +163,55 @@ def calculate_answer_perplexity(prompt, answer, model_name='gpt2-large'):

Before:

      labels[0, :prompt_len] = -100
      loss = model(input_ids, labels=labels).loss
      return torch.exp(loss).item()


  def generate_answer(question: str, model, tokenizer, max_it=16, noise_start=0.5,

After (added lines marked with "+"):

      labels[0, :prompt_len] = -100
      loss = model(input_ids, labels=labels).loss
      return torch.exp(loss).item()
+
+
+ def format_token_colored_inline(token_id, conf, tokenizer, mask_token_id=128000):
+     token_str = tokenizer.decode([token_id]).replace("\n", "<br>")
+     # token_str = token_str.replace(" ", "&nbsp;")  # Preserve spaces for inline display
+     # token_str = token_str.replace("\t", "&nbsp;&nbsp;")  # Replace tabs with spaces
+
+     if token_id == mask_token_id:
+         color = "black"
+     else:
+         color = f"hsl({int(conf * 120)}, 100%, 25%)"
+
+     return f"<span style='color:{color}' title='Conf: {conf:.2f}'>{token_str}</span>"
+
+
+ def display_diffusion_output(i, max_it, question, ori_input_tokens, generated_tokens, confidences, answer_start, tokenizer):
+     clear_output(wait=True)
+     display(Markdown(f"### Iteration {i}/{max_it-1}"))
+     display(Markdown(f"**Question:** {tokenizer.decode(ori_input_tokens[:answer_start])}"))
+     mask_token_id = tokenizer.encode('MASK', add_special_tokens=False)[0]
+
+     output_html = ''.join([
+         format_token_colored_inline(tok, conf, tokenizer, mask_token_id)
+         for tok, conf in zip(generated_tokens[answer_start:], confidences[answer_start:])
+         if tok != 128001  # skip EOT
+     ])
+     output_html = f"<div style='white-space: pre-wrap'>{output_html}</div>"
+
+     html = HTML(f"<b>Diffusion Output with Confidence:</b><br><div style='line-height:1.8; white-space: pre-wrap'>{output_html}</div>")
+     display(html)
+
+     return output_html
+
+ def save_html_colored_output(filename, html_content):
+     with open(filename, "w", encoding="utf-8") as f:
+         f.write(f"""
+         <html>
+         <head>
+             <meta charset="utf-8">
+             <style>
+                 body {{ font-family: sans-serif; line-height: 1.6; }}
+                 span {{ padding: 0 2px; }}
+             </style>
+         </head>
+         <body>
+             {html_content}
+         </body>
+         </html>
+         """)


  def generate_answer(question: str, model, tokenizer, max_it=16, noise_start=0.5,
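A rough, model-free smoke test for the new notebook helpers (the tokenizer choice, output file name, and mask_token_id=-1 sentinel are assumptions for illustration, not part of this commit; it only exercises the confidence coloring and the HTML export):

from transformers import AutoTokenizer
from infer import format_token_colored_inline, save_html_colored_output

tok = AutoTokenizer.from_pretrained("gpt2")  # any tokenizer works for this demo
ids = tok.encode("Diffusion output:\n  indented line\nnewlines and spaces survive")
confs = [min(1.0, 0.1 * i) for i in range(len(ids))]  # fake confidences in [0, 1]

html = ''.join(
    format_token_colored_inline(t, c, tok, mask_token_id=-1)  # -1: treat no token as a mask
    for t, c in zip(ids, confs)
)
# Wrap in pre-wrap, as display_diffusion_output does, so whitespace survives in the saved page.
save_html_colored_output("demo_output.html", f"<div style='white-space: pre-wrap'>{html}</div>")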
requirements.txt
CHANGED
@@ -6,3 +6,4 @@ accelerate>=0.24.1
  gradio>=4.10.0
  numpy
  load_dotenv
+ ipython