Spaces:

Ruurd
/

tini

Running on Zero

App Files Files

Ruurd committed 2 days ago

Commit

a8d72d4

1 Parent(s): ec83427

add eos_bias

Browse files

Files changed (1) hide show

app.py +11 -4

app.py CHANGED Viewed

@@ -36,11 +36,16 @@ if hf_token is None:
 rng = np.random.default_rng()
 @spaces.GPU
-def generate_diffusion_text(input_ids, top_p, top_k):
     with torch.no_grad():
         input_tensor = torch.tensor([input_ids], dtype=torch.long).to(model.device)
-        with torch.amp.autocast('cuda', dtype=torch.float16):
             logits = model(input_ids=input_tensor)["logits"]
         logits = filter_logits(logits, top_k=top_p, top_p=top_k)
         logits = logits.clamp(min=-1e8, max=1e4)
         probs = torch.nn.functional.softmax(logits, dim=-1)[0]
@@ -79,11 +84,12 @@ def highlight_tokens(token_ids, answer_start, changed_indices, color):
             highlighted.append(tok_str)
     return "".join(highlighted)
-def diffusion_chat(question, max_it, pause_length, sharpness,
                    clustering, noise_start, use_confidence_noising,
                    use_permanent_unmasking, noise_clipping, top_p,
                    top_k):
     if question.strip() == "":
         question = "What do you know about the city of Amsterdam?"
@@ -111,7 +117,7 @@ def diffusion_chat(question, max_it, pause_length, sharpness,
     unmasked_mask = [False] * len(current_tokens)
     for i in range(max_it):
-        generated_tokens, confidences = generate_diffusion_text(current_tokens, top_p, top_k)
         current_tokens = ori_input_tokens[:answer_start] + generated_tokens[answer_start:]
         # GREEN highlighting: compare to previous tokens
@@ -205,6 +211,7 @@ demo = gr.Interface(
         gr.Textbox(label="User Question", lines=2, placeholder="What do you know about the city of Amsterdam?"),
         gr.Slider(1, 512, value=64, step=1, label="Number of iterarions: ↑ = more iterations"),
         gr.Slider(0.01, 5, value=0.01, step=0.01, label="Pause between iteration ↑ = longer pause"),
         gr.Slider(1.0, 20.0, value=1.0, step=0.5, label="Noise decay sharpness: ↓ = more noise in later iterations"),
         gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Clustering: ↑ = more clustered noising"),
         gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="Noise start fraction: ↑ = more noise"),

 rng = np.random.default_rng()
 @spaces.GPU
+def generate_diffusion_text(input_ids, top_p, top_k, eos_bias=0.0):
     with torch.no_grad():
         input_tensor = torch.tensor([input_ids], dtype=torch.long).to(model.device)
+        with torch.cuda.amp.autocast(dtype=torch.float16):
             logits = model(input_ids=input_tensor)["logits"]
+        # Apply eos_bias
+        if eos_bias != 0.0:
+            logits[0, :, eos_token_id] += eos_bias
         logits = filter_logits(logits, top_k=top_p, top_p=top_k)
         logits = logits.clamp(min=-1e8, max=1e4)
         probs = torch.nn.functional.softmax(logits, dim=-1)[0]
             highlighted.append(tok_str)
     return "".join(highlighted)
+def diffusion_chat(question, max_it, pause_length, eos_bias, sharpness,
                    clustering, noise_start, use_confidence_noising,
                    use_permanent_unmasking, noise_clipping, top_p,
                    top_k):
+    eos_bias = -eos_bias
     if question.strip() == "":
         question = "What do you know about the city of Amsterdam?"
     unmasked_mask = [False] * len(current_tokens)
     for i in range(max_it):
+        generated_tokens, confidences = generate_diffusion_text(current_tokens, top_p, top_k, eos_bias = eos_bias)
         current_tokens = ori_input_tokens[:answer_start] + generated_tokens[answer_start:]
         # GREEN highlighting: compare to previous tokens
         gr.Textbox(label="User Question", lines=2, placeholder="What do you know about the city of Amsterdam?"),
         gr.Slider(1, 512, value=64, step=1, label="Number of iterarions: ↑ = more iterations"),
         gr.Slider(0.01, 5, value=0.01, step=0.01, label="Pause between iteration ↑ = longer pause"),
+        gr.Slider(-5.0, 5.0, value=0.0, step=0.1, label="Generation length: ↑ = more output tokens by decreasing eos token probability"),
         gr.Slider(1.0, 20.0, value=1.0, step=0.5, label="Noise decay sharpness: ↓ = more noise in later iterations"),
         gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Clustering: ↑ = more clustered noising"),
         gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="Noise start fraction: ↑ = more noise"),