#!/usr/bin/env python3
"""
Deep debugging of the chat template issue.
"""
import transformers
from transformers import AutoTokenizer
import jinja2
import json

MODEL_PATH = "/home/hotaisle/workspace/models/DeepSeek-R1-0528"
print(f"Transformers version: {transformers.__version__}")
print("-" * 60)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Test 1: Check if the tokenizer supports custom kwargs
print("\nTest 1: Checking tokenizer's apply_chat_template signature")
import inspect
sig = inspect.signature(tokenizer.apply_chat_template)
print(f"Parameters: {list(sig.parameters.keys())}")

# Test 2: Try to apply template manually with Jinja2
print("\n\nTest 2: Manual Jinja2 template application")
try:
    from jinja2 import Environment, BaseLoader

    # Create Jinja2 environment
    env = Environment(loader=BaseLoader())
    template_str = tokenizer.chat_template
    template = env.from_string(template_str)

    # Prepare variables
    messages = [{"role": "user", "content": "What is 2+2?"}]

    # Test with enable_thinking=False
    output = template.render(
        messages=messages,
        bos_token=tokenizer.bos_token,
        eos_token=tokenizer.eos_token,
        add_generation_prompt=True,
        enable_thinking=False,  # This is what we're testing
    )
    print("Manual render with enable_thinking=False:")
    print(f"Output ends with: {repr(output[-130:])}")
    empty_think_block = "<think>\n\n</think>\n\n"
    print(f"Contains empty think block: {empty_think_block in output}")
except Exception as e:
    print(f"Error in manual rendering: {e}")

# Test 3: Check the exact template condition
print("\n\nTest 3: Analyzing template condition")
template_str = tokenizer.chat_template
enable_thinking_idx = template_str.find("enable_thinking")
if enable_thinking_idx != -1:
    # Extract the condition
    start = template_str.rfind("{%", 0, enable_thinking_idx)
    end = template_str.find("%}", enable_thinking_idx) + 2
    condition = template_str[start:end]
    print(f"Found condition: {condition}")

    # Check for potential issues
    if "is false" in condition:
        print("✓ Uses 'is false' (correct for Jinja2)")
    elif "== false" in condition:
        print("⚠ Uses '== false' (might need 'is false')")
    elif "== False" in condition:
        print("⚠ Uses '== False' (Python style, might need 'is false')")

# Test 4: Try different ways to pass the parameter
print("\n\nTest 4: Testing different parameter passing methods")
# Method 1: Direct kwargs (what we've been trying)
try:
    result1 = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
    print("Method 1 (kwargs): Works")
except Exception as e:
    print(f"Method 1 (kwargs): Error - {e}")
# Method 2: Through a dict
try:
    result2 = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        **{"enable_thinking": False}
    )
    print("Method 2 (dict unpacking): Works")
except Exception as e:
    print(f"Method 2 (dict unpacking): Error - {e}")

# Test 5: Check if newer transformers supports it
print("\n\nTest 5: Checking transformers version compatibility")
print(f"Current version: {transformers.__version__}")
print("Note: Custom chat template parameters require transformers >= 4.34.0")
# Parse version
version_parts = transformers.__version__.split('.')
major = int(version_parts[0])
minor = int(version_parts[1])
if major > 4 or (major == 4 and minor >= 34):
    print("✓ Version should support custom parameters")
else:
    print("✗ Version too old for custom parameters!")

# Test 6: Alternative - modify the template to always inject empty think
print("\n\nTest 6: Testing a simpler template modification")
print("If all else fails, you could modify the template to always inject empty think")
print("when a specific string is in the user message, like 'NOTHINK'")