#!/usr/bin/env python3
"""
Deep debugging of the chat template issue.
"""
import transformers
from transformers import AutoTokenizer
import jinja2
import json

MODEL_PATH = "/home/hotaisle/workspace/models/DeepSeek-R1-0528"
print(f"Transformers version: {transformers.__version__}")
print("-" * 60)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Test 1: Check if the tokenizer supports custom kwargs
print("\nTest 1: Checking tokenizer's apply_chat_template signature")
import inspect
sig = inspect.signature(tokenizer.apply_chat_template)
print(f"Parameters: {list(sig.parameters.keys())}")

# Test 2: Try to apply template manually with Jinja2
print("\n\nTest 2: Manual Jinja2 template application")
try:
    from jinja2 import Environment, BaseLoader

    # Create Jinja2 environment
    env = Environment(loader=BaseLoader())
    template_str = tokenizer.chat_template
    template = env.from_string(template_str)

    # Prepare variables
    messages = [{"role": "user", "content": "What is 2+2?"}]

    # Test with enable_thinking=False
    output = template.render(
        messages=messages,
        bos_token=tokenizer.bos_token,
        eos_token=tokenizer.eos_token,
        add_generation_prompt=True,
        enable_thinking=False,  # This is what we're testing
    )
    print("Manual render with enable_thinking=False:")
    print(f"Output ends with: {repr(output[-130:])}")
    empty_think_block = "<think>\n\n</think>\n\n"
    print(f"Contains empty think block: {empty_think_block in output}")
except Exception as e:
    print(f"Error in manual rendering: {e}")

# Test 3: Check the exact template condition
print("\n\nTest 3: Analyzing template condition")
template_str = tokenizer.chat_template
enable_thinking_idx = template_str.find("enable_thinking")
if enable_thinking_idx != -1:
    # Extract the condition
    start = template_str.rfind("{%", 0, enable_thinking_idx)
    end = template_str.find("%}", enable_thinking_idx) + 2
    condition = template_str[start:end]
    print(f"Found condition: {condition}")

    # Check for potential issues
    if "is false" in condition:
        print("✓ Uses 'is false' (correct for Jinja2)")
    elif "== false" in condition:
        print("⚠ Uses '== false' (might need 'is false')")
    elif "== False" in condition:
        print("⚠ Uses '== False' (Python style, might need 'is false')")

# Test 4: Try different ways to pass the parameter
print("\n\nTest 4: Testing different parameter passing methods")
# Method 1: Direct kwargs (what we've been trying)
try:
    result1 = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
    print("Method 1 (kwargs): Works")
except Exception as e:
    print(f"Method 1 (kwargs): Error - {e}")
# Method 2: Through a dict
try:
    result2 = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        **{"enable_thinking": False}
    )
    print("Method 2 (dict unpacking): Works")
except Exception as e:
    print(f"Method 2 (dict unpacking): Error - {e}")

# Test 5: Check if newer transformers supports it
print("\n\nTest 5: Checking transformers version compatibility")
print(f"Current version: {transformers.__version__}")
print("Note: Custom chat template parameters require transformers >= 4.34.0")
# Parse version
version_parts = transformers.__version__.split('.')
major = int(version_parts[0])
minor = int(version_parts[1])
if major > 4 or (major == 4 and minor >= 34):
    print("✓ Version should support custom parameters")
else:
    print("✗ Version too old for custom parameters!")

# Test 6: Alternative - modify the template to always inject empty think
print("\n\nTest 6: Testing a simpler template modification")
print("If all else fails, you could modify the template to always inject empty think")
print("when a specific string is in the user message, like 'NOTHINK'")