|
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
|
-0.84765625,-0.69140625,-0.76171875,-0.7265625,-0.68359375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9296875,-0.8125,-0.84375,-0.80078125,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9609375,-0.8515625,-0.8671875,-0.7890625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.953125,-0.8046875,-0.83984375,-0.8125,-0.80859375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
|
-0.953125,-0.8125,-0.8359375,-0.8046875,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.95703125,-0.8046875,-0.83984375,-0.8125,-0.78515625,gpt-4.1-mini,0.0,0.0,1.0,0.0,1.0
|
|
-0.88671875,-0.7578125,-0.83984375,-0.77734375,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.9453125,-0.796875,-0.828125,-0.7890625,-0.7578125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.8671875,-0.78515625,-0.72265625,-0.69140625,-0.75,qwen25-coder-32b-instruct,0.0,1.0,1.0,1.0,1.0
|
|
-0.93359375,-0.79296875,-0.80859375,-0.8046875,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.97265625,-0.85546875,-0.89453125,-0.82421875,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.94140625,-0.80078125,-0.8203125,-0.80078125,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9609375,-0.8515625,-0.84765625,-0.81640625,-0.8125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.96875,-0.7421875,-0.8671875,-0.78515625,-0.69921875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-1.0078125,-0.890625,-0.88671875,-0.8359375,-0.8203125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.94921875,-0.80078125,-0.8125,-0.7890625,-0.75,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-1.0,-0.8203125,-0.91015625,-0.80078125,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.91796875,-0.8125,-0.85546875,-0.78515625,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-1.0,-0.90625,-0.88671875,-0.83203125,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.96875,-0.90625,-0.890625,-0.8125,-0.8125,qwen25-coder-32b-instruct,1.0,1.0,0.0,1.0,1.0
|
|
-0.9296875,-0.83203125,-0.82421875,-0.78515625,-0.796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
|
-0.90625,-0.73046875,-0.81640625,-0.8046875,-0.71484375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-1.0,-0.875,-0.95703125,-0.83203125,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9609375,-0.859375,-0.84765625,-0.8125,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
|
-0.9609375,-0.86328125,-0.86328125,-0.828125,-0.828125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
|
-0.85546875,-0.71875,-0.81640625,-0.76171875,-0.62109375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.96484375,-0.8125,-0.84765625,-0.7890625,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
|
-0.98046875,-0.83984375,-0.87109375,-0.828125,-0.8125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.984375,-0.91015625,-0.88671875,-0.78125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9296875,-0.74609375,-0.7890625,-0.75,-0.703125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.91796875,-0.7890625,-0.8125,-0.8046875,-0.79296875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
|
-0.92578125,-0.8046875,-0.84375,-0.796875,-0.78125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.97265625,-0.76953125,-0.85546875,-0.7734375,-0.73046875,gpt-4.1-mini,1.0,0.0,1.0,1.0,1.0
|
|
-0.9609375,-0.78515625,-0.8515625,-0.78515625,-0.7578125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.98046875,-0.87109375,-0.921875,-0.8125,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.953125,-0.84375,-0.8359375,-0.8125,-0.7890625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.9609375,-0.81640625,-0.85546875,-0.83203125,-0.80078125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.96484375,-0.84765625,-0.84375,-0.80859375,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,0.0,1.0,1.0
|
|
-0.95703125,-0.81640625,-0.84375,-0.80859375,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.98046875,-0.8125,-0.87109375,-0.81640625,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.921875,-0.8359375,-0.828125,-0.8046875,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-1.0,-0.83984375,-0.83984375,-0.79296875,-0.77734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.97265625,-0.890625,-0.875,-0.80859375,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.984375,-0.875,-0.8515625,-0.8203125,-0.82421875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
|
-0.97265625,-0.85546875,-0.859375,-0.828125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9296875,-0.79296875,-0.79296875,-0.7734375,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
|
-0.94140625,-0.78515625,-0.80859375,-0.78125,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.97265625,-0.90234375,-0.91015625,-0.8125,-0.8046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-1.0078125,-0.91015625,-0.94140625,-0.82421875,-0.8125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.92578125,-0.82421875,-0.828125,-0.81640625,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.95703125,-0.765625,-0.85546875,-0.8046875,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.99609375,-0.77734375,-0.87890625,-0.765625,-0.73046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.96875,-0.84375,-0.8203125,-0.7890625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,0.0,1.0
|
|
-0.85546875,-0.6640625,-0.7109375,-0.7421875,-0.63671875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.984375,-0.87890625,-0.8828125,-0.8125,-0.796875,gpt-4.1-mini,1.0,0.0,0.0,0.0,0.0
|
|
-0.984375,-0.8515625,-0.87890625,-0.84375,-0.828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.98046875,-0.90625,-0.85546875,-0.79296875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
|
-0.9609375,-0.79296875,-0.84765625,-0.765625,-0.703125,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
|
|
-0.9296875,-0.78125,-0.8515625,-0.8125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9609375,-0.89453125,-0.87109375,-0.8046875,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9765625,-0.84765625,-0.890625,-0.8203125,-0.796875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.98828125,-0.90234375,-0.87890625,-0.81640625,-0.78515625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.98828125,-0.890625,-0.86328125,-0.8359375,-0.82421875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.9453125,-0.82421875,-0.828125,-0.828125,-0.796875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.96484375,-0.796875,-0.828125,-0.8203125,-0.76953125,gpt-4.1-mini,1.0,1.0,1.0,1.0,0.0
|
|
-0.91796875,-0.734375,-0.796875,-0.78515625,-0.7421875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
|
-0.9296875,-0.76171875,-0.83203125,-0.79296875,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.96875,-0.82421875,-0.8671875,-0.76953125,-0.75390625,gpt-4.1-mini,1.0,0.0,0.0,1.0,1.0
|
|
-0.87109375,-0.71484375,-0.79296875,-0.78125,-0.72265625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
|
-0.9609375,-0.90234375,-0.87109375,-0.8046875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
|
-0.953125,-0.8359375,-0.859375,-0.80078125,-0.8125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
|
-0.96875,-0.8046875,-0.84765625,-0.78515625,-0.73828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.96875,-0.82421875,-0.8515625,-0.80078125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.94921875,-0.8515625,-0.88671875,-0.859375,-0.83203125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.9765625,-0.84375,-0.90625,-0.796875,-0.79296875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-1.0,-0.828125,-0.87109375,-0.82421875,-0.7734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
|
-0.921875,-0.828125,-0.84375,-0.796875,-0.77734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.97265625,-0.83984375,-0.85546875,-0.83203125,-0.80859375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-1.015625,-0.8828125,-0.94140625,-0.859375,-0.828125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
-0.94140625,-0.8203125,-0.8515625,-0.82421875,-0.796875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
|
|
|
predicted_proportions,0.0000,0.0500,0.0000,0.1500,0.8000
|
|
true_proportions,0.0848,0.0806,0.0713,0.0723,0.0785
|
|
|