|
<!DOCTYPE html>
<!-- lang declares the page language for screen readers, translation tools and search engines. -->
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description"
        content="NOVER: Incentive Training for Language Models via Verifier-Free Reinforcement Learning">
  <meta name="keywords" content="NOVER, Reinforcement Learning, Language Models, Reasoning">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>NOVER: NO-VERifier Reinforcement Learning</title>

  <!-- Inter web font (weights 400/500/600), referenced by the body font-family rule. -->
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap"
        rel="stylesheet">

  <!-- Third-party stylesheets: Bulma, Font Awesome 6 and Academicons. -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.4/css/bulma.min.css">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
|
|
|
<style> |
|
/* ---------- Page canvas and decorative backdrops ---------- */

/* Base typography and colours; horizontal overflow is clipped. */
body {
  position: relative;
  overflow-x: hidden;
  color: #1a1a1a;
  background: #fafafa;
  font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
}

/* Geometry shared by both backdrop layers: pinned to the top-left corner,
   100% wide and tall, click-through, and stacked behind the content. */
body::before,
.geometric-bg {
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  pointer-events: none;
  z-index: -1;
}

/* Viewport-fixed wash made of three very faint radial gradients. */
body::before {
  content: '';
  position: fixed;
  background:
    radial-gradient(circle at 20% 80%, rgba(16, 185, 129, 0.03) 0%, transparent 50%),
    radial-gradient(circle at 80% 20%, rgba(59, 130, 246, 0.03) 0%, transparent 50%),
    radial-gradient(circle at 40% 40%, rgba(16, 185, 129, 0.02) 0%, transparent 50%);
}

/* Criss-cross diagonal stripes on 60px tiles, rendered at 3% opacity. */
.geometric-bg {
  position: absolute;
  opacity: 0.03;
  background-image:
    linear-gradient(45deg, transparent 45%, #10b981 45%, #10b981 55%, transparent 55%),
    linear-gradient(-45deg, transparent 45%, #3b82f6 45%, #3b82f6 55%, transparent 55%);
  background-size: 60px 60px;
  background-position: 0 0, 30px 30px;
}
|
|
|
|
|
/* ---------- Hero banner ---------- */

/* Rounded, bordered card; overflow is hidden so the ::before pattern is clipped to the rounded corners. */
.hero {
  position: relative;
  overflow: hidden;
  margin: 2rem;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  box-shadow: 0 4px 24px rgba(0,0,0,0.06);
  background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
}

/* Pattern overlay: two crossed sets of 40px diagonal stripes at 30% opacity. */
.hero::before {
  content: '';
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  z-index: 0;
  opacity: 0.3;
  pointer-events: none;
  background-image:
    repeating-linear-gradient(
      45deg,
      transparent,
      transparent 40px,
      rgba(16, 185, 129, 0.04) 40px,
      rgba(16, 185, 129, 0.04) 80px
    ),
    repeating-linear-gradient(
      -45deg,
      transparent,
      transparent 40px,
      rgba(59, 130, 246, 0.03) 40px,
      rgba(59, 130, 246, 0.03) 80px
    );
  background-size: 80px 80px, 80px 80px;
  background-position: 0 0, 40px 40px;
}
|
|
|
|
|
/* ---------- Per-section card themes ----------
   NOTE(review): :nth-of-type counts siblings by ELEMENT TYPE, not by class.
   The hero is itself a <section>, so the first element with class "section"
   is presumably the 2nd <section> and picks up the (2) theme, not (1).
   A <footer class="section"> that is the first <footer> among its siblings
   would also match :nth-of-type(1), which outranks the footer.section rules.
   TODO confirm against the full markup before re-theming. */

/* Card chrome shared by the first six positions (same specificity as the
   individual :nth-of-type(k) selectors below). */
.section:nth-of-type(-n+6) {
  position: relative;
  overflow: hidden;
  margin: 2rem;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  box-shadow: 0 2px 8px rgba(0,0,0,0.04);
}

/* Pattern-overlay geometry shared by the first six positions. */
.section:nth-of-type(-n+6)::before {
  content: '';
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  z-index: 0;
  opacity: 0.3;
  pointer-events: none;
}

/* Position 1: blue grid, 30px bands. */
.section:nth-of-type(1) {
  background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
}

.section:nth-of-type(1)::before {
  background-image:
    repeating-linear-gradient(
      90deg,
      transparent,
      transparent 30px,
      rgba(59, 130, 246, 0.06) 30px,
      rgba(59, 130, 246, 0.06) 60px
    ),
    repeating-linear-gradient(
      0deg,
      transparent,
      transparent 30px,
      rgba(59, 130, 246, 0.06) 30px,
      rgba(59, 130, 246, 0.06) 60px
    );
  background-size: 60px 60px, 60px 60px;
  background-position: 0 0, 30px 30px;
}

/* Position 2: green diagonals, 50px bands. */
.section:nth-of-type(2) {
  background: linear-gradient(135deg, #ffffff 0%, #f0fdf4 100%);
}

.section:nth-of-type(2)::before {
  background-image:
    repeating-linear-gradient(
      45deg,
      transparent,
      transparent 50px,
      rgba(16, 185, 129, 0.07) 50px,
      rgba(16, 185, 129, 0.07) 100px
    ),
    repeating-linear-gradient(
      -45deg,
      transparent,
      transparent 50px,
      rgba(16, 185, 129, 0.05) 50px,
      rgba(16, 185, 129, 0.05) 100px
    );
  background-size: 100px 100px, 100px 100px;
  background-position: 0 0, 50px 50px;
}

/* Position 3: amber stripes at +/-30deg, 40px bands. */
.section:nth-of-type(3) {
  background: linear-gradient(135deg, #ffffff 0%, #fefce8 100%);
}

.section:nth-of-type(3)::before {
  background-image:
    repeating-linear-gradient(
      30deg,
      transparent,
      transparent 40px,
      rgba(245, 158, 11, 0.06) 40px,
      rgba(245, 158, 11, 0.06) 80px
    ),
    repeating-linear-gradient(
      -30deg,
      transparent,
      transparent 40px,
      rgba(245, 158, 11, 0.05) 40px,
      rgba(245, 158, 11, 0.05) 80px
    );
  background-size: 80px 80px, 80px 80px;
  background-position: 0 0, 40px 40px;
}

/* Position 4: red grid, 35px bands. */
.section:nth-of-type(4) {
  background: linear-gradient(135deg, #ffffff 0%, #fef2f2 100%);
}

.section:nth-of-type(4)::before {
  background-image:
    repeating-linear-gradient(
      0deg,
      transparent,
      transparent 35px,
      rgba(239, 68, 68, 0.06) 35px,
      rgba(239, 68, 68, 0.06) 70px
    ),
    repeating-linear-gradient(
      90deg,
      transparent,
      transparent 35px,
      rgba(239, 68, 68, 0.05) 35px,
      rgba(239, 68, 68, 0.05) 70px
    );
  background-size: 70px 70px, 70px 70px;
  background-position: 0 0, 35px 35px;
}

/* Position 5: sky-blue stripes at +/-60deg, 60px bands. */
.section:nth-of-type(5) {
  background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
}

.section:nth-of-type(5)::before {
  background-image:
    repeating-linear-gradient(
      60deg,
      transparent,
      transparent 60px,
      rgba(14, 165, 233, 0.07) 60px,
      rgba(14, 165, 233, 0.07) 120px
    ),
    repeating-linear-gradient(
      -60deg,
      transparent,
      transparent 60px,
      rgba(14, 165, 233, 0.05) 60px,
      rgba(14, 165, 233, 0.05) 120px
    );
  background-size: 120px 120px, 120px 120px;
  background-position: 0 0, 60px 60px;
}

/* Position 6: grey stripes at +/-25deg, 45px bands. */
.section:nth-of-type(6) {
  background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
}

.section:nth-of-type(6)::before {
  background-image:
    repeating-linear-gradient(
      25deg,
      transparent,
      transparent 45px,
      rgba(107, 114, 128, 0.06) 45px,
      rgba(107, 114, 128, 0.06) 90px
    ),
    repeating-linear-gradient(
      -25deg,
      transparent,
      transparent 45px,
      rgba(107, 114, 128, 0.05) 45px,
      rgba(107, 114, 128, 0.05) 90px
    );
  background-size: 90px 90px, 90px 90px;
  background-position: 0 0, 45px 45px;
}
|
|
|
|
|
/* ---------- Footer ---------- */

/* Top-bordered footer strip, spaced 4rem from the content above it. */
footer.section {
  position: relative;
  overflow: hidden;
  margin-top: 4rem;
  border-top: 1px solid #e5e7eb;
  background: linear-gradient(135deg, #ffffff 0%, #f9fafb 100%);
}

/* Pattern overlay: wide (80px) crossed diagonal stripes at 30% opacity. */
footer.section::before {
  content: '';
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  z-index: 0;
  opacity: 0.3;
  pointer-events: none;
  background-image:
    repeating-linear-gradient(
      45deg,
      transparent,
      transparent 80px,
      rgba(16, 185, 129, 0.06) 80px,
      rgba(16, 185, 129, 0.06) 160px
    ),
    repeating-linear-gradient(
      -45deg,
      transparent,
      transparent 80px,
      rgba(59, 130, 246, 0.05) 80px,
      rgba(59, 130, 246, 0.05) 160px
    );
  background-size: 160px 160px, 160px 160px;
  background-position: 0 0, 80px 80px;
}

/* Lift real content above the z-index: 0 pattern overlays. */
.hero-body,
.section .container,
footer .container {
  z-index: 1;
  position: relative;
}
|
|
|
/* ---------- Title and brand accent ---------- */

.publication-title {
  font-weight: 600;
  letter-spacing: -0.02em;
  color: #1a1a1a;
}

/* Green, semi-bold styling for the project name. */
.nover {
  font-weight: 600;
  color: #10b981;
}

/* ---------- Generic card ---------- */

.glass-card {
  position: relative;
  z-index: 1;
  margin: 1rem 0;
  padding: 2rem;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: #f9fafb;
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
  transition: all 0.2s ease;
}

.glass-card:hover {
  border-color: #d1d5db;
  box-shadow: 0 4px 12px rgba(0,0,0,0.08);
}

/* ---------- Figures ---------- */

.figure-container {
  position: relative;
  z-index: 1;
  margin: 2rem 0;
  padding: 2rem;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: white;
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
  text-align: center;
}

/* Dashed placeholder frame. */
.figure-placeholder {
  position: relative;
  margin-bottom: 1rem;
  padding: 3rem 2rem;
  border: 2px dashed #9ca3af;
  border-radius: 8px;
  background: #f3f4f6;
  color: #6b7280;
  font-size: 1rem;
  font-weight: 500;
  transition: all 0.3s ease;
}

.figure-placeholder:hover {
  border-color: #6b7280;
  background: #f9fafb;
}

/* Colour variants. They have the same specificity as the :hover rule above
   and come later in the sheet, so their colours also apply while hovered. */
.figure-placeholder.analysis {
  border-color: #3b82f6;
  background: linear-gradient(135deg, #eff6ff 0%, #dbeafe 100%);
  color: #1e40af;
}

.figure-placeholder.experiment {
  border-color: #10b981;
  background: linear-gradient(135deg, #f0fdf4 0%, #dcfce7 100%);
  color: #047857;
}

.figure-placeholder.comparison {
  border-color: #f59e0b;
  background: linear-gradient(135deg, #fefce8 0%, #fef3c7 100%);
  color: #92400e;
}

/* ---------- Insight cards ---------- */

/* Auto-fitting grid; columns are at least 280px wide. */
.analysis-grid {
  display: grid;
  gap: 1.5rem;
  grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
  margin: 2rem 0;
}

.insight-card {
  position: relative;
  z-index: 1;
  padding: 1.5rem;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: white;
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
  text-align: left;
  transition: all 0.2s ease;
}

.insight-card:hover {
  border-color: #10b981;
  box-shadow: 0 4px 12px rgba(0,0,0,0.08);
}

.insight-title {
  margin-bottom: 0.5rem;
  color: #1a1a1a;
  font-size: 1.1rem;
  font-weight: 600;
}

.insight-description {
  color: #6b7280;
  font-size: 0.9rem;
  line-height: 1.5;
}
|
|
|
/* ---------- Comparison table ---------- */

/* Rounded wrapper; overflow is hidden so the table corners are clipped. */
.comparison-table {
  position: relative;
  z-index: 1;
  overflow: hidden;
  margin: 2rem 0;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: white;
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}

.comparison-table table {
  border-collapse: collapse;
  width: 100%;
}

.comparison-table th {
  padding: 1rem;
  border-bottom: 1px solid #e5e7eb;
  background: #f9fafb;
  color: #374151;
  font-weight: 600;
}

.comparison-table td {
  padding: 1rem;
  border-bottom: 1px solid #f3f4f6;
  text-align: center;
}

/* Zebra striping on even rows. */
.comparison-table tr:nth-child(even) {
  background: #fafbfc;
}

/* !important lets the highlight win over the zebra-stripe background. */
.highlight-row {
  border-left: 3px solid #10b981;
  background: #f0fdf4 !important;
}

.highlight-number {
  color: #10b981;
  font-size: 1.05em;
  font-weight: 600;
}

/* ---------- Dark monospace metrics panel ---------- */

.metrics-container {
  position: relative;
  z-index: 1;
  margin: 2rem 0;
  padding: 2rem;
  border-radius: 12px;
  background: #1f2937;
  box-shadow: 0 4px 12px rgba(0,0,0,0.15);
  color: #e5e7eb;
  font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
}

.metrics-header {
  margin-bottom: 1rem;
  padding-bottom: 0.5rem;
  border-bottom: 1px solid #374151;
  color: #10b981;
  font-weight: 600;
}

/* ---------- Method comparison grid ---------- */

/* Auto-fitting grid; columns are at least 250px wide. */
.method-comparison {
  display: grid;
  gap: 1.5rem;
  grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
  margin: 2rem 0;
}
|
|
|
/* ---------- Method cards ---------- */

.method-card {
  position: relative;
  z-index: 1;
  padding: 1.5rem;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: white;
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
  text-align: center;
  transition: all 0.2s ease;
}

.method-card:hover {
  border-color: #10b981;
  box-shadow: 0 4px 12px rgba(0,0,0,0.08);
}

.method-icon {
  margin-bottom: 1rem;
  color: #6b7280;
  font-size: 2.5rem;
}

/* The NOVER card gets the green brand colour on its icon. */
.method-card.nover .method-icon {
  color: #10b981;
}

/* ---------- Abstract / TL;DR card ---------- */

.abstract-card {
  position: relative;
  z-index: 1;
  margin: 2rem 0;
  padding: 2rem;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: #f9fafb;
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}

/* ---------- Stat tiles ---------- */

/* Auto-fitting grid; columns are at least 180px wide. */
.stats-grid {
  display: grid;
  gap: 1rem;
  grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
  margin: 2rem 0;
}

.stat-item {
  position: relative;
  z-index: 1;
  padding: 1.5rem;
  border: 1px solid #e5e7eb;
  border-radius: 8px;
  background: white;
  box-shadow: 0 1px 3px rgba(0,0,0,0.1);
  text-align: center;
  transition: all 0.2s ease;
}

.stat-item:hover {
  border-color: #10b981;
  box-shadow: 0 4px 12px rgba(0,0,0,0.08);
}

.stat-number {
  margin-bottom: 0.5rem;
  color: #10b981;
  font-size: 2rem;
  font-weight: 600;
}

.stat-label {
  color: #6b7280;
  font-size: 0.9rem;
  font-weight: 500;
}
|
|
|
/* ---------- Results tables ---------- */

/* Rounded wrapper around each results <table>. */
.results-table {
  position: relative;
  z-index: 1;
  overflow: hidden;
  width: 100%;
  margin: 0;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: white;
  box-shadow: 0 2px 12px rgba(0,0,0,0.08);
}

.results-table table {
  border-collapse: collapse;
  width: 100%;
  font-size: 0.8rem;
}

/* NOTE(review): #374155 differs by one digit from the #374151 used for
   .comparison-table th; possibly a typo. Left as written. */
.results-table th {
  position: sticky;
  top: 0;
  z-index: 10;
  padding: 1rem 0.8rem;
  border-bottom: 2px solid #e5e7eb;
  background: #f8fafc;
  color: #374155;
  font-weight: 600;
  text-align: center !important;
}

.results-table td {
  padding: 0.8rem;
  border-bottom: 1px solid #f3f4f6;
  text-align: center !important;
}

/* Per-table density: table-1 uses tight line-height and taller headers. */
.results-table.table-1 td {
  padding: 0.8rem;
  line-height: 0.9;
}

.results-table.table-1 th {
  padding: 1.2rem 0.8rem;
}

.results-table.table-2 td {
  padding: 0.8rem 0.8rem;
  line-height: 1.3;
}

.results-table.table-2 th {
  padding: 0.8rem 0.8rem;
}

.results-table.table-2 .model-group-header td {
  padding: 0.8rem 0.6rem;
  line-height: 1.3;
}

/* Fixed row heights per table. */
.results-table.table-1 tbody tr {
  height: 48px;
}

.results-table.table-2 tbody tr {
  height: 42px;
}

/* Caption spacing when the caption directly follows a table wrapper. */
.table-1 + .table-caption {
  margin-top: 1.5rem;
}

.table-2 + .table-caption {
  margin-top: 1rem;
}

.results-table .method-name {
  color: #1f2937;
  font-weight: 600;
  text-align: center !important;
}

/* Fixed-width columns: 100px for the three label columns, 80px for metrics. */
.results-table .model-type-column,
.results-table .model-name-column,
.results-table .method-column {
  width: 100px;
  min-width: 100px;
  max-width: 100px;
}

.results-table .metric-column {
  width: 80px;
  min-width: 80px;
  max-width: 80px;
}

/* Row and cell emphasis for NOVER results. */
.results-table .nover-row {
  border-left: 3px solid #10b981;
  background: #f0fdf4;
}

.results-table .best-score {
  position: relative;
  color: #10b981;
  font-weight: 700;
}

.table-section {
  margin: 0;
}

.table-title {
  margin-bottom: 1rem;
  color: #1f2937;
  font-size: 1.5rem;
  font-weight: 600;
  text-align: center;
}

/* Centred caption, capped at 800px wide. */
.table-caption {
  max-width: 800px;
  margin-top: 1rem;
  margin-right: auto;
  margin-left: auto;
  color: #6b7280;
  font-size: 0.9rem;
  line-height: 1.5;
  text-align: center;
}

/* Full-width header row that labels a group of rows. */
.model-group-header {
  background: #f1f5f9 !important;
  color: #334155;
  font-weight: 700;
  text-align: center !important;
}

.model-group-header td {
  text-align: center !important;
}

.score-cell {
  position: relative;
}

/* ---------- Small screens (<= 768px) ---------- */
@media (max-width: 768px) {
  .results-table {
    font-size: 0.8rem;
  }

  .results-table th,
  .results-table td {
    padding: 0.5rem 0.3rem;
  }

  .table-section {
    margin: 2rem 0;
  }

  .table-title {
    font-size: 1.2rem;
  }

  /* Collapse inline two-column grids to a single column. This matches on the
     literal text of the style attribute, so the inline declaration must keep
     the exact spelling "grid-template-columns: 1fr 1fr". */
  div[style*="grid-template-columns: 1fr 1fr"] {
    display: block !important;
  }
}
|
|
|
/* ---------- Formula cards ---------- */

.formula-container {
  position: relative;
  z-index: 1;
  margin: 2rem 0;
  padding: 2rem;
  border: 1px solid #e2e8f0;
  border-radius: 12px;
  background: #f8fafc;
  text-align: center;
}

/* Abacus emoji badge in the top-left corner of each formula card. */
.formula-container::before {
  content: '🧮';
  position: absolute;
  top: 1rem;
  left: 1rem;
  font-size: 1.2rem;
}

.formula-title {
  margin-bottom: 1rem;
  color: #334155;
  font-size: 1.1rem;
  font-weight: 600;
}

.formula-description {
  margin-top: 1rem;
  color: #64748b;
  font-size: 0.9rem;
  line-height: 1.5;
}

/* ---------- Diagram cards ---------- */

/* Auto-fitting grid; columns are at least 300px wide. */
.diagram-grid {
  display: grid;
  gap: 2rem;
  grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
  margin: 3rem 0;
}

.diagram-card {
  position: relative;
  z-index: 1;
  padding: 2rem;
  border: 1px solid #e5e7eb;
  border-radius: 12px;
  background: white;
  box-shadow: 0 2px 8px rgba(0,0,0,0.06);
  text-align: center;
  transition: all 0.3s ease;
}

/* Lift the card by 2px and deepen its shadow on hover. */
.diagram-card:hover {
  box-shadow: 0 8px 24px rgba(0,0,0,0.12);
  transform: translateY(-2px);
}

/* 200px-tall dashed box that centres its children in a column. */
.diagram-placeholder {
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  height: 200px;
  margin-bottom: 1rem;
  border: 2px dashed #94a3b8;
  border-radius: 8px;
  background: #f1f5f9;
  transition: all 0.3s ease;
}

.diagram-placeholder:hover {
  border-color: #64748b;
  background: #e2e8f0;
}

.diagram-icon {
  margin-bottom: 0.5rem;
  color: #64748b;
  font-size: 3rem;
}

.diagram-label {
  color: #475569;
  font-size: 0.9rem;
  font-weight: 500;
}

.diagram-description {
  color: #64748b;
  font-size: 0.85rem;
  line-height: 1.4;
}
|
|
|
|
|
/* ---------- Hero link buttons: shimmer, hover lift, press ----------
   The buttons in the hero set `transform`, `box-shadow` and `transition`
   in their inline style attributes. Inline declarations outrank normal
   stylesheet rules, so without !important the hover lift, hover shadow and
   press transition below would never take effect. */

/* Clip the shimmer pseudo-element to the button outline. */
.external-link.button {
  position: relative;
  overflow: hidden;
}

/* Translucent highlight band parked just off the left edge. */
.external-link.button::before {
  content: '';
  position: absolute;
  top: 0;
  left: -100%;
  width: 100%;
  height: 100%;
  background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
  transition: left 0.5s;
}

/* Hover: raise the button and deepen the shadow (overrides inline values). */
.external-link.button:hover {
  transform: translateY(-2px) !important;
  box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15), 0 4px 8px rgba(0, 0, 0, 0.1) !important;
}

/* Hover: sweep the highlight band across to the right edge. */
.external-link.button:hover::before {
  left: 100%;
}

/* Press: drop back to the resting position with a quick transition.
   Declared after :hover with equal specificity, so it wins while pressed. */
.external-link.button:active {
  transform: translateY(0) !important;
  transition: transform 0.1s !important;
}
|
</style> |
|
|
|
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>

<!-- ES6 polyfill. The original polyfill.io host was taken over and served
     malicious code in 2024, so the bundle is loaded from Cloudflare's cdnjs
     mirror of the same service instead. -->
<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>

<!-- MathJax reads window.MathJax when it starts, so the configuration must be
     defined BEFORE the (async) loader tag; otherwise a fast or cached load
     could start with default delimiters and ignore the $...$ inline math. -->
<script>
  window.MathJax = {
    tex: {
      inlineMath: [['$', '$'], ['\\(', '\\)']],
      displayMath: [['$$', '$$'], ['\\[', '\\]']]
    }
  };
</script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
</head> |
|
<body> |
|
<div class="geometric-bg"></div> |
|
|
|
<!-- Hero banner: title, subtitle, authors and resource links.
     Kept as a <section> because the stylesheet's :nth-of-type rules count <section> siblings. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-widescreen">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">NOVER</h1>
          <p class="subtitle is-4" style="color: #888;">Incentive Training for Language Models via Verifier-Free Reinforcement Learning</p>

          <div class="is-size-5 publication-authors" style="margin: 2rem 0;">
            <span class="author-block">Wei Liu¹ • Siya Qi¹ • Xinyu Wang¹ • Chen Qian² • Yali Du¹·³ • Yulan He¹·³</span>
            <div style="margin-top: 0.5rem; font-size: 0.9rem; color: #666;">
              ¹King's College London • ²Shanghai Jiao Tong University • ³The Alan Turing Institute
            </div>
          </div>

          <!-- External resources open in a new tab; rel="noopener noreferrer" stops the
               opened page from reaching back through window.opener. Icons are decorative,
               so they are hidden from assistive technology (the text label names the link). -->
          <div class="publication-links" style="display: flex; justify-content: center; gap: 1rem; flex-wrap: wrap;">
            <a href="https://arxiv.org/pdf/2505.16022.pdf" target="_blank" rel="noopener noreferrer"
               class="external-link button is-normal" style="background: linear-gradient(135deg, #B31B1B 0%, #D32F2F 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(179, 27, 27, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
              <span class="icon"><i class="fas fa-file-pdf" aria-hidden="true"></i></span>
              <span>Paper</span>
            </a>
            <a href="https://github.com/thinkwee/NOVER" target="_blank" rel="noopener noreferrer"
               class="external-link button is-normal" style="background: linear-gradient(135deg, #24292e 0%, #2f363d 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(36, 41, 46, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
              <span class="icon"><i class="fab fa-github" aria-hidden="true"></i></span>
              <span>Code</span>
            </a>
            <a href="https://huggingface.co/collections/thinkwee/novereason-68937ca75331dfaddaf24016" target="_blank" rel="noopener noreferrer"
               class="external-link button is-normal" style="background: linear-gradient(135deg, #FFD43B 0%, #FFE066 100%); color: #000; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(255, 212, 59, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
              <span class="icon"><i class="fas fa-database" aria-hidden="true"></i></span>
              <span>Dataset</span>
            </a>
            <a href="https://huggingface.co/collections/thinkwee/nover1-68a6524eac725c915abd77e3" target="_blank" rel="noopener noreferrer"
               class="external-link button is-normal" style="background: linear-gradient(135deg, #0EA5E9 0%, #38BDF8 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(14, 165, 233, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
              <span class="icon"><i class="fas fa-cube" aria-hidden="true"></i></span>
              <span>Model</span>
            </a>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>
|
|
|
<!-- TL;DR summary card next to the logo figure. -->
<section class="section">
  <div class="container is-widescreen">
    <!-- Two equal columns. The mobile media query in the stylesheet matches this div by the
         literal text "grid-template-columns: 1fr 1fr" in its style attribute, so keep that
         declaration spelled exactly as is. -->
    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; align-items: start;">

      <div class="abstract-card">
        <h2 class="title is-3" style="color: #1a1a1a; margin-bottom: 1.5rem;">TL;DR</h2>
        <p class="is-size-5" style="color: #374151; line-height: 1.6;">
          <span class="nover">NOVER</span> (NO-Verifier Reinforcement Learning) enables
          incentive training on any text-to-text task without external verifiers. It utilizes the policy model's reasoning perplexity to estimate the reward.
          <br>
          <br>
          <strong>• Your LLM is secretly a verifier.
          <br>
          • Your LLM only reasons on <s>Easy-to-Verify</s> tasks.
          <br>
          • Your LLM can <s>reason</s> on ANY tasks.
          <br>
          • Your LLM can be incentivized to do more than reasoning.</strong>
          <br>
        </p>
      </div>

      <div class="figure-container">
        <div class="figure-placeholder" style="height: 280px; display: flex; flex-direction: column; justify-content: center; align-items: center;">
          <img src="logo.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
        </div>
        <div style="font-size: 1.1rem; color: #2e3036; text-align: center; margin-top: 0.5rem;">
          <div><span class="nover">NOVER</span> extends RLVR on any text-to-text task</div>
          <div>beyond easy-to-verify math/coding problems.</div>
        </div>
      </div>
    </div>
  </div>
</section>
|
|
|
<!-- Task-coverage overview followed by an example figure. -->
<section class="section">
  <div class="container is-widescreen">
    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Incentivize Reasoning on Any Task</h2>
    <p class="is-size-5" style="color: #6b7280; margin-bottom: 3rem; max-width: 800px; margin-left: auto; margin-right: auto;">
      NOVER enables training large reasoning models on any text data and any task.<br>
      NO verifiers/models/rules needed, just ground truth answer, and policy model itself.<br>
      <strong>General Reasoning:</strong> ⚛️ physics • ⚖️ law • 🏥 medical • 💰 finance<br>
      <strong>Creative Tasks:</strong> 🎨 creative writing<br>
      <strong>Social Intelligence:</strong> 🧠 theory of mind • 😊 emotion detection • 🤝 social reasoning<br>
      <strong>Natural Language Generation:</strong> 🌍 translation • 📚 summarization
    </p>

    <div class="figure-container">
      <img src="example.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
    </div>
  </div>
</section>
|
|
|
<!-- Methodology: two overview figures, a four-way training-paradigm comparison,
     and three formula cards typeset by MathJax. -->
<section class="section">
  <div class="container is-widescreen">
    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">NOVER Methodology</h2>

    <!-- Side-by-side figures in fixed 600x420 boxes. -->
    <div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
      <div style="width: 600px; height: 420px;">
        <img src="paradigm.png" alt="paradigm" style="width: 100%; height: 100%; object-fit: contain;">
      </div>
      <div style="width: 600px; height: 420px;">
        <img src="overall.png" alt="overall" style="width: 100%; height: 100%; object-fit: contain;">
      </div>
    </div>

    <!-- Paradigm comparison cards. Icons are decorative; the heading names each card. -->
    <div class="method-comparison">
      <div class="method-card">
        <div class="method-icon"><i class="fas fa-graduation-cap" aria-hidden="true"></i></div>
        <h3 class="title is-5">SFT</h3>
        <p>Memorize Input-Output Patterns</p>
      </div>
      <div class="method-card">
        <div class="method-icon"><i class="fas fa-robot" aria-hidden="true"></i></div>
        <h3 class="title is-5">RLHF</h3>
        <p>Train Reward Model <br>Give Preference Feedback</p>
      </div>
      <div class="method-card">
        <div class="method-icon"><i class="fas fa-balance-scale" aria-hidden="true"></i></div>
        <h3 class="title is-5">RLVR</h3>
        <p>Rule-based Reward <br>End2End Outcome RL</p>
      </div>
      <div class="method-card nover">
        <div class="method-icon"><i class="fas fa-brain" aria-hidden="true"></i></div>
        <h3 class="title is-5"><span class="nover">NOVER</span></h3>
        <p>Reasoning Perplexity as Reward<br>Reason on Any Task</p>
      </div>
    </div>

    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 2rem; margin: 3rem 0;">

      <div class="formula-container">
        <div class="formula-title">Reasoning Perplexity</div>
        <!-- The subscript uses \lt rather than a literal "<": a raw "<" directly followed by
             a letter is read by the HTML parser as the start of a tag, which would cut the
             formula short and swallow the closing tag after it. -->
        <div style="font-size: 0.9rem; margin: 1rem 0;">
          $P_r(p, t, g) = \exp\left(-\frac{\sum_{i=1}^{|g|} \log \pi_{p}(g_i \mid p, t, g_{\lt i})}{|g| \cdot N(|t|)}\right)$
        </div>
        <div class="formula-description">
          Use perplexity of policy model on ground truth conditioned on reasoning trajectory as reward proxy
        </div>
      </div>

      <div class="formula-container">
        <div class="formula-title">Rewards</div>
        <div style="font-size: 1.1rem; margin: 1rem 0;">
          $$R_{\mathrm{total}} = w_{\mathrm{f}} R_{\mathrm{f}} + \mathbb{I}(R_{\mathrm{f}} = 1) \cdot (w_{\mathrm{r}} R_{\mathrm{r}} + w_{\mathrm{e}} R_{\mathrm{e}})$$
        </div>
        <div class="formula-description">
          Combined reward function incorporating reasoning, efficiency, and format components
        </div>
      </div>

      <div class="formula-container">
        <div class="formula-title">Policy-Proxy Synchronization</div>
        <div style="font-size: 1.1rem; margin: 1rem 0;">
          $$\pi_{\mathrm{p}} \leftarrow \alpha \cdot \pi_{\mathrm{p}} + (1-\alpha) \cdot \pi_{\theta}$$
        </div>
        <div class="formula-description">
          Smooth synchronization between policy and proxy ensures stable training with limited resource
        </div>
      </div>
    </div>

  </div>
</section>
|
|
|
<section class="section"> |
|
<div class="container is-widescreen"> |
|
<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Experimental Results</h2> |
|
|
|
|
|
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; margin-top: 3rem; align-items: start;"> |
|
|
|
<div> |
|
<h3 class="table-title">Overall on NOVEReason Dataset</h3> |
|
|
|
<!-- Table 1: scores for each method on seven benchmarks (abbreviations are
     expanded in the legend below the table). Rows are grouped under
     full-width group-header rows; rows with class "nover-row" carry the
     cells marked "best-score". -->
<div class="table-section">
  <div class="results-table table-1">
    <table>
      <thead>
        <tr>
          <!-- scope="col" ties each header to its column for assistive technology. -->
          <th scope="col">Method</th>
          <th scope="col">NR</th>
          <th scope="col">GT</th>
          <th scope="col">WI</th>
          <th scope="col">SGN</th>
          <th scope="col">EB</th>
          <th scope="col">TB</th>
          <th scope="col">OPUS</th>
        </tr>
      </thead>
      <tbody>
        <!-- Group header spans all 8 columns (1 method column + 7 metric columns). -->
        <tr class="model-group-header">
          <td colspan="8"><strong>Qwen2.5-3B</strong></td>
        </tr>
        <tr>
          <td class="method-name">Base</td>
          <td class="score-cell">21.80%</td>
          <td class="score-cell">43.10%</td>
          <td class="score-cell">18.40%</td>
          <td class="score-cell">18.70%</td>
          <td class="score-cell">32.03%</td>
          <td class="score-cell">46.79%</td>
          <td class="score-cell">16.70%</td>
        </tr>
        <tr>
          <td class="method-name">+ CoT</td>
          <td class="score-cell">24.40%</td>
          <td class="score-cell">48.90%</td>
          <td class="score-cell">24.20%</td>
          <td class="score-cell">14.76%</td>
          <td class="score-cell">28.12%</td>
          <td class="score-cell">51.23%</td>
          <td class="score-cell">1.40%</td>
        </tr>
        <tr>
          <td class="method-name">+ SFT</td>
          <td class="score-cell">27.00%</td>
          <td class="score-cell">36.20%</td>
          <td class="score-cell">27.30%</td>
          <td class="score-cell">20.08%</td>
          <td class="score-cell">36.72%</td>
          <td class="score-cell">48.66%</td>
          <td class="score-cell">17.30%</td>
        </tr>
        <tr class="nover-row">
          <td class="method-name"><strong>+ NOVER</strong></td>
          <td class="score-cell best-score">28.60%</td>
          <td class="score-cell best-score">60.30%</td>
          <td class="score-cell best-score">28.10%</td>
          <td class="score-cell best-score">41.64%</td>
          <td class="score-cell best-score">38.28%</td>
          <td class="score-cell best-score">57.88%</td>
          <td class="score-cell best-score">20.70%</td>
        </tr>

        <tr class="model-group-header">
          <td colspan="8"><strong>Qwen2.5-7B</strong></td>
        </tr>
        <tr>
          <td class="method-name">Base</td>
          <td class="score-cell">31.80%</td>
          <td class="score-cell">48.50%</td>
          <td class="score-cell">20.70%</td>
          <td class="score-cell">24.21%</td>
          <td class="score-cell">28.91%</td>
          <td class="score-cell">44.22%</td>
          <td class="score-cell">19.30%</td>
        </tr>
        <tr>
          <td class="method-name">+ CoT</td>
          <td class="score-cell">31.20%</td>
          <td class="score-cell">57.60%</td>
          <td class="score-cell">29.20%</td>
          <td class="score-cell">33.46%</td>
          <td class="score-cell">38.28%</td>
          <td class="score-cell">50.99%</td>
          <td class="score-cell">1.60%</td>
        </tr>
        <tr>
          <td class="method-name">+ SFT</td>
          <td class="score-cell">27.50%</td>
          <td class="score-cell">45.20%</td>
          <td class="score-cell">33.50%</td>
          <td class="score-cell">37.85%</td>
          <td class="score-cell">47.66%</td>
          <td class="score-cell">57.06%</td>
          <td class="score-cell">23.30%</td>
        </tr>
        <tr class="nover-row">
          <td class="method-name"><strong>+ NOVER</strong></td>
          <td class="score-cell best-score">38.20%</td>
          <td class="score-cell best-score">61.80%</td>
          <td class="score-cell best-score">36.60%</td>
          <td class="score-cell best-score">50.79%</td>
          <td class="score-cell best-score">49.22%</td>
          <td class="score-cell best-score">67.79%</td>
          <td class="score-cell best-score">26.80%</td>
        </tr>

        <tr class="model-group-header">
          <td colspan="8"><strong>Other Baselines</strong></td>
        </tr>
        <tr>
          <td class="method-name">Qwen2.5-3B-Instruct</td>
          <td class="score-cell">27.10%</td>
          <td class="score-cell">50.00%</td>
          <td class="score-cell">31.50%</td>
          <td class="score-cell">21.25%</td>
          <td class="score-cell">40.62%</td>
          <td class="score-cell">58.69%</td>
          <td class="score-cell">19.90%</td>
        </tr>
        <tr>
          <td class="method-name">Qwen2.5-7B-Instruct</td>
          <td class="score-cell">29.90%</td>
          <td class="score-cell">56.20%</td>
          <td class="score-cell">35.60%</td>
          <td class="score-cell">67.72%</td>
          <td class="score-cell">46.88%</td>
          <td class="score-cell">65.23%</td>
          <td class="score-cell">23.50%</td>
        </tr>
        <tr>
          <td class="method-name">R1-Distill-Qwen-7B</td>
          <td class="score-cell">41.00%</td>
          <td class="score-cell">60.20%</td>
          <td class="score-cell">38.00%</td>
          <td class="score-cell">40.16%</td>
          <td class="score-cell">35.16%</td>
          <td class="score-cell">54.61%</td>
          <td class="score-cell">8.20%</td>
        </tr>
      </tbody>
    </table>
  </div>
  <!-- Legend expanding the column abbreviations used in the header row. -->
  <div class="table-caption">
    <strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct, <strong>SGN:</strong> SS-GEN,
    <strong>EB:</strong> EmoBench, <strong>TB:</strong> TomBench, <strong>OPUS:</strong> OPUS-BOOK-TRANSLATION.
  </div>
</div>
|
</div> |
|
|
|
|
|
<!-- Table 2: NR / GT / WI scores per method for four backbone models.
     The first column ("Model Type") spans 8 rows per group and the second
     ("Model") spans 4 rows per model, so only the first row of each model
     carries those cells. -->
<div>
  <h3 class="table-title">General Reasoning with Different Backends</h3>
  <div class="table-section">
    <div class="results-table table-2">
      <table>
        <thead>
          <tr>
            <!-- scope="col" ties each header to its column for assistive technology. -->
            <th class="model-type-column" scope="col">Model Type</th>
            <th class="model-name-column" scope="col">Model</th>
            <th class="method-column" scope="col">Method</th>
            <th class="metric-column" scope="col">NR</th>
            <th class="metric-column" scope="col">GT</th>
            <th class="metric-column" scope="col">WI</th>
          </tr>
        </thead>
        <tbody>
          <!-- Model type "Base": Qwen2.5 3B (4 rows) + Qwen2.5 7B (4 rows). -->
          <tr class="model-group-header">
            <td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Base</td>
            <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 3B</td>
            <td class="method-name">Base</td>
            <td class="score-cell">21.80%</td>
            <td class="score-cell">43.10%</td>
            <td class="score-cell">18.40%</td>
          </tr>
          <tr>
            <td class="method-name">+ CoT</td>
            <td class="score-cell">24.40%</td>
            <td class="score-cell">48.90%</td>
            <td class="score-cell">24.20%</td>
          </tr>
          <tr>
            <td class="method-name">+ SFT</td>
            <td class="score-cell">27.00%</td>
            <td class="score-cell">36.20%</td>
            <td class="score-cell">27.30%</td>
          </tr>
          <tr class="nover-row">
            <td class="method-name"><strong>+ NOVER</strong></td>
            <td class="score-cell best-score"><strong>28.60%</strong></td>
            <td class="score-cell best-score"><strong>60.30%</strong></td>
            <td class="score-cell best-score"><strong>28.10%</strong></td>
          </tr>

          <tr class="model-group-header">
            <!-- Label spelled "Qwen2.5 7B" to match the sibling "Qwen2.5 3B" row group. -->
            <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 7B</td>
            <td class="method-name">Base</td>
            <td class="score-cell">31.80%</td>
            <td class="score-cell">48.50%</td>
            <td class="score-cell">20.70%</td>
          </tr>
          <tr>
            <td class="method-name">+ CoT</td>
            <td class="score-cell">31.20%</td>
            <td class="score-cell">57.60%</td>
            <td class="score-cell">29.20%</td>
          </tr>
          <tr>
            <td class="method-name">+ SFT</td>
            <td class="score-cell">27.50%</td>
            <td class="score-cell">45.20%</td>
            <td class="score-cell">33.50%</td>
          </tr>
          <tr class="nover-row">
            <td class="method-name"><strong>+ NOVER</strong></td>
            <td class="score-cell best-score"><strong>38.20%</strong></td>
            <td class="score-cell best-score"><strong>61.80%</strong></td>
            <td class="score-cell best-score"><strong>36.60%</strong></td>
          </tr>

          <!-- Model type "Instruct": Llama-3.1-8B (4 rows) + Mistral-7B (4 rows).
               In this group the "best-score" marker is not always on the NOVER row. -->
          <tr class="model-group-header">
            <td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Instruct</td>
            <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Llama-3.1-8B</td>
            <td class="method-name">Base</td>
            <td class="score-cell">34.20%</td>
            <td class="score-cell">36.70%</td>
            <td class="score-cell">29.90%</td>
          </tr>
          <tr>
            <td class="method-name">+ CoT</td>
            <td class="score-cell">28.10%</td>
            <td class="score-cell">35.10%</td>
            <td class="score-cell">30.00%</td>
          </tr>
          <tr>
            <td class="method-name">+ SFT</td>
            <td class="score-cell">23.60%</td>
            <td class="score-cell">23.40%</td>
            <td class="score-cell best-score"><strong>34.50%</strong></td>
          </tr>
          <tr class="nover-row">
            <td class="method-name"><strong>+ NOVER</strong></td>
            <td class="score-cell best-score"><strong>40.70%</strong></td>
            <td class="score-cell best-score"><strong>41.50%</strong></td>
            <td class="score-cell">34.00%</td>
          </tr>

          <tr class="model-group-header">
            <td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Mistral-7B</td>
            <td class="method-name">Base</td>
            <td class="score-cell best-score"><strong>33.00%</strong></td>
            <td class="score-cell">17.80%</td>
            <td class="score-cell">27.00%</td>
          </tr>
          <tr>
            <td class="method-name">+ CoT</td>
            <td class="score-cell">29.20%</td>
            <td class="score-cell">18.60%</td>
            <td class="score-cell">27.10%</td>
          </tr>
          <tr>
            <td class="method-name">+ SFT</td>
            <td class="score-cell">22.50%</td>
            <td class="score-cell">20.70%</td>
            <td class="score-cell">27.80%</td>
          </tr>
          <tr class="nover-row">
            <td class="method-name"><strong>+ NOVER</strong></td>
            <td class="score-cell">32.20%</td>
            <td class="score-cell best-score"><strong>21.90%</strong></td>
            <td class="score-cell best-score"><strong>29.30%</strong></td>
          </tr>
        </tbody>
      </table>
    </div>
    <!-- Legend expanding the metric column abbreviations. -->
    <div class="table-caption">
      <strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct.
    </div>
  </div>
</div>
|
</div> |
|
|
|
|
|
<!-- Key Takeaways card: a heading followed by six summary bullets. -->
<div style="margin-top: 3rem;">
  <div class="glass-card">
    <h3 class="title is-4" style="color: #1a1a1a; margin-bottom: 1.5rem;">Key Takeaways</h3>
    <!-- Each item starts with a literal "•" character typed into the text. -->
    <ul style="color: #374151; line-height: 1.8; font-size: 0.9rem;">
      <li>• NOVER trains successfully on both pretrained and instruct models, with larger gains on stronger base models</li>
      <li>• Despite the free-form nature of answers, NOVER still prefers objective solutions instead of subjective ones</li>
      <li>• On general reasoning, NOVER inherits base model boundaries, which have been observed in math reasoning. It struggles on false-premise tasks like FANToM</li>
      <li>• NOVER's design prevents reward hacking, avoiding issues such as reasoning explosion and collapse</li>
      <li>• Unlike closed-source or verifier-based rewards that suffer from cold start and hacking risks, NOVER remains stable</li>
      <li>• Its dense reward signals allow greater error tolerance and encourage diverse reasoning patterns</li>
    </ul>
  </div>
</div>
|
|
|
|
|
|
|
</div> |
|
</section> |
|
|
|
<!-- "Inverse Incentive Training" section: two side-by-side figures followed
     by a card contrasting two slogans joined by an arrow. -->
<section class="section">
  <div class="container is-widescreen">
    <h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Inverse Incentive Training</h2>

    <!-- Figure row: each image is letterboxed inside a fixed 600x420 box via object-fit: contain. -->
    <div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
      <div style="width: 600px; height: 420px;">
        <!-- alt text names the figure instead of echoing the file name. -->
        <img src="iit.png" alt="Inverse Incentive Training figure" style="width: 100%; height: 100%; object-fit: contain;">
      </div>
      <div style="width: 600px; height: 420px;">
        <img src="iit_result.png" alt="Inverse Incentive Training results figure" style="width: 100%; height: 100%; object-fit: contain;">
      </div>
    </div>

    <div class="glass-card">
      <div style="text-align: center;">
        <div style="display: flex; justify-content: center; align-items: center; gap: 2rem; margin-bottom: 1rem;">
          <div style="text-align: center;">
            <!-- Decorative icon: hidden from assistive technology; the text below carries the meaning. -->
            <i class="fas fa-fish" aria-hidden="true" style="font-size: 3rem; margin-bottom: 0.5rem; color: #0e41a8;"></i>
            <div style="font-size: 1.0rem; color: #0e41a8;">Reward the Outcome, Incentivize Process</div>
          </div>
          <div style="font-size: 1.5rem;">→</div>
          <div style="text-align: center;">
            <!-- Decorative icon: hidden from assistive technology; the text below carries the meaning. -->
            <i class="fas fa-graduation-cap" aria-hidden="true" style="font-size: 3rem; margin-bottom: 0.5rem; color: #d736d2;"></i>
            <div style="font-size: 1.0rem; color: #d736d2;">Write Rubrics in the Outcome, Process as Result</div>
          </div>
        </div>
        <div style="font-size: 1.2rem; color: #000000;">Teaching Models "How to Fish" Rather Than Giving Them Fish</div>
      </div>
    </div>
  </div>
</section>
|
|
|
|
|
|
|
<!-- Citation section: BibTeX entry for the paper, shown in a scrollable
     preformatted block. -->
<section class="section" id="BibTeX">
  <div class="container is-widescreen">
    <div class="glass-card">
      <h2 class="title is-3">Citation</h2>
      <!-- Text inside <pre> is rendered verbatim, so the entry is kept free of
           HTML nesting indentation, stray characters and blank lines; only the
           two-space BibTeX field indent is intentional. -->
      <pre style="background: #f8f9fa; padding: 1.5rem; border-radius: 10px; overflow-x: auto;"><code>@article{liu2025nover,
  title={NOVER: Incentive Training for Language Models via Verifier-Free Reinforcement Learning},
  author={Liu, Wei and Qi, Siya and Wang, Xinyu and Qian, Chen and Du, Yali and He, Yulan},
  journal={arXiv preprint arXiv:2505.16022},
  year={2025}
}</code></pre>
    </div>
  </div>
</section>
|
|
|
<!-- Page footer: author link and content licence notice. -->
<footer class="section" style="background: white; border-top: 1px solid #e5e7eb; margin-top: 4rem;">
  <div class="container has-text-centered">
    <div class="content">
      <div style="margin-bottom: 2rem;">
        <!-- rel="noopener" keeps the newly opened tab from reaching back into this page via window.opener. -->
        <p>Find me on <a href="https://thinkwee.top/about" target="_blank" rel="noopener" style="color: #10b981;">thinkwee.top/about</a>, with other interesting works on LLM Agent🤖, NLP and more~</p>
      </div>
      <p style="color: #6b7280;">
        <!-- Licence link uses https rather than plain http. -->
        Licensed under <a href="https://creativecommons.org/licenses/by-sa/4.0/" target="_blank" rel="noopener" style="color: #10b981;">CC BY-SA 4.0</a>
      </p>
    </div>
  </div>
</footer>
|
|
|
</body> |
|
</html> |
|
|