NOVER / index.html
thinkwee's picture
Update index.html
91eeccf verified
<!DOCTYPE html>
<!-- lang tells screen readers, hyphenation and translation tools which language the page is written in. -->
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="description"
content="NOVER: Incentive Training for Language Models via Verifier-Free Reinforcement Learning">
<meta name="keywords" content="NOVER, Reinforcement Learning, Language Models, Reasoning">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>NOVER: NO-VERifier Reinforcement Learning</title>
<!-- Third-party stylesheets: Inter webfont, Bulma layout framework, Font Awesome and Academicons icon fonts. -->
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap"
rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.4/css/bulma.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<style>
/* Page base: light grey canvas, Inter with system-font fallbacks. */
body {
background: #fafafa;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
color: #1a1a1a;
/* Makes <body> the containing block for the absolutely positioned .geometric-bg below. */
position: relative;
overflow-x: hidden;
}
/* Decorative layer 1: three very faint radial colour washes pinned to the viewport. */
body::before {
content: '';
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background:
radial-gradient(circle at 20% 80%, rgba(16, 185, 129, 0.03) 0%, transparent 50%),
radial-gradient(circle at 80% 20%, rgba(59, 130, 246, 0.03) 0%, transparent 50%),
radial-gradient(circle at 40% 40%, rgba(16, 185, 129, 0.02) 0%, transparent 50%);
/* Purely visual: never intercept clicks, and sit behind the page content. */
pointer-events: none;
z-index: -1;
}
/* Decorative layer 2: green and blue diagonal stripes tiled every 60px at 3% opacity,
   stretched over the full size of <body>. */
.geometric-bg {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
opacity: 0.03;
background-image:
linear-gradient(45deg, transparent 45%, #10b981 45%, #10b981 55%, transparent 55%),
linear-gradient(-45deg, transparent 45%, #3b82f6 45%, #3b82f6 55%, transparent 55%);
background-size: 60px 60px;
/* Second stripe layer is offset by half a tile so the two diagonals interleave. */
background-position: 0 0, 30px 30px;
pointer-events: none;
z-index: -1;
}
/* Hero Section - Brain/Neural Network Pattern */
.hero {
background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
border-radius: 12px;
margin: 2rem;
box-shadow: 0 4px 24px rgba(0,0,0,0.06);
border: 1px solid #e5e7eb;
position: relative;
overflow: hidden;
}
.hero::before {
content: '';
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-image:
repeating-linear-gradient(
45deg,
transparent,
transparent 40px,
rgba(16, 185, 129, 0.04) 40px,
rgba(16, 185, 129, 0.04) 80px
),
repeating-linear-gradient(
-45deg,
transparent,
transparent 40px,
rgba(59, 130, 246, 0.03) 40px,
rgba(59, 130, 246, 0.03) 80px
);
background-size: 80px 80px, 80px 80px;
background-position: 0 0, 40px 40px;
opacity: 0.3;
pointer-events: none;
z-index: 0;
}
/*
 * Per-section card themes.
 *
 * :nth-of-type() counts sibling elements by tag name, not by class. The hero
 * banner is itself a <section>, so the first ".section" card is the 2nd
 * <section> inside <body>. The indices below therefore run from 2 to 7;
 * starting at 1 leaves the first rule unmatched and applies every theme to
 * the section before the one named in its comment.
 */

/* Card chrome shared by all themed sections. */
.section:nth-of-type(2),
.section:nth-of-type(3),
.section:nth-of-type(4),
.section:nth-of-type(5),
.section:nth-of-type(6),
.section:nth-of-type(7) {
  margin: 2rem;
  border-radius: 12px;
  box-shadow: 0 2px 8px rgba(0,0,0,0.04);
  border: 1px solid #e5e7eb;
  position: relative; /* containing block for the ::before pattern layer */
  overflow: hidden;   /* clip the pattern layer to the rounded corners */
}

/* Pattern overlay shared by all themed sections: fills the card, ignores pointer events. */
.section:nth-of-type(2)::before,
.section:nth-of-type(3)::before,
.section:nth-of-type(4)::before,
.section:nth-of-type(5)::before,
.section:nth-of-type(6)::before,
.section:nth-of-type(7)::before {
  content: '';
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  opacity: 0.3;
  pointer-events: none;
  z-index: 0;
}

/* Abstract Section - Circuit/Technology Pattern */
.section:nth-of-type(2) {
  background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
}
.section:nth-of-type(2)::before {
  background-image:
    repeating-linear-gradient(
      90deg,
      transparent,
      transparent 30px,
      rgba(59, 130, 246, 0.06) 30px,
      rgba(59, 130, 246, 0.06) 60px
    ),
    repeating-linear-gradient(
      0deg,
      transparent,
      transparent 30px,
      rgba(59, 130, 246, 0.06) 30px,
      rgba(59, 130, 246, 0.06) 60px
    );
  background-size: 60px 60px, 60px 60px;
  background-position: 0 0, 30px 30px;
}

/* Incentivize Reasoning Section - Puzzle/Logic Pattern */
.section:nth-of-type(3) {
  background: linear-gradient(135deg, #ffffff 0%, #f0fdf4 100%);
}
.section:nth-of-type(3)::before {
  background-image:
    repeating-linear-gradient(
      45deg,
      transparent,
      transparent 50px,
      rgba(16, 185, 129, 0.07) 50px,
      rgba(16, 185, 129, 0.07) 100px
    ),
    repeating-linear-gradient(
      -45deg,
      transparent,
      transparent 50px,
      rgba(16, 185, 129, 0.05) 50px,
      rgba(16, 185, 129, 0.05) 100px
    );
  background-size: 100px 100px, 100px 100px;
  background-position: 0 0, 50px 50px;
}

/* NOVER Methodology Section - Mathematical/Formula Pattern */
.section:nth-of-type(4) {
  background: linear-gradient(135deg, #ffffff 0%, #fefce8 100%);
}
.section:nth-of-type(4)::before {
  background-image:
    repeating-linear-gradient(
      30deg,
      transparent,
      transparent 40px,
      rgba(245, 158, 11, 0.06) 40px,
      rgba(245, 158, 11, 0.06) 80px
    ),
    repeating-linear-gradient(
      -30deg,
      transparent,
      transparent 40px,
      rgba(245, 158, 11, 0.05) 40px,
      rgba(245, 158, 11, 0.05) 80px
    );
  background-size: 80px 80px, 80px 80px;
  background-position: 0 0, 40px 40px;
}

/* Experimental Results Section - Data/Chart Pattern */
.section:nth-of-type(5) {
  background: linear-gradient(135deg, #ffffff 0%, #fef2f2 100%);
}
.section:nth-of-type(5)::before {
  background-image:
    repeating-linear-gradient(
      0deg,
      transparent,
      transparent 35px,
      rgba(239, 68, 68, 0.06) 35px,
      rgba(239, 68, 68, 0.06) 70px
    ),
    repeating-linear-gradient(
      90deg,
      transparent,
      transparent 35px,
      rgba(239, 68, 68, 0.05) 35px,
      rgba(239, 68, 68, 0.05) 70px
    );
  background-size: 70px 70px, 70px 70px;
  background-position: 0 0, 35px 35px;
}

/* Inverse Incentive Training Section - Fish/Teaching Pattern */
.section:nth-of-type(6) {
  background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
}
.section:nth-of-type(6)::before {
  background-image:
    repeating-linear-gradient(
      60deg,
      transparent,
      transparent 60px,
      rgba(14, 165, 233, 0.07) 60px,
      rgba(14, 165, 233, 0.07) 120px
    ),
    repeating-linear-gradient(
      -60deg,
      transparent,
      transparent 60px,
      rgba(14, 165, 233, 0.05) 60px,
      rgba(14, 165, 233, 0.05) 120px
    );
  background-size: 120px 120px, 120px 120px;
  background-position: 0 0, 60px 60px;
}

/* Citation Section - Book/Paper Pattern */
.section:nth-of-type(7) {
  background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
}
.section:nth-of-type(7)::before {
  background-image:
    repeating-linear-gradient(
      25deg,
      transparent,
      transparent 45px,
      rgba(107, 114, 128, 0.06) 45px,
      rgba(107, 114, 128, 0.06) 90px
    ),
    repeating-linear-gradient(
      -25deg,
      transparent,
      transparent 45px,
      rgba(107, 114, 128, 0.05) 45px,
      rgba(107, 114, 128, 0.05) 90px
    );
  background-size: 90px 90px, 90px 90px;
  background-position: 0 0, 45px 45px;
}
/* Footer Section - Social/Network Pattern */
footer.section {
background: linear-gradient(135deg, #ffffff 0%, #f9fafb 100%);
border-top: 1px solid #e5e7eb;
margin-top: 4rem;
position: relative;
overflow: hidden;
}
footer.section::before {
content: '';
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-image:
repeating-linear-gradient(
45deg,
transparent,
transparent 80px,
rgba(16, 185, 129, 0.06) 80px,
rgba(16, 185, 129, 0.06) 160px
),
repeating-linear-gradient(
-45deg,
transparent,
transparent 80px,
rgba(59, 130, 246, 0.05) 80px,
rgba(59, 130, 246, 0.05) 160px
);
background-size: 160px 160px, 160px 160px;
background-position: 0 0, 80px 80px;
opacity: 0.3;
pointer-events: none;
z-index: 0;
}
/* Ensure content is above patterns */
.hero-body,
.section .container,
footer .container {
position: relative;
z-index: 1;
}
.publication-title {
color: #1a1a1a;
font-weight: 600;
letter-spacing: -0.02em;
}
.nover {
color: #10b981;
font-weight: 600;
}
.glass-card {
background: #f9fafb;
border-radius: 12px;
padding: 2rem;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
border: 1px solid #e5e7eb;
margin: 1rem 0;
transition: all 0.2s ease;
position: relative;
z-index: 1;
}
.glass-card:hover {
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
border-color: #d1d5db;
}
.figure-container {
background: white;
border-radius: 12px;
padding: 2rem;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
border: 1px solid #e5e7eb;
margin: 2rem 0;
text-align: center;
position: relative;
z-index: 1;
}
.figure-placeholder {
background: #f3f4f6;
border: 2px dashed #9ca3af;
border-radius: 8px;
padding: 3rem 2rem;
color: #6b7280;
font-weight: 500;
font-size: 1rem;
margin-bottom: 1rem;
position: relative;
transition: all 0.3s ease;
}
.figure-placeholder:hover {
background: #f9fafb;
border-color: #6b7280;
}
.figure-placeholder.analysis {
background: linear-gradient(135deg, #eff6ff 0%, #dbeafe 100%);
border-color: #3b82f6;
color: #1e40af;
}
.figure-placeholder.experiment {
background: linear-gradient(135deg, #f0fdf4 0%, #dcfce7 100%);
border-color: #10b981;
color: #047857;
}
.figure-placeholder.comparison {
background: linear-gradient(135deg, #fefce8 0%, #fef3c7 100%);
border-color: #f59e0b;
color: #92400e;
}
.analysis-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
gap: 1.5rem;
margin: 2rem 0;
}
.insight-card {
background: white;
border: 1px solid #e5e7eb;
padding: 1.5rem;
border-radius: 12px;
text-align: left;
transition: all 0.2s ease;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
position: relative;
z-index: 1;
}
.insight-card:hover {
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
border-color: #10b981;
}
.insight-title {
font-size: 1.1rem;
font-weight: 600;
margin-bottom: 0.5rem;
color: #1a1a1a;
}
.insight-description {
font-size: 0.9rem;
color: #6b7280;
line-height: 1.5;
}
.comparison-table {
background: white;
border-radius: 12px;
overflow: hidden;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
border: 1px solid #e5e7eb;
margin: 2rem 0;
position: relative;
z-index: 1;
}
.comparison-table table {
width: 100%;
border-collapse: collapse;
}
.comparison-table th {
background: #f9fafb;
color: #374151;
padding: 1rem;
font-weight: 600;
border-bottom: 1px solid #e5e7eb;
}
.comparison-table td {
padding: 1rem;
border-bottom: 1px solid #f3f4f6;
text-align: center;
}
.comparison-table tr:nth-child(even) {
background: #fafbfc;
}
.highlight-row {
background: #f0fdf4 !important;
border-left: 3px solid #10b981;
}
.highlight-number {
color: #10b981;
font-weight: 600;
font-size: 1.05em;
}
.metrics-container {
background: #1f2937;
border-radius: 12px;
padding: 2rem;
color: #e5e7eb;
font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
margin: 2rem 0;
position: relative;
z-index: 1;
}
.metrics-header {
color: #10b981;
font-weight: 600;
margin-bottom: 1rem;
border-bottom: 1px solid #374151;
padding-bottom: 0.5rem;
}
.method-comparison {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 1.5rem;
margin: 2rem 0;
}
.method-card {
background: white;
border-radius: 12px;
padding: 1.5rem;
text-align: center;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
border: 1px solid #e5e7eb;
transition: all 0.2s ease;
position: relative;
z-index: 1;
}
.method-card:hover {
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
border-color: #10b981;
}
.method-icon {
font-size: 2.5rem;
margin-bottom: 1rem;
color: #6b7280;
}
.method-card.nover .method-icon {
color: #10b981;
}
.abstract-card {
background: #f9fafb;
border: 1px solid #e5e7eb;
padding: 2rem;
border-radius: 12px;
margin: 2rem 0;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
position: relative;
z-index: 1;
}
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
gap: 1rem;
margin: 2rem 0;
}
.stat-item {
background: white;
border: 1px solid #e5e7eb;
border-radius: 8px;
padding: 1.5rem;
text-align: center;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
transition: all 0.2s ease;
position: relative;
z-index: 1;
}
.stat-item:hover {
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
border-color: #10b981;
}
.stat-number {
font-size: 2rem;
font-weight: 600;
color: #10b981;
margin-bottom: 0.5rem;
}
.stat-label {
font-size: 0.9rem;
color: #6b7280;
font-weight: 500;
}
/* Card wrapper around each results <table>: white rounded box with a soft shadow. */
.results-table {
background: white;
border-radius: 12px;
overflow: hidden;
box-shadow: 0 2px 12px rgba(0,0,0,0.08);
border: 1px solid #e5e7eb;
margin: 0;
width: 100%;
position: relative;
z-index: 1;
}
.results-table table {
width: 100%;
border-collapse: collapse;
font-size: 0.8rem;
}
/* Header cells. NOTE(review): #374155 is not a colour used anywhere else in this stylesheet
   (.comparison-table th uses #374151, .model-group-header uses #334155) - possibly a typo; confirm. */
.results-table th {
background: #f8fafc;
color: #374155;
padding: 1rem 0.8rem;
font-weight: 600;
border-bottom: 2px solid #e5e7eb;
text-align: center !important;
position: sticky;
top: 0;
z-index: 10;
}
.results-table td {
padding: 0.8rem;
border-bottom: 1px solid #f3f4f6;
text-align: center !important;
}
/* Fine-tune table height - increase row height for Table 1 */
.results-table.table-1 td {
padding: 0.8rem;
line-height: 0.9;
}
.results-table.table-1 th {
padding: 1.2rem 0.8rem;
}
/* Fine-tune table height - reduce row height for Table 2 */
.results-table.table-2 td {
padding: 0.8rem 0.8rem;
line-height: 1.3;
}
.results-table.table-2 th {
padding: 0.8rem 0.8rem;
}
/* Adjust the row height of model-group header rows */
.results-table.table-2 .model-group-header td {
padding: 0.8rem 0.6rem;
line-height: 1.3;
}
/* Further fine-tuning of table row spacing */
.results-table.table-1 tbody tr {
height: 48px;
}
.results-table.table-2 tbody tr {
height: 42px;
}
/* Adjust the spacing of the caption that directly follows each table wrapper */
.table-1 + .table-caption {
margin-top: 1.5rem;
}
.table-2 + .table-caption {
margin-top: 1rem;
}
.results-table .method-name {
text-align: center !important;
font-weight: 600;
color: #1f2937;
}
/* CSS controlling the column widths of Table 2 (width, min-width and max-width pinned to the same value) */
.results-table .model-type-column {
width: 100px;
min-width: 100px;
max-width: 100px;
}
.results-table .model-name-column {
width: 100px;
min-width: 100px;
max-width: 100px;
}
.results-table .method-column {
width: 100px;
min-width: 100px;
max-width: 100px;
}
.results-table .metric-column {
width: 80px;
min-width: 80px;
max-width: 80px;
}
/* Row highlighting for NOVER results: pale green background with a green left accent. */
.results-table .nover-row {
background: #f0fdf4;
border-left: 3px solid #10b981;
}
/* Highlighted score within a comparison group. */
.results-table .best-score {
color: #10b981;
font-weight: 700;
position: relative;
}
.table-section {
margin: 0;
}
.table-title {
font-size: 1.5rem;
font-weight: 600;
color: #1f2937;
margin-bottom: 1rem;
text-align: center;
}
.table-caption {
font-size: 0.9rem;
color: #6b7280;
text-align: center;
margin-top: 1rem;
line-height: 1.5;
max-width: 800px;
margin-left: auto;
margin-right: auto;
}
.model-group-header {
background: #f1f5f9 !important;
color: #334155;
font-weight: 700;
text-align: center !important;
}
.model-group-header td {
text-align: center !important;
}
.score-cell {
position: relative;
}
/* Small screens (<= 768px): tighter tables, smaller titles, single-column layout. */
@media (max-width: 768px) {
  .results-table {
    font-size: 0.8rem;
  }
  /*
   * The per-table rules (.results-table.table-1 td, .results-table.table-2 th, ...)
   * are more specific than plain ".results-table td", so on their own the first two
   * selectors never win for either table. The extra selectors match that
   * specificity so the compact padding actually applies on small screens.
   */
  .results-table th,
  .results-table td,
  .results-table.table-1 th,
  .results-table.table-1 td,
  .results-table.table-2 th,
  .results-table.table-2 td,
  .results-table.table-2 .model-group-header td {
    padding: 0.5rem 0.3rem;
  }
  .table-section {
    margin: 2rem 0;
  }
  .table-title {
    font-size: 1.2rem;
  }
  /* Collapse the inline two-column grids (declared via style="...1fr 1fr...") into stacked blocks. */
  div[style*="grid-template-columns: 1fr 1fr"] {
    display: block !important;
  }
}
.formula-container {
background: #f8fafc;
border: 1px solid #e2e8f0;
border-radius: 12px;
padding: 2rem;
margin: 2rem 0;
text-align: center;
position: relative;
z-index: 1;
}
.formula-container::before {
content: '🧮';
position: absolute;
top: 1rem;
left: 1rem;
font-size: 1.2rem;
}
.formula-title {
font-size: 1.1rem;
font-weight: 600;
color: #334155;
margin-bottom: 1rem;
}
.formula-description {
font-size: 0.9rem;
color: #64748b;
margin-top: 1rem;
line-height: 1.5;
}
.diagram-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 2rem;
margin: 3rem 0;
}
.diagram-card {
background: white;
border-radius: 12px;
padding: 2rem;
box-shadow: 0 2px 8px rgba(0,0,0,0.06);
border: 1px solid #e5e7eb;
text-align: center;
transition: all 0.3s ease;
position: relative;
z-index: 1;
}
.diagram-card:hover {
transform: translateY(-2px);
box-shadow: 0 8px 24px rgba(0,0,0,0.12);
}
.diagram-placeholder {
height: 200px;
background: #f1f5f9;
border: 2px dashed #94a3b8;
border-radius: 8px;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
margin-bottom: 1rem;
transition: all 0.3s ease;
}
.diagram-placeholder:hover {
background: #e2e8f0;
border-color: #64748b;
}
.diagram-icon {
font-size: 3rem;
color: #64748b;
margin-bottom: 0.5rem;
}
.diagram-label {
font-size: 0.9rem;
color: #475569;
font-weight: 500;
}
.diagram-description {
font-size: 0.85rem;
color: #64748b;
line-height: 1.4;
}
/*
 * Apple Design Button Styles
 *
 * The hero link buttons set `transform`, `box-shadow` and `transition` in their
 * inline style attribute. Inline declarations outrank stylesheet rules unless the
 * rule is !important, so the hover lift/shadow and the pressed state below are
 * marked !important; without it they never take effect.
 */
.external-link.button {
  position: relative;
  overflow: hidden; /* clip the sliding highlight to the button */
}
/* Highlight strip parked just outside the left edge of the button. */
.external-link.button::before {
  content: '';
  position: absolute;
  top: 0;
  left: -100%;
  width: 100%;
  height: 100%;
  background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
  transition: left 0.5s;
}
/* Hover: lift the button slightly and deepen its shadow. */
.external-link.button:hover {
  transform: translateY(-2px) !important;
  box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15), 0 4px 8px rgba(0, 0, 0, 0.1) !important;
}
/* Hover: sweep the highlight strip across to the right edge. */
.external-link.button:hover::before {
  left: 100%;
}
/* Pressed: drop back to the resting position quickly. Declared after :hover so it wins while both match. */
.external-link.button:active {
  transform: translateY(0) !important;
  transition: transform 0.1s !important;
}
</style>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<!-- MathJax reads window.MathJax once, when its script starts up, so the configuration
     has to be defined before the (async) loader below can possibly run. -->
<script>
window.MathJax = {
  tex: {
    inlineMath: [['$', '$'], ['\\(', '\\)']],
    displayMath: [['$$', '$$'], ['\\[', '\\]']]
  }
};
</script>
<!-- polyfill.io changed ownership and served malicious code in 2024; load the same
     polyfill bundle from the Cloudflare-hosted cdnjs mirror instead. -->
<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
</head>
<body>
<div class="geometric-bg"></div>
<!-- Hero banner: paper title, subtitle, authors/affiliations and the resource links. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-widescreen">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">NOVER</h1>
          <p class="subtitle is-4" style="color: #888;">Incentive Training for Language Models via Verifier-Free Reinforcement Learning</p>
          <div class="is-size-5 publication-authors" style="margin: 2rem 0;">
            <span class="author-block">Wei Liu¹ • Siya Qi¹ • Xinyu Wang¹ • Chen Qian² • Yali Du¹·³ • Yulan He¹·³</span>
            <div style="margin-top: 0.5rem; font-size: 0.9rem; color: #666;">
              ¹King's College London • ²Shanghai Jiao Tong University • ³The Alan Turing Institute
            </div>
          </div>
          <!-- Each link opens in a new tab; rel="noopener noreferrer" keeps the opened page from
               reaching back into this one via window.opener. Icons are decorative, so they are
               hidden from assistive technology and the visible label names the link. -->
          <div class="publication-links" style="display: flex; justify-content: center; gap: 1rem; flex-wrap: wrap;">
            <a href="https://arxiv.org/pdf/2505.16022.pdf" target="_blank" rel="noopener noreferrer"
               class="external-link button is-normal" style="background: linear-gradient(135deg, #B31B1B 0%, #D32F2F 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(179, 27, 27, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
              <span class="icon"><i class="fas fa-file-pdf" aria-hidden="true"></i></span>
              <span>Paper</span>
            </a>
            <a href="https://github.com/thinkwee/NOVER" target="_blank" rel="noopener noreferrer"
               class="external-link button is-normal" style="background: linear-gradient(135deg, #24292e 0%, #2f363d 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(36, 41, 46, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
              <span class="icon"><i class="fab fa-github" aria-hidden="true"></i></span>
              <span>Code</span>
            </a>
            <a href="https://huggingface.co/collections/thinkwee/novereason-68937ca75331dfaddaf24016" target="_blank" rel="noopener noreferrer"
               class="external-link button is-normal" style="background: linear-gradient(135deg, #FFD43B 0%, #FFE066 100%); color: #000; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(255, 212, 59, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
              <span class="icon"><i class="fas fa-database" aria-hidden="true"></i></span>
              <span>Dataset</span>
            </a>
            <a href="https://huggingface.co/collections/thinkwee/nover1-68a6524eac725c915abd77e3" target="_blank" rel="noopener noreferrer"
               class="external-link button is-normal" style="background: linear-gradient(135deg, #0EA5E9 0%, #38BDF8 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(14, 165, 233, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
              <span class="icon"><i class="fas fa-cube" aria-hidden="true"></i></span>
              <span>Model</span>
            </a>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>
<section class="section">
<div class="container is-widescreen">
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; align-items: start;">
<div class="abstract-card">
<h2 class="title is-3" style="color: #1a1a1a; margin-bottom: 1.5rem;">TL;DR</h2>
<p class="is-size-5" style="color: #374151; line-height: 1.6;">
<span class="nover">NOVER</span> (NO-Verifier Reinforcement Learning) enables
incentive training on any text-to-text task without external verifiers. It uses the policy model's reasoning perplexity to estimate the reward.
<br>
<br>
<strong>• Your LLM is secretly a verifier.
<br>
• Your LLM only reason on <s>Easy-to-Verify</s> tasks.
<br>
• Your LLM can <s>reason</s> on ANY tasks.
<br>
• Your LLM can be incentivized to do more than reasoning.</strong>
<br>
</p>
</div>
<div class="figure-container">
<div class="figure-placeholder" style="height: 280px; display: flex; flex-direction: column; justify-content: center; align-items: center;">
<img src="logo.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
</div>
<div style="font-size: 1.1rem; color: #2e3036; text-align: center; margin-top: 0.5rem;">
<div><span class="nover">NOVER</span> extends RLVR on any text-to-text task</div>
<div>beyond easy-to-verify math/coding problems.</div>
</div>
</div>
</div>
</div>
</section>
<section class="section">
<div class="container is-widescreen">
<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Incentivize Reasoning on Any Task</h2>
<p class="is-size-5" style="color: #6b7280; margin-bottom: 3rem; max-width: 800px; margin-left: auto; margin-right: auto;">
NOVER enables training large reasoning models on any text data and any task.<br>
NO verifiers/models/rules needed, just the ground-truth answer and the policy model itself.<br>
<strong>General Reasoning:</strong> ⚛️ physics • ⚖️ law • 🏥 medical • 💰 finance<br>
<strong>Creative Tasks:</strong> 🎨 creative writing<br>
<strong>Social Intelligence:</strong> 🧠 theory of mind • 😊 emotion detection • 🤝 social reasoning<br>
<strong>Natural Language Generation:</strong> 🌍 translation • 📚 summarization
</p>
<div class="figure-container">
<img src="example.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
</div>
</div>
</section>
<section class="section">
<div class="container is-widescreen">
<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">NOVER Methodology</h2>
<div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
<div style="width: 600px; height: 420px;">
<img src="paradigm.png" alt="paradigm" style="width: 100%; height: 100%; object-fit: contain;">
</div>
<div style="width: 600px; height: 420px;">
<img src="overall.png" alt="overall" style="width: 100%; height: 100%; object-fit: contain;">
</div>
</div>
<div class="method-comparison">
<div class="method-card">
<div class="method-icon"><i class="fas fa-graduation-cap"></i></div>
<h3 class="title is-5">SFT</h3>
<p>Memorize Input-Output Patterns</p>
</div>
<div class="method-card">
<div class="method-icon"><i class="fas fa-robot"></i></div>
<h3 class="title is-5">RLHF</h3>
<p>Train Reward Model <br>Give Preference Feedback</p>
</div>
<div class="method-card">
<div class="method-icon"><i class="fas fa-balance-scale"></i></div>
<h3 class="title is-5">RLVR</h3>
<p>Rule-based Reward <br>End2End Outcome RL</p>
</div>
<div class="method-card nover">
<div class="method-icon"><i class="fas fa-brain"></i></div>
<h3 class="title is-5"><span class="nover">NOVER</span></h3>
<p>Reasoning Perplexity as Reward<br>Reason on Any Task</p>
</div>
</div>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 2rem; margin: 3rem 0;">
<div class="formula-container">
<div class="formula-title">Reasoning Perplexity</div>
<div style="font-size: 0.9rem; margin: 1rem 0;">
$P_r(p, t, g) = \exp\left(-\frac{\sum_{i=1}^{|g|} \log \pi_{p}(g_i \mid p, t, g_{&lt;i})}{|g| \cdot N(|t|)}\right)$
</div>
<div class="formula-description">
Use perplexity of policy model on ground truth conditioned on reasoning trajectory as reward proxy
</div>
</div>
<div class="formula-container">
<div class="formula-title">Rewards</div>
<div style="font-size: 1.1rem; margin: 1rem 0;">
$$R_{\mathrm{total}} = w_{\mathrm{f}} R_{\mathrm{f}} + \mathbb{I}(R_{\mathrm{f}} = 1) \cdot (w_{\mathrm{r}} R_{\mathrm{r}} + w_{\mathrm{e}} R_{\mathrm{e}})$$
</div>
<div class="formula-description">
Combined reward function incorporating reasoning, efficiency, and format components
</div>
</div>
<div class="formula-container">
<div class="formula-title">Policy-Proxy Synchronization</div>
<div style="font-size: 1.1rem; margin: 1rem 0;">
$$\pi_{\mathrm{p}} \leftarrow \alpha \cdot \pi_{\mathrm{p}} + (1-\alpha) \cdot \pi_{\theta}$$
</div>
<div class="formula-description">
Smooth synchronization between policy and proxy ensures stable training with limited resources
</div>
</div>
</div>
</div>
</section>
<section class="section">
<div class="container is-widescreen">
<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Experimental Results</h2>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; margin-top: 3rem; align-items: start;">
<div>
<h3 class="table-title">Overall on NOVEReason Dataset</h3>
<div class="table-section">
<div class="results-table table-1">
<table>
<thead>
<tr>
<th>Method</th>
<th>NR</th>
<th>GT</th>
<th>WI</th>
<th>SGN</th>
<th>EB</th>
<th>TB</th>
<th>OPUS</th>
</tr>
</thead>
<tbody>
<tr class="model-group-header">
<td colspan="8"><strong>Qwen2.5-3B</strong></td>
</tr>
<tr>
<td class="method-name">Base</td>
<td class="score-cell">21.80%</td>
<td class="score-cell">43.10%</td>
<td class="score-cell">18.40%</td>
<td class="score-cell">18.70%</td>
<td class="score-cell">32.03%</td>
<td class="score-cell">46.79%</td>
<td class="score-cell">16.70%</td>
</tr>
<tr>
<td class="method-name">+ CoT</td>
<td class="score-cell">24.40%</td>
<td class="score-cell">48.90%</td>
<td class="score-cell">24.20%</td>
<td class="score-cell">14.76%</td>
<td class="score-cell">28.12%</td>
<td class="score-cell">51.23%</td>
<td class="score-cell">1.40%</td>
</tr>
<tr>
<td class="method-name">+ SFT</td>
<td class="score-cell">27.00%</td>
<td class="score-cell">36.20%</td>
<td class="score-cell">27.30%</td>
<td class="score-cell">20.08%</td>
<td class="score-cell">36.72%</td>
<td class="score-cell">48.66%</td>
<td class="score-cell">17.30%</td>
</tr>
<tr class="nover-row">
<td class="method-name"><strong>+ NOVER</strong></td>
<td class="score-cell best-score">28.60%</td>
<td class="score-cell best-score">60.30%</td>
<td class="score-cell best-score">28.10%</td>
<td class="score-cell best-score">41.64%</td>
<td class="score-cell best-score">38.28%</td>
<td class="score-cell best-score">57.88%</td>
<td class="score-cell best-score">20.70%</td>
</tr>
<tr class="model-group-header">
<td colspan="8"><strong>Qwen2.5-7B</strong></td>
</tr>
<tr>
<td class="method-name">Base</td>
<td class="score-cell">31.80%</td>
<td class="score-cell">48.50%</td>
<td class="score-cell">20.70%</td>
<td class="score-cell">24.21%</td>
<td class="score-cell">28.91%</td>
<td class="score-cell">44.22%</td>
<td class="score-cell">19.30%</td>
</tr>
<tr>
<td class="method-name">+ CoT</td>
<td class="score-cell">31.20%</td>
<td class="score-cell">57.60%</td>
<td class="score-cell">29.20%</td>
<td class="score-cell">33.46%</td>
<td class="score-cell">38.28%</td>
<td class="score-cell">50.99%</td>
<td class="score-cell">1.60%</td>
</tr>
<tr>
<td class="method-name">+ SFT</td>
<td class="score-cell">27.50%</td>
<td class="score-cell">45.20%</td>
<td class="score-cell">33.50%</td>
<td class="score-cell">37.85%</td>
<td class="score-cell">47.66%</td>
<td class="score-cell">57.06%</td>
<td class="score-cell">23.30%</td>
</tr>
<tr class="nover-row">
<td class="method-name"><strong>+ NOVER</strong></td>
<td class="score-cell best-score">38.20%</td>
<td class="score-cell best-score">61.80%</td>
<td class="score-cell best-score">36.60%</td>
<td class="score-cell best-score">50.79%</td>
<td class="score-cell best-score">49.22%</td>
<td class="score-cell best-score">67.79%</td>
<td class="score-cell best-score">26.80%</td>
</tr>
<tr class="model-group-header">
<td colspan="8"><strong>Other Baselines</strong></td>
</tr>
<tr>
<td class="method-name">Qwen2.5-3B-Instruct</td>
<td class="score-cell">27.10%</td>
<td class="score-cell">50.00%</td>
<td class="score-cell">31.50%</td>
<td class="score-cell">21.25%</td>
<td class="score-cell">40.62%</td>
<td class="score-cell">58.69%</td>
<td class="score-cell">19.90%</td>
</tr>
<tr>
<td class="method-name">Qwen2.5-7B-Instruct</td>
<td class="score-cell">29.90%</td>
<td class="score-cell">56.20%</td>
<td class="score-cell">35.60%</td>
<td class="score-cell">67.72%</td>
<td class="score-cell">46.88%</td>
<td class="score-cell">65.23%</td>
<td class="score-cell">23.50%</td>
</tr>
<tr>
<td class="method-name">R1-Distill-Qwen-7B</td>
<td class="score-cell">41.00%</td>
<td class="score-cell">60.20%</td>
<td class="score-cell">38.00%</td>
<td class="score-cell">40.16%</td>
<td class="score-cell">35.16%</td>
<td class="score-cell">54.61%</td>
<td class="score-cell">8.20%</td>
</tr>
</tbody>
</table>
</div>
<div class="table-caption">
<strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct, <strong>SGN:</strong> SS-GEN,
<strong>EB:</strong> EmoBench, <strong>TB:</strong> TomBench, <strong>OPUS:</strong> OPUS-BOOK-TRANSLATION.
</div>
</div>
</div>
<div>
<h3 class="table-title">General Reasoning with Different Backends</h3>
<div class="table-section">
<div class="results-table table-2">
<table>
<thead>
<tr>
<th class="model-type-column">Model Type</th>
<th class="model-name-column">Model</th>
<th class="method-column">Method</th>
<th class="metric-column">NR</th>
<th class="metric-column">GT</th>
<th class="metric-column">WI</th>
</tr>
</thead>
<tbody>
<tr class="model-group-header">
<td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Base</td>
<td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 3B</td>
<td class="method-name">Base</td>
<td class="score-cell">21.80%</td>
<td class="score-cell">43.10%</td>
<td class="score-cell">18.40%</td>
</tr>
<tr>
<td class="method-name">+ CoT</td>
<td class="score-cell">24.40%</td>
<td class="score-cell">48.90%</td>
<td class="score-cell">24.20%</td>
</tr>
<tr>
<td class="method-name">+ SFT</td>
<td class="score-cell">27.00%</td>
<td class="score-cell">36.20%</td>
<td class="score-cell">27.30%</td>
</tr>
<tr class="nover-row">
<td class="method-name"><strong>+ NOVER</strong></td>
<td class="score-cell best-score"><strong>28.60%</strong></td>
<td class="score-cell best-score"><strong>60.30%</strong></td>
<td class="score-cell best-score"><strong>28.10%</strong></td>
</tr>
<tr class="model-group-header">
<td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 7B</td>
<td class="method-name">Base</td>
<td class="score-cell">31.80%</td>
<td class="score-cell">48.50%</td>
<td class="score-cell">20.70%</td>
</tr>
<tr>
<td class="method-name">+ CoT</td>
<td class="score-cell">31.20%</td>
<td class="score-cell">57.60%</td>
<td class="score-cell">29.20%</td>
</tr>
<tr>
<td class="method-name">+ SFT</td>
<td class="score-cell">27.50%</td>
<td class="score-cell">45.20%</td>
<td class="score-cell">33.50%</td>
</tr>
<tr class="nover-row">
<td class="method-name"><strong>+ NOVER</strong></td>
<td class="score-cell best-score"><strong>38.20%</strong></td>
<td class="score-cell best-score"><strong>61.80%</strong></td>
<td class="score-cell best-score"><strong>36.60%</strong></td>
</tr>
<tr class="model-group-header">
<td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Instruct</td>
<td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Llama-3.1-8B</td>
<td class="method-name">Base</td>
<td class="score-cell">34.20%</td>
<td class="score-cell">36.70%</td>
<td class="score-cell">29.90%</td>
</tr>
<tr>
<td class="method-name">+ CoT</td>
<td class="score-cell">28.10%</td>
<td class="score-cell">35.10%</td>
<td class="score-cell">30.00%</td>
</tr>
<tr>
<td class="method-name">+ SFT</td>
<td class="score-cell">23.60%</td>
<td class="score-cell">23.40%</td>
<td class="score-cell best-score"><strong>34.50%</strong></td>
</tr>
<tr class="nover-row">
<td class="method-name"><strong>+ NOVER</strong></td>
<td class="score-cell best-score"><strong>40.70%</strong></td>
<td class="score-cell best-score"><strong>41.50%</strong></td>
<td class="score-cell">34.00%</td>
</tr>
<tr class="model-group-header">
<td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Mistral-7B</td>
<td class="method-name">Base</td>
<td class="score-cell best-score"><strong>33.00%</strong></td>
<td class="score-cell">17.80%</td>
<td class="score-cell">27.00%</td>
</tr>
<tr>
<td class="method-name">+ CoT</td>
<td class="score-cell">29.20%</td>
<td class="score-cell">18.60%</td>
<td class="score-cell">27.10%</td>
</tr>
<tr>
<td class="method-name">+ SFT</td>
<td class="score-cell">22.50%</td>
<td class="score-cell">20.70%</td>
<td class="score-cell">27.80%</td>
</tr>
<tr class="nover-row">
<td class="method-name"><strong>+ NOVER</strong></td>
<td class="score-cell">32.20%</td>
<td class="score-cell best-score"><strong>21.90%</strong></td>
<td class="score-cell best-score"><strong>29.30%</strong></td>
</tr>
</tbody>
</table>
</div>
<div class="table-caption">
<strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct.
</div>
</div>
</div>
</div>
<div style="margin-top: 3rem;">
<div class="glass-card">
<h3 class="title is-4" style="color: #1a1a1a; margin-bottom: 1.5rem;">Key Takeaways</h3>
<ul style="color: #374151; line-height: 1.8; font-size: 0.9rem;">
<li>• NOVER trains successfully on both pretrained and instruct models, with larger gains on stronger base models</li>
<li>• Despite the free-form nature of answers, NOVER still prefers objective solutions over subjective ones</li>
<li>• On general reasoning, NOVER inherits base model boundaries, which have been observed in math reasoning. It struggles on false-premise tasks like FANToM</li>
<li>• NOVER's design prevents reward hacking, avoiding issues such as reasoning explosion and collapse</li>
<li>• Unlike closed-source or verifier-based rewards that suffer from cold start and hacking risks, NOVER remains stable</li>
<li>• Its dense reward signals allow greater error tolerance and encourage diverse reasoning patterns</li>
</ul>
</div>
</div>
</div>
</section>
<section class="section">
<!-- Inverse Incentive Training: two side-by-side figures followed by a summary card. -->
<div class="container is-widescreen">
<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Inverse Incentive Training</h2>
<div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
<!-- Each figure sits in a fixed 600x420 box; object-fit: contain scales the image inside it without cropping. -->
<!-- NOTE(review): alt texts are derived from the section title and file names; confirm they match what the figures show. -->
<div style="width: 600px; height: 420px;">
<img src="iit.png" alt="Inverse Incentive Training overview figure" style="width: 100%; height: 100%; object-fit: contain;">
</div>
<div style="width: 600px; height: 420px;">
<img src="iit_result.png" alt="Inverse Incentive Training results figure" style="width: 100%; height: 100%; object-fit: contain;">
</div>
</div>
<div class="glass-card">
<div style="text-align: center;">
<div style="display: flex; justify-content: center; align-items: center; gap: 2rem; margin-bottom: 1rem;">
<!-- The icons are decorative: the text below each one carries the meaning, so they are hidden from assistive technology. -->
<div style="text-align: center;">
<i class="fas fa-fish" aria-hidden="true" style="font-size: 3rem; margin-bottom: 0.5rem; color: #0e41a8;"></i>
<div style="font-size: 1.0rem; color: #0e41a8;">Reward the Outcome, Incentivize Process</div>
</div>
<!-- Empty flex item kept as-is: together with the 2rem gap it widens the space between the two columns. -->
<div style="font-size: 1.5rem;"></div>
<div style="text-align: center;">
<i class="fas fa-graduation-cap" aria-hidden="true" style="font-size: 3rem; margin-bottom: 0.5rem; color: #d736d2;"></i>
<div style="font-size: 1.0rem; color: #d736d2;">Write Rubrics in the Outcome, Process as Result</div>
</div>
</div>
<div style="font-size: 1.2rem; color: #000000;">Teaching Models "How to Fish" Rather Than Giving Them Fish</div>
</div>
</div>
</div>
</section>
<section class="section" id="BibTeX">
<!-- Citation: BibTeX entry for the NOVER paper. -->
<!-- The entry lives in <pre><code>, so its whitespace and line breaks render verbatim; overflow-x: auto scrolls long lines horizontally. -->
<div class="container is-widescreen">
<div class="glass-card">
<h2 class="title is-3">Citation</h2>
<pre style="background: #f8f9fa; padding: 1.5rem; border-radius: 10px; overflow-x: auto;"><code>@article{liu2025nover,
title={NOVER: Incentive Training for Language Models via Verifier-Free Reinforcement Learning},
author={Liu, Wei and Qi, Siya and Wang, Xinyu and Qian, Chen and Du, Yali and He, Yulan},
journal={arXiv preprint arXiv:2505.16022},
year={2025}
}</code></pre>
</div>
</div>
</section>
<footer class="section" style="background: white; border-top: 1px solid #e5e7eb; margin-top: 4rem;">
<!-- Page footer: author link and content license. -->
<div class="container has-text-centered">
<div class="content">
<div style="margin-bottom: 2rem;">
<!-- rel="noopener" stops the page opened in the new tab from reaching this one through window.opener. -->
<p>Find me on <a href="https://thinkwee.top/about" target="_blank" rel="noopener" style="color: #10b981;">thinkwee.top/about</a>, with other interesting works on LLM Agent🤖, NLP and more~</p>
</div>
<p style="color: #6b7280;">
<!-- License link: served over https; rel="license" marks it as the page's license, noreferrer withholds the referrer from the third-party site. -->
Licensed under <a href="https://creativecommons.org/licenses/by-sa/4.0/" target="_blank" rel="license noopener noreferrer" style="color: #10b981;">CC BY-SA 4.0</a>
</p>
</div>
</div>
</footer>
</body>
</html>