Spaces:

thinkwee
/

NOVER

Running

App Files Files Community

NOVER / index.html

thinkwee

Update index.html

91eeccf verified 16 days ago

raw

history blame contribute delete

50.4 kB

	<!DOCTYPE html>
	<html lang="en">
	<!-- The lang attribute tells screen readers, translation tools and search engines that the page text is English. -->
	<head>
	<meta charset="utf-8">
	<meta name="description"
	content="NOVER: Incentive Training for Language Models via Verifier-Free Reinforcement Learning">
	<meta name="keywords" content="NOVER, Reinforcement Learning, Language Models, Reasoning">
	<meta name="viewport" content="width=device-width, initial-scale=1">
	<title>NOVER: NO-VERifier Reinforcement Learning</title>

	<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap"
	rel="stylesheet">

	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.4/css/bulma.min.css">
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
	<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">

	<style>
	body {
	background: #fafafa;
	font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
	color: #1a1a1a;
	position: relative;
	overflow-x: hidden;
	}

	body::before {
	content: '';
	position: fixed;
	top: 0;
	left: 0;
	width: 100%;
	height: 100%;
	background:
	radial-gradient(circle at 20% 80%, rgba(16, 185, 129, 0.03) 0%, transparent 50%),
	radial-gradient(circle at 80% 20%, rgba(59, 130, 246, 0.03) 0%, transparent 50%),
	radial-gradient(circle at 40% 40%, rgba(16, 185, 129, 0.02) 0%, transparent 50%);
	pointer-events: none;
	z-index: -1;
	}

	.geometric-bg {
	position: absolute;
	top: 0;
	left: 0;
	width: 100%;
	height: 100%;
	opacity: 0.03;
	background-image:
	linear-gradient(45deg, transparent 45%, #10b981 45%, #10b981 55%, transparent 55%),
	linear-gradient(-45deg, transparent 45%, #3b82f6 45%, #3b82f6 55%, transparent 55%);
	background-size: 60px 60px;
	background-position: 0 0, 30px 30px;
	pointer-events: none;
	z-index: -1;
	}

	/* Hero Section - Brain/Neural Network Pattern */
	.hero {
	background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
	border-radius: 12px;
	margin: 2rem;
	box-shadow: 0 4px 24px rgba(0,0,0,0.06);
	border: 1px solid #e5e7eb;
	position: relative;
	overflow: hidden;
	}

	.hero::before {
	content: '';
	position: absolute;
	top: 0;
	left: 0;
	width: 100%;
	height: 100%;
	background-image:
	repeating-linear-gradient(
	45deg,
	transparent,
	transparent 40px,
	rgba(16, 185, 129, 0.04) 40px,
	rgba(16, 185, 129, 0.04) 80px
	),
	repeating-linear-gradient(
	-45deg,
	transparent,
	transparent 40px,
	rgba(59, 130, 246, 0.03) 40px,
	rgba(59, 130, 246, 0.03) 80px
	);
	background-size: 80px 80px, 80px 80px;
	background-position: 0 0, 40px 40px;
	opacity: 0.3;
	pointer-events: none;
	z-index: 0;
	}

	/*
	 * Content-section themes.
	 *
	 * :nth-of-type() counts siblings by tag name, not by class. The hero is the
	 * first <section> on the page (it has class "hero", not "section"), so the
	 * themed content sections start at position 2 among their <section> siblings.
	 * Selectors are qualified with the `section` tag so that `footer.section`
	 * (the first <footer> sibling) is no longer matched by an :nth-of-type(1)
	 * rule and keeps its own footer styling.
	 *
	 * NOTE(review): the mapping below assumes the content sections appear as
	 * siblings of the hero in the order listed here - confirm against the markup
	 * if sections are added or reordered.
	 */

	/* Card chrome shared by every content section */
	section.section {
	  margin: 2rem;
	  border-radius: 12px;
	  box-shadow: 0 2px 8px rgba(0,0,0,0.04);
	  border: 1px solid #e5e7eb;
	  position: relative;
	  overflow: hidden;
	}

	/* Decorative pattern layer shared by every content section;
	   each theme below only supplies the gradients and their tiling. */
	section.section::before {
	  content: '';
	  position: absolute;
	  top: 0;
	  left: 0;
	  width: 100%;
	  height: 100%;
	  opacity: 0.3;
	  pointer-events: none;
	  z-index: 0;
	}

	/* Abstract Section - Circuit/Technology Pattern */
	section.section:nth-of-type(2) {
	  background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
	}

	section.section:nth-of-type(2)::before {
	  background-image:
	    repeating-linear-gradient(90deg, transparent, transparent 30px, rgba(59, 130, 246, 0.06) 30px, rgba(59, 130, 246, 0.06) 60px),
	    repeating-linear-gradient(0deg, transparent, transparent 30px, rgba(59, 130, 246, 0.06) 30px, rgba(59, 130, 246, 0.06) 60px);
	  background-size: 60px 60px, 60px 60px;
	  background-position: 0 0, 30px 30px;
	}

	/* Incentivize Reasoning Section - Puzzle/Logic Pattern */
	section.section:nth-of-type(3) {
	  background: linear-gradient(135deg, #ffffff 0%, #f0fdf4 100%);
	}

	section.section:nth-of-type(3)::before {
	  background-image:
	    repeating-linear-gradient(45deg, transparent, transparent 50px, rgba(16, 185, 129, 0.07) 50px, rgba(16, 185, 129, 0.07) 100px),
	    repeating-linear-gradient(-45deg, transparent, transparent 50px, rgba(16, 185, 129, 0.05) 50px, rgba(16, 185, 129, 0.05) 100px);
	  background-size: 100px 100px, 100px 100px;
	  background-position: 0 0, 50px 50px;
	}

	/* NOVER Methodology Section - Mathematical/Formula Pattern */
	section.section:nth-of-type(4) {
	  background: linear-gradient(135deg, #ffffff 0%, #fefce8 100%);
	}

	section.section:nth-of-type(4)::before {
	  background-image:
	    repeating-linear-gradient(30deg, transparent, transparent 40px, rgba(245, 158, 11, 0.06) 40px, rgba(245, 158, 11, 0.06) 80px),
	    repeating-linear-gradient(-30deg, transparent, transparent 40px, rgba(245, 158, 11, 0.05) 40px, rgba(245, 158, 11, 0.05) 80px);
	  background-size: 80px 80px, 80px 80px;
	  background-position: 0 0, 40px 40px;
	}

	/* Experimental Results Section - Data/Chart Pattern */
	section.section:nth-of-type(5) {
	  background: linear-gradient(135deg, #ffffff 0%, #fef2f2 100%);
	}

	section.section:nth-of-type(5)::before {
	  background-image:
	    repeating-linear-gradient(0deg, transparent, transparent 35px, rgba(239, 68, 68, 0.06) 35px, rgba(239, 68, 68, 0.06) 70px),
	    repeating-linear-gradient(90deg, transparent, transparent 35px, rgba(239, 68, 68, 0.05) 35px, rgba(239, 68, 68, 0.05) 70px);
	  background-size: 70px 70px, 70px 70px;
	  background-position: 0 0, 35px 35px;
	}

	/* Inverse Incentive Training Section - Fish/Teaching Pattern */
	section.section:nth-of-type(6) {
	  background: linear-gradient(135deg, #ffffff 0%, #f0f9ff 100%);
	}

	section.section:nth-of-type(6)::before {
	  background-image:
	    repeating-linear-gradient(60deg, transparent, transparent 60px, rgba(14, 165, 233, 0.07) 60px, rgba(14, 165, 233, 0.07) 120px),
	    repeating-linear-gradient(-60deg, transparent, transparent 60px, rgba(14, 165, 233, 0.05) 60px, rgba(14, 165, 233, 0.05) 120px);
	  background-size: 120px 120px, 120px 120px;
	  background-position: 0 0, 60px 60px;
	}

	/* Citation Section - Book/Paper Pattern */
	section.section:nth-of-type(7) {
	  background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%);
	}

	section.section:nth-of-type(7)::before {
	  background-image:
	    repeating-linear-gradient(25deg, transparent, transparent 45px, rgba(107, 114, 128, 0.06) 45px, rgba(107, 114, 128, 0.06) 90px),
	    repeating-linear-gradient(-25deg, transparent, transparent 45px, rgba(107, 114, 128, 0.05) 45px, rgba(107, 114, 128, 0.05) 90px);
	  background-size: 90px 90px, 90px 90px;
	  background-position: 0 0, 45px 45px;
	}

	/* Footer Section - Social/Network Pattern */
	footer.section {
	background: linear-gradient(135deg, #ffffff 0%, #f9fafb 100%);
	border-top: 1px solid #e5e7eb;
	margin-top: 4rem;
	position: relative;
	overflow: hidden;
	}

	footer.section::before {
	content: '';
	position: absolute;
	top: 0;
	left: 0;
	width: 100%;
	height: 100%;
	background-image:
	repeating-linear-gradient(
	45deg,
	transparent,
	transparent 80px,
	rgba(16, 185, 129, 0.06) 80px,
	rgba(16, 185, 129, 0.06) 160px
	),
	repeating-linear-gradient(
	-45deg,
	transparent,
	transparent 80px,
	rgba(59, 130, 246, 0.05) 80px,
	rgba(59, 130, 246, 0.05) 160px
	);
	background-size: 160px 160px, 160px 160px;
	background-position: 0 0, 80px 80px;
	opacity: 0.3;
	pointer-events: none;
	z-index: 0;
	}

	/* Ensure content is above patterns */
	.hero-body,
	.section .container,
	footer .container {
	position: relative;
	z-index: 1;
	}

	.publication-title {
	color: #1a1a1a;
	font-weight: 600;
	letter-spacing: -0.02em;
	}

	.nover {
	color: #10b981;
	font-weight: 600;
	}

	.glass-card {
	background: #f9fafb;
	border-radius: 12px;
	padding: 2rem;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	border: 1px solid #e5e7eb;
	margin: 1rem 0;
	transition: all 0.2s ease;
	position: relative;
	z-index: 1;
	}

	.glass-card:hover {
	box-shadow: 0 4px 12px rgba(0,0,0,0.08);
	border-color: #d1d5db;
	}

	.figure-container {
	background: white;
	border-radius: 12px;
	padding: 2rem;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	border: 1px solid #e5e7eb;
	margin: 2rem 0;
	text-align: center;
	position: relative;
	z-index: 1;
	}

	.figure-placeholder {
	background: #f3f4f6;
	border: 2px dashed #9ca3af;
	border-radius: 8px;
	padding: 3rem 2rem;
	color: #6b7280;
	font-weight: 500;
	font-size: 1rem;
	margin-bottom: 1rem;
	position: relative;
	transition: all 0.3s ease;
	}

	.figure-placeholder:hover {
	background: #f9fafb;
	border-color: #6b7280;
	}

	.figure-placeholder.analysis {
	background: linear-gradient(135deg, #eff6ff 0%, #dbeafe 100%);
	border-color: #3b82f6;
	color: #1e40af;
	}

	.figure-placeholder.experiment {
	background: linear-gradient(135deg, #f0fdf4 0%, #dcfce7 100%);
	border-color: #10b981;
	color: #047857;
	}

	.figure-placeholder.comparison {
	background: linear-gradient(135deg, #fefce8 0%, #fef3c7 100%);
	border-color: #f59e0b;
	color: #92400e;
	}

	.analysis-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
	gap: 1.5rem;
	margin: 2rem 0;
	}

	.insight-card {
	background: white;
	border: 1px solid #e5e7eb;
	padding: 1.5rem;
	border-radius: 12px;
	text-align: left;
	transition: all 0.2s ease;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	position: relative;
	z-index: 1;
	}

	.insight-card:hover {
	box-shadow: 0 4px 12px rgba(0,0,0,0.08);
	border-color: #10b981;
	}

	.insight-title {
	font-size: 1.1rem;
	font-weight: 600;
	margin-bottom: 0.5rem;
	color: #1a1a1a;
	}

	.insight-description {
	font-size: 0.9rem;
	color: #6b7280;
	line-height: 1.5;
	}

	.comparison-table {
	background: white;
	border-radius: 12px;
	overflow: hidden;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	border: 1px solid #e5e7eb;
	margin: 2rem 0;
	position: relative;
	z-index: 1;
	}

	.comparison-table table {
	width: 100%;
	border-collapse: collapse;
	}

	.comparison-table th {
	background: #f9fafb;
	color: #374151;
	padding: 1rem;
	font-weight: 600;
	border-bottom: 1px solid #e5e7eb;
	}

	.comparison-table td {
	padding: 1rem;
	border-bottom: 1px solid #f3f4f6;
	text-align: center;
	}

	.comparison-table tr:nth-child(even) {
	background: #fafbfc;
	}

	.highlight-row {
	background: #f0fdf4 !important;
	border-left: 3px solid #10b981;
	}

	.highlight-number {
	color: #10b981;
	font-weight: 600;
	font-size: 1.05em;
	}

	.metrics-container {
	background: #1f2937;
	border-radius: 12px;
	padding: 2rem;
	color: #e5e7eb;
	font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
	box-shadow: 0 4px 12px rgba(0,0,0,0.15);
	margin: 2rem 0;
	position: relative;
	z-index: 1;
	}

	.metrics-header {
	color: #10b981;
	font-weight: 600;
	margin-bottom: 1rem;
	border-bottom: 1px solid #374151;
	padding-bottom: 0.5rem;
	}

	.method-comparison {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
	gap: 1.5rem;
	margin: 2rem 0;
	}

	.method-card {
	background: white;
	border-radius: 12px;
	padding: 1.5rem;
	text-align: center;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	border: 1px solid #e5e7eb;
	transition: all 0.2s ease;
	position: relative;
	z-index: 1;
	}

	.method-card:hover {
	box-shadow: 0 4px 12px rgba(0,0,0,0.08);
	border-color: #10b981;
	}

	.method-icon {
	font-size: 2.5rem;
	margin-bottom: 1rem;
	color: #6b7280;
	}

	.method-card.nover .method-icon {
	color: #10b981;
	}

	.abstract-card {
	background: #f9fafb;
	border: 1px solid #e5e7eb;
	padding: 2rem;
	border-radius: 12px;
	margin: 2rem 0;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	position: relative;
	z-index: 1;
	}

	.stats-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
	gap: 1rem;
	margin: 2rem 0;
	}

	.stat-item {
	background: white;
	border: 1px solid #e5e7eb;
	border-radius: 8px;
	padding: 1.5rem;
	text-align: center;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	transition: all 0.2s ease;
	position: relative;
	z-index: 1;
	}

	.stat-item:hover {
	box-shadow: 0 4px 12px rgba(0,0,0,0.08);
	border-color: #10b981;
	}

	.stat-number {
	font-size: 2rem;
	font-weight: 600;
	color: #10b981;
	margin-bottom: 0.5rem;
	}

	.stat-label {
	font-size: 0.9rem;
	color: #6b7280;
	font-weight: 500;
	}

	.results-table {
	background: white;
	border-radius: 12px;
	overflow: hidden;
	box-shadow: 0 2px 12px rgba(0,0,0,0.08);
	border: 1px solid #e5e7eb;
	margin: 0;
	width: 100%;
	position: relative;
	z-index: 1;
	}

	.results-table table {
	width: 100%;
	border-collapse: collapse;
	font-size: 0.8rem;
	}

	/* Header cells of the results tables.
	   NOTE(review): .results-table sets overflow: hidden, which makes that wrapper the
	   reference box for position: sticky; since the wrapper itself does not scroll, the
	   sticky header below probably never engages - confirm whether it is still wanted. */
	.results-table th {
	  background: #f8fafc;
	  color: #374151; /* same header gray as .comparison-table th (was #374155) */
	  padding: 1rem 0.8rem;
	  font-weight: 600;
	  border-bottom: 2px solid #e5e7eb;
	  text-align: center !important;
	  position: sticky;
	  top: 0;
	  z-index: 10;
	}

	.results-table td {
	padding: 0.8rem;
	border-bottom: 1px solid #f3f4f6;
	text-align: center !important;
	}

	/* Fine-tune table height - cell sizing for Table 1
	   (original note: "increase row height"; the row height itself is set by the
	   .table-1 tbody tr rule further down) */
	.results-table.table-1 td {
	padding: 0.8rem;
	line-height: 0.9;
	}

	.results-table.table-1 th {
	padding: 1.2rem 0.8rem;
	}

	/* Fine-tune table height - reduce row height for Table 2 */
	.results-table.table-2 td {
	padding: 0.8rem 0.8rem;
	line-height: 1.3;
	}

	.results-table.table-2 th {
	padding: 0.8rem 0.8rem;
	}

	/* Adjust the row height of the model-group header rows */
	.results-table.table-2 .model-group-header td {
	padding: 0.8rem 0.6rem;
	line-height: 1.3;
	}

	/* Further fine-tuning of table row spacing */
	.results-table.table-1 tbody tr {
	height: 48px;
	}

	.results-table.table-2 tbody tr {
	height: 42px;
	}

	/* Adjust the spacing between each table and its caption */
	.table-1 + .table-caption {
	margin-top: 1.5rem;
	}

	.table-2 + .table-caption {
	margin-top: 1rem;
	}

	/* Method-name cells: centered, bold, dark text */
	.results-table .method-name {
	text-align: center !important;
	font-weight: 600;
	color: #1f2937;
	}

	/* Fixed column widths for Table 2 (width, min-width and max-width pinned to the same value) */
	.results-table .model-type-column {
	width: 100px;
	min-width: 100px;
	max-width: 100px;
	}

	.results-table .model-name-column {
	width: 100px;
	min-width: 100px;
	max-width: 100px;
	}

	.results-table .method-column {
	width: 100px;
	min-width: 100px;
	max-width: 100px;
	}

	.results-table .metric-column {
	width: 80px;
	min-width: 80px;
	max-width: 80px;
	}





	.results-table .nover-row {
	background: #f0fdf4;
	border-left: 3px solid #10b981;
	}



	.results-table .best-score {
	color: #10b981;
	font-weight: 700;
	position: relative;
	}



	.table-section {
	margin: 0;
	}

	.table-title {
	font-size: 1.5rem;
	font-weight: 600;
	color: #1f2937;
	margin-bottom: 1rem;
	text-align: center;
	}

	.table-caption {
	font-size: 0.9rem;
	color: #6b7280;
	text-align: center;
	margin-top: 1rem;
	line-height: 1.5;
	max-width: 800px;
	margin-left: auto;
	margin-right: auto;
	}

	.model-group-header {
	background: #f1f5f9 !important;
	color: #334155;
	font-weight: 700;
	text-align: center !important;
	}

	.model-group-header td {
	text-align: center !important;
	}

	.score-cell {
	position: relative;
	}

	@media (max-width: 768px) {
	.results-table {
	font-size: 0.8rem;
	}

	.results-table th,
	.results-table td {
	padding: 0.5rem 0.3rem;
	}

	.table-section {
	margin: 2rem 0;
	}

	.table-title {
	font-size: 1.2rem;
	}

	div[style*="grid-template-columns: 1fr 1fr"] {
	display: block !important;
	}


	}

	.formula-container {
	background: #f8fafc;
	border: 1px solid #e2e8f0;
	border-radius: 12px;
	padding: 2rem;
	margin: 2rem 0;
	text-align: center;
	position: relative;
	z-index: 1;
	}

	.formula-container::before {
	content: '🧮';
	position: absolute;
	top: 1rem;
	left: 1rem;
	font-size: 1.2rem;
	}

	.formula-title {
	font-size: 1.1rem;
	font-weight: 600;
	color: #334155;
	margin-bottom: 1rem;
	}

	.formula-description {
	font-size: 0.9rem;
	color: #64748b;
	margin-top: 1rem;
	line-height: 1.5;
	}

	.diagram-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
	gap: 2rem;
	margin: 3rem 0;
	}

	.diagram-card {
	background: white;
	border-radius: 12px;
	padding: 2rem;
	box-shadow: 0 2px 8px rgba(0,0,0,0.06);
	border: 1px solid #e5e7eb;
	text-align: center;
	transition: all 0.3s ease;
	position: relative;
	z-index: 1;
	}

	.diagram-card:hover {
	transform: translateY(-2px);
	box-shadow: 0 8px 24px rgba(0,0,0,0.12);
	}

	.diagram-placeholder {
	height: 200px;
	background: #f1f5f9;
	border: 2px dashed #94a3b8;
	border-radius: 8px;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	margin-bottom: 1rem;
	transition: all 0.3s ease;
	}

	.diagram-placeholder:hover {
	background: #e2e8f0;
	border-color: #64748b;
	}

	.diagram-icon {
	font-size: 3rem;
	color: #64748b;
	margin-bottom: 0.5rem;
	}

	.diagram-label {
	font-size: 0.9rem;
	color: #475569;
	font-weight: 500;
	}

	.diagram-description {
	font-size: 0.85rem;
	color: #64748b;
	line-height: 1.4;
	}

	/* Apple Design Button Styles
	 *
	 * The link buttons in the hero set transform, box-shadow and transition through
	 * inline style attributes. Inline declarations win over normal stylesheet rules,
	 * so the :hover and :active declarations that touch those properties are marked
	 * !important; without it the lift and press effects never apply.
	 */
	.external-link.button {
	  position: relative;
	  overflow: hidden; /* clips the sweeping highlight to the button shape */
	}

	/* Highlight strip parked just left of the button; slides across on hover */
	.external-link.button::before {
	  content: '';
	  position: absolute;
	  top: 0;
	  left: -100%;
	  width: 100%;
	  height: 100%;
	  background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
	  transition: left 0.5s;
	}

	/* Hover: lift the button and deepen its shadow */
	.external-link.button:hover {
	  transform: translateY(-2px) !important;
	  box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15), 0 4px 8px rgba(0, 0, 0, 0.1) !important;
	}

	.external-link.button:hover::before {
	  left: 100%;
	}

	/* Active: snap back down quickly; declared after :hover so it wins while pressed */
	.external-link.button:active {
	  transform: translateY(0) !important;
	  transition: transform 0.1s !important;
	}
	</style>

	<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
	<!-- The polyfill.io domain changed ownership and was found serving malicious code in 2024.
	     Cloudflare hosts a drop-in mirror of the same service under the cdnjs domain. -->
	<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
	<!-- MathJax reads window.MathJax when it starts up, so the configuration is defined
	     before the async loader tag rather than after it. -->
	<script>
	window.MathJax = {
	  tex: {
	    // Inline math: $...$ and \(...\). In a JS string literal '\$' is just '$',
	    // so the second pair has to be written with escaped backslashes.
	    inlineMath: [['$', '$'], ['\\(', '\\)']],
	    // Display math: $$...$$ and \[...\]
	    displayMath: [['$$', '$$'], ['\\[', '\\]']]
	  }
	};
	</script>
	<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
	</head>
	<body>
	<div class="geometric-bg"></div>

	<section class="hero">
	<div class="hero-body">
	<div class="container is-widescreen">
	<div class="columns is-centered">
	<div class="column has-text-centered">
	<h1 class="title is-1 publication-title">NOVER</h1>
	<p class="subtitle is-4" style="color: #888;">Incentive Training for Language Models via Verifier-Free Reinforcement Learning</p>

	<div class="is-size-5 publication-authors" style="margin: 2rem 0;">
	<span class="author-block">Wei Liu¹ • Siya Qi¹ • Xinyu Wang¹ • Chen Qian² • Yali Du¹·³ • Yulan He¹·³</span>
	<div style="margin-top: 0.5rem; font-size: 0.9rem; color: #666;">
	¹King's College London • ²Shanghai Jiao Tong University • ³The Alan Turing Institute
	</div>
	</div>

	<!-- Paper / Code / Dataset / Model buttons. Each opens in a new tab, so rel="noopener noreferrer"
	     keeps the opened page from reaching back through window.opener. The Font Awesome glyphs are
	     decorative (the text label names the link), so they are hidden from assistive technology. -->
	<div class="publication-links" style="display: flex; justify-content: center; gap: 1rem; flex-wrap: wrap;">
	  <a href="https://arxiv.org/pdf/2505.16022.pdf" target="_blank" rel="noopener noreferrer"
	     class="external-link button is-normal" style="background: linear-gradient(135deg, #B31B1B 0%, #D32F2F 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(179, 27, 27, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
	    <span class="icon"><i class="fas fa-file-pdf" aria-hidden="true"></i></span>
	    <span>Paper</span>
	  </a>
	  <a href="https://github.com/thinkwee/NOVER" target="_blank" rel="noopener noreferrer"
	     class="external-link button is-normal" style="background: linear-gradient(135deg, #24292e 0%, #2f363d 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(36, 41, 46, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
	    <span class="icon"><i class="fab fa-github" aria-hidden="true"></i></span>
	    <span>Code</span>
	  </a>
	  <a href="https://huggingface.co/collections/thinkwee/novereason-68937ca75331dfaddaf24016" target="_blank" rel="noopener noreferrer"
	     class="external-link button is-normal" style="background: linear-gradient(135deg, #FFD43B 0%, #FFE066 100%); color: #000; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(255, 212, 59, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
	    <span class="icon"><i class="fas fa-database" aria-hidden="true"></i></span>
	    <span>Dataset</span>
	  </a>
	  <a href="https://huggingface.co/collections/thinkwee/nover1-68a6524eac725c915abd77e3" target="_blank" rel="noopener noreferrer"
	     class="external-link button is-normal" style="background: linear-gradient(135deg, #0EA5E9 0%, #38BDF8 100%); color: white; border: none; border-radius: 12px; padding: 12px 20px; font-weight: 500; box-shadow: 0 4px 12px rgba(14, 165, 233, 0.3), 0 2px 4px rgba(0, 0, 0, 0.1); transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); transform: translateY(0);">
	    <span class="icon"><i class="fas fa-cube" aria-hidden="true"></i></span>
	    <span>Model</span>
	  </a>
	</div>
	</div>
	</div>
	</div>
	</div>
	</section>

	<section class="section">
	<div class="container is-widescreen">
	<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; align-items: start;">

	<div class="abstract-card">
	<h2 class="title is-3" style="color: #1a1a1a; margin-bottom: 1.5rem;">TL;DR</h2>
	<p class="is-size-5" style="color: #374151; line-height: 1.6;">
	<span class="nover">NOVER</span> (NO-Verifier Reinforcement Learning) enables
	incentive training on any text-to-text task without external verifiers. It uses the policy model's reasoning perplexity to estimate the reward.
	<br>
	<br>
	<strong>• Your LLM is secretly a verifier.
	<br>
	• Your LLM only reason on <s>Easy-to-Verify</s> tasks.
	<br>
	• Your LLM can <s>reason</s> on ANY tasks.
	<br>
	• Your LLM can be incentivized to do more than reasoning.</strong>
	<br>
	</p>
	</div>


	<div class="figure-container">
	<div class="figure-placeholder" style="height: 280px; display: flex; flex-direction: column; justify-content: center; align-items: center;">
	<img src="logo.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
	</div>
	<div style="font-size: 1.1rem; color: #2e3036; text-align: center; margin-top: 0.5rem;">
	<div><span class="nover">NOVER</span> extends RLVR on any text-to-text task</div>
	<div>beyond easy-to-verify math/coding problems.</div>
	</div>
	</div>
	</div>
	</div>
	</section>

	<section class="section">
	<div class="container is-widescreen">
	<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Incentivize Reasoning on Any Task</h2>
	<p class="is-size-5" style="color: #6b7280; margin-bottom: 3rem; max-width: 800px; margin-left: auto; margin-right: auto;">
	NOVER enables training large reasoning models on any text data and any task.<br>
	NO verifiers/models/rules needed, just ground truth answer, and policy model itself.<br>
	<strong>General Reasoning:</strong> ⚛️ physics • ⚖️ law • 🏥 medical • 💰 finance<br>
	<strong>Creative Tasks:</strong> 🎨 creative writing<br>
	<strong>Social Intelligence:</strong> 🧠 theory of mind • 😊 emotion detection • 🤝 social reasoning<br>
	<strong>Natural Language Generation:</strong> 🌍 translation • 📚 summarization
	</p>

	<div class="figure-container">
	<img src="example.png" alt="NOVER Framework Overview" style="width: 100%; height: 100%; object-fit: contain;">
	</div>
	</div>
	</section>

	<section class="section">
	<div class="container is-widescreen">
	<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">NOVER Methodology</h2>


	<div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
	<div style="width: 600px; height: 420px;">
	<img src="paradigm.png" alt="paradigm" style="width: 100%; height: 100%; object-fit: contain;">
	</div>
	<div style="width: 600px; height: 420px;">
	<img src="overall.png" alt="overall" style="width: 100%; height: 100%; object-fit: contain;">
	</div>
	</div>


	<div class="method-comparison">
	<div class="method-card">
	<div class="method-icon"><i class="fas fa-graduation-cap"></i></div>
	<h3 class="title is-5">SFT</h3>
	<p>Memorize Input-Output Patterns</p>
	</div>
	<div class="method-card">
	<div class="method-icon"><i class="fas fa-robot"></i></div>
	<h3 class="title is-5">RLHF</h3>
	<p>Train Reward Model <br>Give Preference Feedback</p>
	</div>
	<div class="method-card">
	<div class="method-icon"><i class="fas fa-balance-scale"></i></div>
	<h3 class="title is-5">RLVR</h3>
	<p>Rule-based Reward <br>End2End Outcome RL</p>
	</div>
	<div class="method-card nover">
	<div class="method-icon"><i class="fas fa-brain"></i></div>
	<h3 class="title is-5"><span class="nover">NOVER</span></h3>
	<p>Reasoning Perplexity as Reward<br>Reason on Any Task</p>
	</div>
	</div>


	<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 2rem; margin: 3rem 0;">

	<!-- Reasoning-perplexity formula card. A literal less-than sign directly followed by a letter
	     is tokenized by the HTML parser as the start of a tag, which would swallow the rest of the
	     formula and the closing markup after it; the subscript therefore uses the TeX macro \lt. -->
	<div class="formula-container">
	  <div class="formula-title">Reasoning Perplexity</div>
	  <div style="font-size: 0.9rem; margin: 1rem 0;">
	    $P_r(p, t, g) = \exp\left(-\frac{\sum_{i=1}^{\|g\|} \log \pi_{p}(g_i \mid p, t, g_{\lt i})}{\|g\| \cdot N(\|t\|)}\right)$
	  </div>
	  <div class="formula-description">
	    Use perplexity of policy model on ground truth conditioned on reasoning trajectory as reward proxy
	  </div>
	</div>


	<div class="formula-container">
	<div class="formula-title">Rewards</div>
	<div style="font-size: 1.1rem; margin: 1rem 0;">
	$$R_{\mathrm{total}} = w_{\mathrm{f}} R_{\mathrm{f}} + \mathbb{I}(R_{\mathrm{f}} = 1) \cdot (w_{\mathrm{r}} R_{\mathrm{r}} + w_{\mathrm{e}} R_{\mathrm{e}})$$
	</div>
	<div class="formula-description">
	Combined reward function incorporating reasoning, efficiency, and format components
	</div>
	</div>


	<div class="formula-container">
	<div class="formula-title">Policy-Proxy Synchronization</div>
	<div style="font-size: 1.1rem; margin: 1rem 0;">
	$$\pi_{\mathrm{p}} \leftarrow \alpha \cdot \pi_{\mathrm{p}} + (1-\alpha) \cdot \pi_{\theta}$$
	</div>
	<div class="formula-description">
	Smooth synchronization between policy and proxy ensures stable training with limited resource
	</div>
	</div>
	</div>




	</div>
	</section>

	<section class="section">
	<div class="container is-widescreen">
	<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Experimental Results</h2>


	<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 3rem; margin-top: 3rem; align-items: start;">

	<div>
	<h3 class="table-title">Overall on NOVEReason Dataset</h3>

	<div class="table-section">
	<div class="results-table table-1">
	<table>
	<!-- Column headers for Table 1; scope="col" ties each header to the cells below it for screen readers. -->
	<thead>
	  <tr>
	    <th scope="col">Method</th>
	    <th scope="col">NR</th>
	    <th scope="col">GT</th>
	    <th scope="col">WI</th>
	    <th scope="col">SGN</th>
	    <th scope="col">EB</th>
	    <th scope="col">TB</th>
	    <th scope="col">OPUS</th>
	  </tr>
	</thead>
	<tbody>
	<tr class="model-group-header">
	<td colspan="8"><strong>Qwen2.5-3B</strong></td>
	</tr>
	<tr>
	<td class="method-name">Base</td>
	<td class="score-cell">21.80%</td>
	<td class="score-cell">43.10%</td>
	<td class="score-cell">18.40%</td>
	<td class="score-cell">18.70%</td>
	<td class="score-cell">32.03%</td>
	<td class="score-cell">46.79%</td>
	<td class="score-cell">16.70%</td>
	</tr>
	<tr>
	<td class="method-name">+ CoT</td>
	<td class="score-cell">24.40%</td>
	<td class="score-cell">48.90%</td>
	<td class="score-cell">24.20%</td>
	<td class="score-cell">14.76%</td>
	<td class="score-cell">28.12%</td>
	<td class="score-cell">51.23%</td>
	<td class="score-cell">1.40%</td>
	</tr>
	<tr>
	<td class="method-name">+ SFT</td>
	<td class="score-cell">27.00%</td>
	<td class="score-cell">36.20%</td>
	<td class="score-cell">27.30%</td>
	<td class="score-cell">20.08%</td>
	<td class="score-cell">36.72%</td>
	<td class="score-cell">48.66%</td>
	<td class="score-cell">17.30%</td>
	</tr>
	<tr class="nover-row">
	<td class="method-name"><strong>+ NOVER</strong></td>
	<td class="score-cell best-score">28.60%</td>
	<td class="score-cell best-score">60.30%</td>
	<td class="score-cell best-score">28.10%</td>
	<td class="score-cell best-score">41.64%</td>
	<td class="score-cell best-score">38.28%</td>
	<td class="score-cell best-score">57.88%</td>
	<td class="score-cell best-score">20.70%</td>
	</tr>
	<tr class="model-group-header">
	<td colspan="8"><strong>Qwen2.5-7B</strong></td>
	</tr>
	<tr>
	<td class="method-name">Base</td>
	<td class="score-cell">31.80%</td>
	<td class="score-cell">48.50%</td>
	<td class="score-cell">20.70%</td>
	<td class="score-cell">24.21%</td>
	<td class="score-cell">28.91%</td>
	<td class="score-cell">44.22%</td>
	<td class="score-cell">19.30%</td>
	</tr>
	<tr>
	<td class="method-name">+ CoT</td>
	<td class="score-cell">31.20%</td>
	<td class="score-cell">57.60%</td>
	<td class="score-cell">29.20%</td>
	<td class="score-cell">33.46%</td>
	<td class="score-cell">38.28%</td>
	<td class="score-cell">50.99%</td>
	<td class="score-cell">1.60%</td>
	</tr>
	<tr>
	<td class="method-name">+ SFT</td>
	<td class="score-cell">27.50%</td>
	<td class="score-cell">45.20%</td>
	<td class="score-cell">33.50%</td>
	<td class="score-cell">37.85%</td>
	<td class="score-cell">47.66%</td>
	<td class="score-cell">57.06%</td>
	<td class="score-cell">23.30%</td>
	</tr>
	<tr class="nover-row">
	<td class="method-name"><strong>+ NOVER</strong></td>
	<td class="score-cell best-score">38.20%</td>
	<td class="score-cell best-score">61.80%</td>
	<td class="score-cell best-score">36.60%</td>
	<td class="score-cell best-score">50.79%</td>
	<td class="score-cell best-score">49.22%</td>
	<td class="score-cell best-score">67.79%</td>
	<td class="score-cell best-score">26.80%</td>
	</tr>
	<tr class="model-group-header">
	<td colspan="8"><strong>Other Baselines</strong></td>
	</tr>
	<tr>
	<td class="method-name">Qwen2.5-3B-Instruct</td>
	<td class="score-cell">27.10%</td>
	<td class="score-cell">50.00%</td>
	<td class="score-cell">31.50%</td>
	<td class="score-cell">21.25%</td>
	<td class="score-cell">40.62%</td>
	<td class="score-cell">58.69%</td>
	<td class="score-cell">19.90%</td>
	</tr>
	<tr>
	<td class="method-name">Qwen2.5-7B-Instruct</td>
	<td class="score-cell">29.90%</td>
	<td class="score-cell">56.20%</td>
	<td class="score-cell">35.60%</td>
	<td class="score-cell">67.72%</td>
	<td class="score-cell">46.88%</td>
	<td class="score-cell">65.23%</td>
	<td class="score-cell">23.50%</td>
	</tr>
	<tr>
	<td class="method-name">R1-Distill-Qwen-7B</td>
	<td class="score-cell">41.00%</td>
	<td class="score-cell">60.20%</td>
	<td class="score-cell">38.00%</td>
	<td class="score-cell">40.16%</td>
	<td class="score-cell">35.16%</td>
	<td class="score-cell">54.61%</td>
	<td class="score-cell">8.20%</td>
	</tr>
	</tbody>
	</table>
	</div>
	<div class="table-caption">
	<strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct, <strong>SGN:</strong> SS-GEN,
	<strong>EB:</strong> EmoBench, <strong>TB:</strong> TomBench, <strong>OPUS:</strong> OPUS-BOOK-TRANSLATION.
	</div>
	</div>
	</div>


	<div>
	<h3 class="table-title">General Reasoning with Different Backends</h3>
	<div class="table-section">
	<div class="results-table table-2">
	<table>
	<!-- Column headers; scope="col" ties each header to its column so the rowspan'd body cells stay navigable for assistive technology. -->
	<thead>
	<tr>
	<th class="model-type-column" scope="col">Model Type</th>
	<th class="model-name-column" scope="col">Model</th>
	<th class="method-column" scope="col">Method</th>
	<th class="metric-column" scope="col">NR</th>
	<th class="metric-column" scope="col">GT</th>
	<th class="metric-column" scope="col">WI</th>
	</tr>
	</thead>
	<tbody>
	<tr class="model-group-header">
	<td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Base</td>
	<td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen2.5 3B</td>
	<td class="method-name">Base</td>
	<td class="score-cell">21.80%</td>
	<td class="score-cell">43.10%</td>
	<td class="score-cell">18.40%</td>
	</tr>
	<tr>
	<td class="method-name">+ CoT</td>
	<td class="score-cell">24.40%</td>
	<td class="score-cell">48.90%</td>
	<td class="score-cell">24.20%</td>
	</tr>
	<tr>
	<td class="method-name">+ SFT</td>
	<td class="score-cell">27.00%</td>
	<td class="score-cell">36.20%</td>
	<td class="score-cell">27.30%</td>
	</tr>
	<tr class="nover-row">
	<td class="method-name"><strong>+ NOVER</strong></td>
	<td class="score-cell best-score"><strong>28.60%</strong></td>
	<td class="score-cell best-score"><strong>60.30%</strong></td>
	<td class="score-cell best-score"><strong>28.10%</strong></td>
	</tr>
	<tr class="model-group-header">
	<td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Qwen 2.5 7B</td>
	<td class="method-name">Base</td>
	<td class="score-cell">31.80%</td>
	<td class="score-cell">48.50%</td>
	<td class="score-cell">20.70%</td>
	</tr>
	<tr>
	<td class="method-name">+ CoT</td>
	<td class="score-cell">31.20%</td>
	<td class="score-cell">57.60%</td>
	<td class="score-cell">29.20%</td>
	</tr>
	<tr>
	<td class="method-name">+ SFT</td>
	<td class="score-cell">27.50%</td>
	<td class="score-cell">45.20%</td>
	<td class="score-cell">33.50%</td>
	</tr>
	<tr class="nover-row">
	<td class="method-name"><strong>+ NOVER</strong></td>
	<td class="score-cell best-score"><strong>38.20%</strong></td>
	<td class="score-cell best-score"><strong>61.80%</strong></td>
	<td class="score-cell best-score"><strong>36.60%</strong></td>
	</tr>
	<tr class="model-group-header">
	<td class="model-type-column" rowspan="8" style="vertical-align: middle; background: #f1f5f9 !important; color: #334155; font-weight: 700;">Instruct</td>
	<td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Llama-3.1-8B</td>
	<td class="method-name">Base</td>
	<td class="score-cell">34.20%</td>
	<td class="score-cell">36.70%</td>
	<td class="score-cell">29.90%</td>
	</tr>
	<tr>
	<td class="method-name">+ CoT</td>
	<td class="score-cell">28.10%</td>
	<td class="score-cell">35.10%</td>
	<td class="score-cell">30.00%</td>
	</tr>
	<tr>
	<td class="method-name">+ SFT</td>
	<td class="score-cell">23.60%</td>
	<td class="score-cell">23.40%</td>
	<td class="score-cell best-score"><strong>34.50%</strong></td>
	</tr>
	<tr class="nover-row">
	<td class="method-name"><strong>+ NOVER</strong></td>
	<td class="score-cell best-score"><strong>40.70%</strong></td>
	<td class="score-cell best-score"><strong>41.50%</strong></td>
	<td class="score-cell">34.00%</td>
	</tr>
	<tr class="model-group-header">
	<td class="model-name-column" rowspan="4" style="vertical-align: middle; background: #f8fafc !important; color: #374151; font-weight: 600;">Mistral-7B</td>
	<td class="method-name">Base</td>
	<td class="score-cell best-score"><strong>33.00%</strong></td>
	<td class="score-cell">17.80%</td>
	<td class="score-cell">27.00%</td>
	</tr>
	<tr>
	<td class="method-name">+ CoT</td>
	<td class="score-cell">29.20%</td>
	<td class="score-cell">18.60%</td>
	<td class="score-cell">27.10%</td>
	</tr>
	<tr>
	<td class="method-name">+ SFT</td>
	<td class="score-cell">22.50%</td>
	<td class="score-cell">20.70%</td>
	<td class="score-cell">27.80%</td>
	</tr>
	<tr class="nover-row">
	<td class="method-name"><strong>+ NOVER</strong></td>
	<td class="score-cell">32.20%</td>
	<td class="score-cell best-score"><strong>21.90%</strong></td>
	<td class="score-cell best-score"><strong>29.30%</strong></td>
	</tr>
	</tbody>
	</table>
	</div>
	<div class="table-caption">
	<strong>NR:</strong> Natural Reasoning, <strong>GT:</strong> General Thoughts-430k, <strong>WI:</strong> WebInstruct.
	</div>
	</div>
	</div>
	</div>


	<div style="margin-top: 3rem;">
	<div class="glass-card">
	<h3 class="title is-4" style="color: #1a1a1a; margin-bottom: 1.5rem;">Key Takeaways</h3>
	<ul style="color: #374151; line-height: 1.8; font-size: 0.9rem;">
	<li>• NOVER trains successfully on both pretrained and instruct models, with larger gains on stronger base models</li>
	<li>• Despite the free-form nature of answers, NOVER still prefers objective solutions over subjective ones</li>
	<li>• On general reasoning, NOVER inherits base model boundaries, which have been observed in math reasoning. It struggles on false-premise tasks like FANToM</li>
	<li>• NOVER's design prevents reward hacking, avoiding issues such as reasoning explosion and collapse</li>
	<li>• Unlike closed-source or verifier-based rewards that suffer from cold start and hacking risks, NOVER remains stable</li>
	<li>• Its dense reward signals allow greater error tolerance and encourage diverse reasoning patterns</li>
	</ul>
	</div>
	</div>



	</div>
	</section>

	<section class="section">
	<div class="container is-widescreen">
	<h2 class="title is-2 has-text-centered" style="color: #333; margin-bottom: 3rem;">Inverse Incentive Training</h2>

	<div style="display: flex; justify-content: space-between; align-items: center; margin: 2rem 0; padding: 0 1rem;">
	<div style="width: 600px; height: 420px;">
	<img src="iit.png" alt="iit" style="width: 100%; height: 100%; object-fit: contain;">
	</div>
	<div style="width: 600px; height: 420px;">
	<img src="iit_result.png" alt="iit_result" style="width: 100%; height: 100%; object-fit: contain;">
	</div>
	</div>

	<div class="glass-card">
	<div style="text-align: center;">
	<div style="display: flex; justify-content: center; align-items: center; gap: 2rem; margin-bottom: 1rem;">
	<div style="text-align: center;">
	<i class="fas fa-fish" style="font-size: 3rem; margin-bottom: 0.5rem; color: #0e41a8;"></i>
	<div style="font-size: 1.0rem; color: #0e41a8;">Reward the Outcome, Incentivize Process</div>
	</div>
	<div style="font-size: 1.5rem;">→</div>
	<div style="text-align: center;">
	<i class="fas fa-graduation-cap" style="font-size: 3rem; margin-bottom: 0.5rem; color: #d736d2;"></i>
	<div style="font-size: 1.0rem; color: #d736d2;">Write Rubrics in the Outcome, Process as Result</div>
	</div>
	</div>
	<div style="font-size: 1.2rem; color: #000000;">Teaching Models "How to Fish" Rather Than Giving Them Fish</div>
	</div>
	</div>
	</div>
	</section>



	<section class="section" id="BibTeX">
	<div class="container is-widescreen">
	<div class="glass-card">
	<h2 class="title is-3">Citation</h2>
	<pre style="background: #f8f9fa; padding: 1.5rem; border-radius: 10px; overflow-x: auto;"><code>@article{liu2025nover,
	title={NOVER: Incentive Training for Language Models via Verifier-Free Reinforcement Learning},
	author={Liu, Wei and Qi, Siya and Wang, Xinyu and Qian, Chen and Du, Yali and He, Yulan},
	journal={arXiv preprint arXiv:2505.16022},
	year={2025}
	}</code></pre>
	</div>
	</div>
	</section>

	<footer class="section" style="background: white; border-top: 1px solid #e5e7eb; margin-top: 4rem;">
	<div class="container has-text-centered">
	<div class="content">
	<div style="margin-bottom: 2rem;">
	<p>Find me on <a href="https://thinkwee.top/about" target="_blank" style="color: #10b981;">thinkwee.top/about</a>, with other interesting works on LLM Agent🤖, NLP and more~</p>
	</div>
	<p style="color: #6b7280;">
	Licensed under <a href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank" style="color: #10b981;">CC BY-SA 4.0</a>
	</p>
	</div>
	</div>
	</footer>

	</body>
	</html>