#!/usr/bin/env python3
"""
Gradio app for QP-RNN interactive demo.
Suitable for deployment on Hugging Face Spaces.
"""
import gradio as gr
import torch
import numpy as np
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend
import matplotlib.pyplot as plt
class MinimalQPRNN(torch.nn.Module):
"""Minimal QP-RNN for demonstration."""
def __init__(self, position_gain=3.0, velocity_gain=1.5, control_cost=10.0):
super().__init__()
        # Fixed, hand-tuned values for demonstration; in a trained QP-RNN the
        # QP cost terms P and K would be learned parameters.
        self.P = torch.tensor([[control_cost]], dtype=torch.float32)
        self.K = torch.tensor([position_gain, velocity_gain], dtype=torch.float32)
def forward(self, state, reference=None):
if reference is None:
reference = torch.zeros_like(state)
error = state - reference
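        # The next three lines solve, in closed form, the scalar box-constrained QP
        #     min_u  0.5 * P * u^2 + q * u   s.t.  -1 <= u <= 1
        # with q = K @ error: the objective is convex in the single variable u, so
        # clamping the unconstrained minimizer -q / P to the box yields exactly
        # the constrained optimum.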
q = torch.sum(self.K * error, dim=-1, keepdim=True)
u_unconstrained = -q / self.P
u = torch.clamp(u_unconstrained, -1.0, 1.0)
return u
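
# Minimal usage sketch for the controller above (illustrative values, not a
# trained model):
#
#     ctrl = MinimalQPRNN(position_gain=3.0, velocity_gain=1.5, control_cost=10.0)
#     u = ctrl(torch.tensor([2.0, 0.0]))  # q = 3.0 * 2.0 = 6.0 -> u = -6/10 = -0.6
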
def simulate_system(position_gain, velocity_gain, control_cost,
initial_position, initial_velocity,
target_position, simulation_time):
"""Run simulation with given parameters."""
# Create controller
controller = MinimalQPRNN(position_gain, velocity_gain, control_cost)
# Setup
dt = 0.05
T = int(simulation_time / dt)
x0 = torch.tensor([initial_position, initial_velocity])
x_ref = torch.tensor([target_position, 0.0])
# Simulate
states = [x0.numpy()]
controls = []
x = x0.clone()
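    # Double integrator dynamics (position' = velocity, velocity' = u),
    # discretized with explicit (forward) Euler at step size dt.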
for t in range(T):
u = controller(x, x_ref)
x_next = torch.zeros_like(x)
x_next[0] = x[0] + x[1] * dt
x_next[1] = x[1] + u.item() * dt
states.append(x_next.numpy())
controls.append(u.item())
x = x_next
return np.array(states), np.array(controls), dt
def create_plots(states, controls, dt, target_position=0.0):
"""Create visualization plots."""
time = np.arange(len(states)) * dt
time_control = time[:-1]
# Create figure with subplots
fig = plt.figure(figsize=(12, 10))
# Position subplot
ax1 = plt.subplot(3, 2, 1)
ax1.plot(time, states[:, 0], 'b-', linewidth=2)
    ax1.axhline(y=target_position, color='r', linestyle='--', alpha=0.5)
ax1.set_ylabel('Position')
ax1.set_title('Position vs Time')
ax1.grid(True, alpha=0.3)
# Velocity subplot
ax2 = plt.subplot(3, 2, 2)
ax2.plot(time, states[:, 1], 'g-', linewidth=2)
ax2.axhline(y=0, color='r', linestyle='--', alpha=0.5)
ax2.set_ylabel('Velocity')
ax2.set_title('Velocity vs Time')
ax2.grid(True, alpha=0.3)
# Control subplot
ax3 = plt.subplot(3, 2, 3)
ax3.plot(time_control, controls, 'r-', linewidth=2)
ax3.axhline(y=1, color='k', linestyle=':', alpha=0.5)
ax3.axhline(y=-1, color='k', linestyle=':', alpha=0.5)
ax3.set_ylabel('Control Input')
ax3.set_xlabel('Time (s)')
ax3.set_title('Control Input vs Time')
ax3.grid(True, alpha=0.3)
ax3.set_ylim(-1.2, 1.2)
# Phase portrait
ax4 = plt.subplot(3, 2, 4)
ax4.plot(states[:, 0], states[:, 1], 'b-', linewidth=2)
ax4.scatter([states[0, 0]], [states[0, 1]], color='green', s=100, marker='o', label='Start')
ax4.scatter([states[-1, 0]], [states[-1, 1]], color='red', s=100, marker='x', label='End')
ax4.set_xlabel('Position')
ax4.set_ylabel('Velocity')
ax4.set_title('Phase Portrait')
ax4.legend()
ax4.grid(True, alpha=0.3)
# QP visualization
ax5 = plt.subplot(3, 2, 5)
    # Fraction of time steps where the control is at (or within 1% of) the bound
    pct_saturated = np.sum(np.abs(controls) >= 0.99) / len(controls) * 100
    labels = ['Saturated', 'Unsaturated']
    sizes = [pct_saturated, 100 - pct_saturated]
colors = ['red', 'blue']
ax5.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%')
ax5.set_title('Control Saturation')
# Metrics text
ax6 = plt.subplot(3, 2, 6)
ax6.axis('off')
    metrics_text = f"""Performance Metrics:
Final Position Error: {abs(states[-1, 0] - target_position):.4f}
Final Velocity: {states[-1, 1]:.4f}
Control Effort (L1): {np.sum(np.abs(controls)):.2f}
Control Effort (L2): {np.sqrt(np.sum(controls**2)):.2f}
Simulation Time: {len(controls) * dt:.1f}s
Peak |Position|: {np.max(np.abs(states[:, 0])):.2f}
"""
ax6.text(0.1, 0.5, metrics_text, fontsize=12, verticalalignment='center',
fontfamily='monospace', bbox=dict(boxstyle="round,pad=0.5", facecolor="lightgray"))
plt.suptitle('QP-RNN Control Simulation Results', fontsize=16)
plt.tight_layout()
return fig
def run_qp_rnn_demo(position_gain, velocity_gain, control_cost,
initial_position, initial_velocity,
target_position, simulation_time):
"""Main function for Gradio interface."""
# Run simulation
states, controls, dt = simulate_system(
position_gain, velocity_gain, control_cost,
initial_position, initial_velocity,
target_position, simulation_time
)
# Create plots
    fig = create_plots(states, controls, dt, target_position)
# Create description
description = f"""
### QP-RNN Control Results
The QP-RNN controller solves the following optimization problem at each time step:
```
min  0.5 * {control_cost} * u² + (K @ error) * u
s.t. -1 ≤ u ≤ 1
```
where K = [{position_gain}, {velocity_gain}] are the feedback gains.
**Final State:** Position = {states[-1, 0]:.3f}, Velocity = {states[-1, 1]:.3f}
**Key Features:**
- Guaranteed constraint satisfaction (control always in [-1, 1])
- Interpretable structure (quadratic cost + linear feedback)
- Can be trained via RL for complex tasks
"""
return fig, description
# Create Gradio interface
iface = gr.Interface(
fn=run_qp_rnn_demo,
inputs=[
gr.Slider(0.1, 10.0, value=3.0, label="Position Gain (Kp)",
info="Higher values = faster position correction"),
gr.Slider(0.1, 5.0, value=1.5, label="Velocity Gain (Kv)",
info="Higher values = more damping"),
gr.Slider(0.1, 50.0, value=10.0, label="Control Cost",
info="Higher values = less aggressive control"),
gr.Slider(-5.0, 5.0, value=2.0, label="Initial Position"),
gr.Slider(-2.0, 2.0, value=0.0, label="Initial Velocity"),
gr.Slider(-3.0, 3.0, value=0.0, label="Target Position"),
gr.Slider(1.0, 10.0, value=5.0, label="Simulation Time (s)")
],
outputs=[
gr.Plot(label="Simulation Results"),
gr.Markdown(label="Analysis")
],
title="QP-RNN: Quadratic Programming Recurrent Neural Network Demo",
description="""
This interactive demo shows how QP-RNN controllers work for a simple double integrator system.
**What is QP-RNN?**
- Combines Model Predictive Control structure with Deep Reinforcement Learning
- Learns to solve a parameterized Quadratic Program (QP) to generate control actions
- Provides theoretical guarantees (constraint satisfaction, stability verification)
**Try adjusting the parameters** to see how they affect control performance!
Paper: [MPC-Inspired Reinforcement Learning for Verifiable Model-Free Control](https://arxiv.org/abs/2312.05332)
""",
examples=[
[3.0, 1.5, 10.0, 2.0, 0.0, 0.0, 5.0], # Default
[5.0, 2.0, 5.0, 2.0, 0.0, 0.0, 5.0], # Aggressive
[1.0, 0.5, 20.0, 2.0, 0.0, 0.0, 5.0], # Conservative
[3.0, 0.1, 10.0, 2.0, 0.0, 0.0, 5.0], # Underdamped
[3.0, 3.0, 10.0, 2.0, 0.0, 0.0, 5.0], # Overdamped
],
cache_examples=True
)
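
# Headless sanity check (optional): the demo function can also be called
# directly, e.g. with the first example row above, without launching the UI:
#
#     fig, analysis = run_qp_rnn_demo(3.0, 1.5, 10.0, 2.0, 0.0, 0.0, 5.0)
#     fig.savefig("qp_rnn_demo.png")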
if __name__ == "__main__":
iface.launch()