#!/usr/bin/env python3
"""
Gradio app for QP-RNN interactive demo.
Suitable for deployment on Hugging Face Spaces.
"""
import gradio as gr
import torch
import numpy as np
import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend
import matplotlib.pyplot as plt


class MinimalQPRNN(torch.nn.Module):
    """Minimal QP-RNN for demonstration.

    The control is the closed-form solution of a one-dimensional
    box-constrained QP: min_u 0.5*P*u^2 + (K @ error)*u  s.t. -1 <= u <= 1.
    """

    def __init__(self, position_gain=3.0, velocity_gain=1.5, control_cost=10.0):
        super().__init__()
        # Quadratic cost on the control and linear feedback gains
        self.P = torch.tensor([[control_cost]], dtype=torch.float32)
        self.K = torch.tensor([position_gain, velocity_gain], dtype=torch.float32)

    def forward(self, state, reference=None):
        if reference is None:
            reference = torch.zeros_like(state)
        error = state - reference
        # Linear term of the QP: q = K @ error
        q = torch.sum(self.K * error, dim=-1, keepdim=True)
        # Unconstrained minimizer of 0.5*P*u^2 + q*u, projected onto [-1, 1]
        u_unconstrained = -q / self.P
        u = torch.clamp(u_unconstrained, -1.0, 1.0)
        return u
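
# The clamp above is the exact solution of the scalar box-constrained QP, so no
# iterative solver is needed. A minimal sanity-check sketch (illustrative only, not
# used by the app; the helper name and the grid-search reference are assumptions):
def _check_closed_form(controller, state, reference, grid_points=2001):
    """Compare the closed-form control against a brute-force grid search over [-1, 1]."""
    error = state - reference
    q = float(torch.sum(controller.K * error))   # linear term of the QP
    P = float(controller.P)                      # quadratic cost on the control
    grid = np.linspace(-1.0, 1.0, grid_points)
    objective = 0.5 * P * grid**2 + q * grid
    u_grid = grid[np.argmin(objective)]
    u_closed_form = float(controller(state, reference))
    return abs(u_grid - u_closed_form) < 1e-2

# Example: _check_closed_form(MinimalQPRNN(), torch.tensor([2.0, 0.0]), torch.zeros(2)) -> True
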
def simulate_system(position_gain, velocity_gain, control_cost,
                    initial_position, initial_velocity, target_position,
                    simulation_time):
    """Run a double-integrator simulation with the given parameters."""
    # Create controller
    controller = MinimalQPRNN(position_gain, velocity_gain, control_cost)

    # Setup
    dt = 0.05
    T = int(simulation_time / dt)
    x0 = torch.tensor([initial_position, initial_velocity], dtype=torch.float32)
    x_ref = torch.tensor([target_position, 0.0], dtype=torch.float32)

    # Simulate the double integrator with explicit Euler integration
    states = [x0.numpy()]
    controls = []
    x = x0.clone()
    for _ in range(T):
        u = controller(x, x_ref)
        x_next = torch.zeros_like(x)
        x_next[0] = x[0] + x[1] * dt         # position update
        x_next[1] = x[1] + u.item() * dt     # velocity update (u acts as acceleration)
        states.append(x_next.numpy())
        controls.append(u.item())
        x = x_next

    return np.array(states), np.array(controls), dt


def create_plots(states, controls, dt, target_position=0.0):
    """Create visualization plots."""
    time = np.arange(len(states)) * dt
    time_control = time[:-1]

    # Create figure with subplots
    fig = plt.figure(figsize=(12, 10))

    # Position subplot
    ax1 = plt.subplot(3, 2, 1)
    ax1.plot(time, states[:, 0], 'b-', linewidth=2)
    ax1.axhline(y=target_position, color='r', linestyle='--', alpha=0.5)
    ax1.set_ylabel('Position')
    ax1.set_title('Position vs Time')
    ax1.grid(True, alpha=0.3)

    # Velocity subplot
    ax2 = plt.subplot(3, 2, 2)
    ax2.plot(time, states[:, 1], 'g-', linewidth=2)
    ax2.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    ax2.set_ylabel('Velocity')
    ax2.set_title('Velocity vs Time')
    ax2.grid(True, alpha=0.3)

    # Control subplot
    ax3 = plt.subplot(3, 2, 3)
    ax3.plot(time_control, controls, 'r-', linewidth=2)
    ax3.axhline(y=1, color='k', linestyle=':', alpha=0.5)
    ax3.axhline(y=-1, color='k', linestyle=':', alpha=0.5)
    ax3.set_ylabel('Control Input')
    ax3.set_xlabel('Time (s)')
    ax3.set_title('Control Input vs Time')
    ax3.grid(True, alpha=0.3)
    ax3.set_ylim(-1.2, 1.2)

    # Phase portrait
    ax4 = plt.subplot(3, 2, 4)
    ax4.plot(states[:, 0], states[:, 1], 'b-', linewidth=2)
    ax4.scatter([states[0, 0]], [states[0, 1]], color='green', s=100, marker='o', label='Start')
    ax4.scatter([states[-1, 0]], [states[-1, 1]], color='red', s=100, marker='x', label='End')
    ax4.set_xlabel('Position')
    ax4.set_ylabel('Velocity')
    ax4.set_title('Phase Portrait')
    ax4.legend()
    ax4.grid(True, alpha=0.3)

    # Control saturation: fraction of time steps at the input bounds
    ax5 = plt.subplot(3, 2, 5)
    time_saturated = np.sum(np.abs(controls) >= 0.99) / len(controls) * 100
    labels = ['Saturated', 'Unsaturated']
    sizes = [time_saturated, 100 - time_saturated]
    colors = ['red', 'blue']
    ax5.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%')
    ax5.set_title('Control Saturation')

    # Metrics text
    ax6 = plt.subplot(3, 2, 6)
    ax6.axis('off')
    signed_error = states[:, 0] - target_position
    # Settling time: first time after which |position error| stays within 5% of the initial error
    band = 0.05 * max(abs(signed_error[0]), 1e-6)
    settled_from = next((i for i in range(len(signed_error))
                         if np.all(np.abs(signed_error[i:]) <= band)), None)
    settling_str = f"{settled_from * dt:.2f}s" if settled_from is not None else "not settled"
    # Overshoot: how far the position crosses past the target (0 if it never crosses)
    if abs(signed_error[0]) > 1e-9:
        overshoot = max(0.0, float(np.max(-np.sign(signed_error[0]) * signed_error)))
    else:
        overshoot = 0.0
    metrics_text = f"""Performance Metrics:

Final Position Error: {abs(signed_error[-1]):.4f}
Final Velocity: {states[-1, 1]:.4f}
Control Effort (L1): {np.sum(np.abs(controls)):.2f}
Control Effort (L2): {np.sqrt(np.sum(controls**2)):.2f}
Settling Time (5% band): {settling_str}
Max Overshoot: {overshoot:.2f}
"""
    ax6.text(0.1, 0.5, metrics_text, fontsize=12,
             verticalalignment='center', fontfamily='monospace',
             bbox=dict(boxstyle="round,pad=0.5", facecolor="lightgray"))

    plt.suptitle('QP-RNN Control Simulation Results', fontsize=16)
    plt.tight_layout()

    return fig


def run_qp_rnn_demo(position_gain, velocity_gain, control_cost,
                    initial_position, initial_velocity, target_position,
                    simulation_time):
    """Main function for the Gradio interface."""
    # Run simulation
    states, controls, dt = simulate_system(
        position_gain, velocity_gain, control_cost,
        initial_position, initial_velocity, target_position,
        simulation_time
    )

    # Create plots
    fig = create_plots(states, controls, dt, target_position)

    # Create description
    description = f"""
### QP-RNN Control Results

The QP-RNN controller solves the following optimization problem at each time step:

```
min  0.5 * u² * {control_cost} + u * (K @ error)
s.t. -1 ≤ u ≤ 1
```

Where K = [{position_gain}, {velocity_gain}] are the feedback gains.

**Final State:** Position = {states[-1, 0]:.3f}, Velocity = {states[-1, 1]:.3f}

**Key Features:**
- Guaranteed constraint satisfaction (control always in [-1, 1])
- Interpretable structure (quadratic cost + linear feedback)
- Can be trained via RL for complex tasks
"""

    return fig, description


# Create Gradio interface
iface = gr.Interface(
    fn=run_qp_rnn_demo,
    inputs=[
        gr.Slider(0.1, 10.0, value=3.0, label="Position Gain (Kp)",
                  info="Higher values = faster position correction"),
        gr.Slider(0.1, 5.0, value=1.5, label="Velocity Gain (Kv)",
                  info="Higher values = more damping"),
        gr.Slider(0.1, 50.0, value=10.0, label="Control Cost",
                  info="Higher values = less aggressive control"),
        gr.Slider(-5.0, 5.0, value=2.0, label="Initial Position"),
        gr.Slider(-2.0, 2.0, value=0.0, label="Initial Velocity"),
        gr.Slider(-3.0, 3.0, value=0.0, label="Target Position"),
        gr.Slider(1.0, 10.0, value=5.0, label="Simulation Time (s)")
    ],
    outputs=[
        gr.Plot(label="Simulation Results"),
        gr.Markdown(label="Analysis")
    ],
    title="QP-RNN: Quadratic Programming Recurrent Neural Network Demo",
    description="""
This interactive demo shows how QP-RNN controllers work for a simple double integrator system.

**What is QP-RNN?**
- Combines Model Predictive Control structure with Deep Reinforcement Learning
- Learns to solve a parameterized Quadratic Program (QP) to generate control actions
- Provides theoretical guarantees (constraint satisfaction, stability verification)

**Try adjusting the parameters** to see how they affect control performance!

Paper: [MPC-Inspired Reinforcement Learning for Verifiable Model-Free Control](https://arxiv.org/abs/2312.05332)
""",
    examples=[
        [3.0, 1.5, 10.0, 2.0, 0.0, 0.0, 5.0],  # Default
        [5.0, 2.0, 5.0, 2.0, 0.0, 0.0, 5.0],   # Aggressive
        [1.0, 0.5, 20.0, 2.0, 0.0, 0.0, 5.0],  # Conservative
        [3.0, 0.1, 10.0, 2.0, 0.0, 0.0, 5.0],  # Underdamped
        [3.0, 3.0, 10.0, 2.0, 0.0, 0.0, 5.0],  # Overdamped
    ],
    cache_examples=True
)

if __name__ == "__main__":
    iface.launch()
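
# Deployment note (a sketch, not taken from the original repo): on Hugging Face Spaces
# this script is conventionally saved as `app.py` and accompanied by a `requirements.txt`
# covering the imports above, e.g.:
#
#   gradio
#   torch
#   numpy
#   matplotlib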