1+ import numpy as np
class CartPoleConfigModule():
    """Configuration, cost functions and cost derivatives for CartPole-v0.

    The state cost used throughout this class is

        6 * x[0]**2 + 12 * (cos(x[2]) + 1)**2 + 0.1 * x[1]**2 + 0.1 * x[3]**2

    and the input cost is ``u**2 * diag(R)``. The goal state arguments
    (``g_x`` / ``terminal_g_x``) are accepted for interface compatibility
    but are not used by any cost in this module.
    """
    # parameters
    ENV_NAME = "CartPole-v0"
    TYPE = "Nonlinear"
    TASK_HORIZON = 500
    PRED_LEN = 50
    STATE_SIZE = 4
    INPUT_SIZE = 1
    DT = 0.02
    # cost parameters
    R = np.diag([0.01])
    # bounds
    INPUT_LOWER_BOUND = np.array([-3.])
    INPUT_UPPER_BOUND = np.array([3.])
    # parameters
    # NOTE(review): presumably pole mass, cart mass, pole length and
    # gravity -- confirm against the model that consumes them.
    MP = 0.2
    MC = 1.
    L = 0.5
    G = 9.81

    def __init__(self):
        """ build the per-optimizer hyper parameter table ``opt_config``,
        keyed by optimizer name
        """
        # opt configs
        self.opt_config = {
            "Random": {
                "popsize": 5000
            },
            "CEM": {
                "popsize": 500,
                "num_elites": 50,
                "max_iters": 15,
                "alpha": 0.3,
                "init_var": 9.,
                "threshold": 0.001
            },
            "MPPI": {
                "beta": 0.6,
                "popsize": 5000,
                "kappa": 0.9,
                "noise_sigma": 0.5,
            },
            "MPPIWilliams": {
                "popsize": 5000,
                "lambda": 1.,
                "noise_sigma": 0.9,
            },
            "iLQR": {
                "max_iter": 500,
                "init_mu": 1.,
                "mu_min": 1e-6,
                "mu_max": 1e10,
                "init_delta": 2.,
                "threshold": 1e-6,
            },
            "DDP": {
                "max_iter": 500,
                "init_mu": 1.,
                "mu_min": 1e-6,
                "mu_max": 1e10,
                "init_delta": 2.,
                "threshold": 1e-6,
            },
            "NMPC-CGMRES": {
            },
            "NMPC-Newton": {
            },
        }

    @staticmethod
    def _state_cost(x):
        """ state cost evaluated along the last axis of x

        Args:
            x (numpy.ndarray): state, shape(..., state_size)
        Returns:
            cost (numpy.ndarray): cost of state, shape(...) i.e. the shape
                of x without its last axis
        """
        return 6. * (x[..., 0]**2) \
            + 12. * ((np.cos(x[..., 2]) + 1.)**2) \
            + 0.1 * (x[..., 1]**2) \
            + 0.1 * (x[..., 3]**2)

    @staticmethod
    def input_cost_fn(u):
        """ input cost functions
        Args:
            u (numpy.ndarray): input, shape(pred_len, input_size)
                or shape(pop_size, pred_len, input_size)
        Returns:
            cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
                shape(pop_size, pred_len, input_size)
        """
        # np.diag of the 2D matrix R extracts its diagonal, which
        # broadcasts against the trailing input axis of u
        return (u**2) * np.diag(CartPoleConfigModule.R)

    @staticmethod
    def state_cost_fn(x, g_x):
        """ state cost function
        Args:
            x (numpy.ndarray): state, shape(state_size, ),
                shape(pred_len, state_size)
                or shape(pop_size, pred_len, state_size)
            g_x (numpy.ndarray): goal state, same shape as x (unused)
        Returns:
            cost (numpy.ndarray): cost of state, scalar for 1D input,
                shape(pred_len, 1) or shape(pop_size, pred_len, 1)
        """
        cost = CartPoleConfigModule._state_cost(x)

        if len(x.shape) > 1:
            # keep a trailing singleton axis for batched input
            return cost[..., np.newaxis]

        return cost

    @staticmethod
    def terminal_state_cost_fn(terminal_x, terminal_g_x):
        """ terminal state cost function (same weights as the state cost)
        Args:
            terminal_x (numpy.ndarray): terminal state,
                shape(state_size, ) or shape(pop_size, state_size)
            terminal_g_x (numpy.ndarray): terminal goal state,
                shape(state_size, ) or shape(pop_size, state_size) (unused)
        Returns:
            cost (numpy.ndarray): cost of state, scalar for 1D input or
                shape(pop_size, 1)
        """
        cost = CartPoleConfigModule._state_cost(terminal_x)

        if len(terminal_x.shape) > 1:
            return cost[..., np.newaxis]

        return cost

    @staticmethod
    def gradient_cost_fn_with_state(x, g_x, terminal=False):
        """ gradient of costs with respect to the state

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size);
                a single state of shape(state_size, ) is also accepted
                (assumed to be what callers pass when terminal is True
                -- TODO confirm against the iLQR/DDP caller)
            g_x (numpy.ndarray): goal state, same shape as x (unused)
            terminal (bool): kept for interface compatibility; the
                terminal cost uses the same weights as the state cost,
                so the formula does not depend on it

        Returns:
            l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
                or shape(1, state_size)
        """
        # a single state becomes one row, giving the shape(1, state_size)
        # result
        x = np.atleast_2d(x)
        sin_x2 = np.sin(x[:, 2])
        cos_x2 = np.cos(x[:, 2])

        # d/dx2 of 12 * (cos(x2) + 1)**2 = -24 * (cos(x2) + 1) * sin(x2)
        return np.stack((12. * x[:, 0],
                         0.2 * x[:, 1],
                         -24. * (cos_x2 + 1.) * sin_x2,
                         0.2 * x[:, 3]), axis=1)

    @staticmethod
    def gradient_cost_fn_with_input(x, u):
        """ gradient of costs with respect to the input

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size) (unused)
            u (numpy.ndarray): input, shape(pred_len, input_size)

        Returns:
            l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
        """
        # derivative of u**2 * diag(R)
        return 2. * u * np.diag(CartPoleConfigModule.R)

    @staticmethod
    def hessian_cost_fn_with_state(x, g_x, terminal=False):
        """ hessian costs with respect to the state

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size);
                a single state of shape(state_size, ) is also accepted
                (assumed to be what callers pass when terminal is True
                -- TODO confirm against the iLQR/DDP caller)
            g_x (numpy.ndarray): goal state, same shape as x (unused)
            terminal (bool): kept for interface compatibility; the
                terminal cost uses the same weights as the state cost,
                so the formula does not depend on it

        Returns:
            l_xx (numpy.ndarray): hessian of cost,
                shape(pred_len, state_size, state_size) or
                shape(1, state_size, state_size)
        """
        x = np.atleast_2d(x)
        (pred_len, state_size) = x.shape
        sin_x2 = np.sin(x[:, 2])
        cos_x2 = np.cos(x[:, 2])

        # the cost is a sum of single-variable terms, so every
        # off-diagonal second derivative is zero
        l_xx = np.zeros((pred_len, state_size, state_size))
        l_xx[:, 0, 0] = 12.
        l_xx[:, 1, 1] = 0.2
        # d/dx2 of -24 * (cos(x2) + 1) * sin(x2)
        l_xx[:, 2, 2] = 24. * (sin_x2**2 - (cos_x2 + 1.) * cos_x2)
        l_xx[:, 3, 3] = 0.2

        return l_xx

    @staticmethod
    def hessian_cost_fn_with_input(x, u):
        """ hessian costs with respect to the input

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size) (unused)
            u (numpy.ndarray): input, shape(pred_len, input_size)

        Returns:
            l_uu (numpy.ndarray): hessian of cost,
                shape(pred_len, input_size, input_size)
        """
        (pred_len, _) = u.shape

        # second derivative of u**2 * diag(R) is the constant 2 * R,
        # repeated for every step
        return np.tile(2. * CartPoleConfigModule.R, (pred_len, 1, 1))

    @staticmethod
    def hessian_cost_fn_with_input_state(x, u):
        """ hessian costs with respect to the state and input

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            u (numpy.ndarray): input, shape(pred_len, input_size)

        Returns:
            l_ux (numpy.ndarray): hessian of cost,
                shape(pred_len, input_size, state_size)
        """
        (_, state_size) = x.shape
        (pred_len, input_size) = u.shape

        # state cost and input cost are separate terms: no cross coupling
        return np.zeros((pred_len, input_size, state_size))
0 commit comments