Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4e01264

Browse files
author
Shunichi09
committed
Add: Environments.md and MPPIWilliamns
1 parent a36a8bc commit 4e01264

File tree

20 files changed

+669
-30
lines changed

20 files changed

+669
-30
lines changed

‎Environments.md‎

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Environments
2+
3+
| Name | Linear | Nonlinear | State Size | Input size |
4+
|:----------|:---------------:|:----------------:|:----------------:|:----------------:|
5+
| First Order Lag System || x | 4 | 2 |
6+
| Two wheeled System (Constant Goal) | x || 3 | 2 |
7+
| Two wheeled System (Moving Goal) (Coming soon) | x || 3 | 2 |
8+
| Cartpole (Swing up) | x || 4 | 1 |
9+
10+
## FirstOrderLagEnv
11+
12+
System equations.
13+
14+
<img src="assets/firstorderlag.png" width="550">
15+
16+
You can set an arbitrary time constant, tau. The default is 0.63 s.
17+
18+
## TwoWheeledEnv
19+
20+
System equations.
21+
22+
<img src="assets/twowheeled.png" width="300">
23+
24+
## CartpoleEnv (Swing up)
25+
26+
System equations.
27+
28+
<img src="assets/cartpole.png" width="600">
29+
30+
You can set arbitrary parameters, mc, mp, l and g.
31+
32+
Default settings are as follows:
33+
34+
mc = 1, mp = 0.2, l = 0.5, g = 9.8
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
import numpy as np
2-

‎PythonLinearNonlinearControl/configs/first_order_lag.py‎

Lines changed: 113 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ class FirstOrderLagConfigModule():
55
ENV_NAME = "FirstOrderLag-v0"
66
TYPE = "Linear"
77
TASK_HORIZON = 1000
8-
PRED_LEN = 10
8+
PRED_LEN = 50
99
STATE_SIZE = 4
1010
INPUT_SIZE = 2
1111
DT = 0.05
@@ -43,8 +43,33 @@ def __init__(self):
4343
"kappa": 0.9,
4444
"noise_sigma": 0.5,
4545
},
46+
"MPPIWilliams":{
47+
"popsize": 5000,
48+
"lambda": 1.,
49+
"noise_sigma": 0.9,
50+
},
4651
"MPC":{
47-
}
52+
},
53+
"iLQR":{
54+
"max_iter": 500,
55+
"init_mu": 1.,
56+
"mu_min": 1e-6,
57+
"mu_max": 1e10,
58+
"init_delta": 2.,
59+
"threshold": 1e-6,
60+
},
61+
"DDP":{
62+
"max_iter": 500,
63+
"init_mu": 1.,
64+
"mu_min": 1e-6,
65+
"mu_max": 1e10,
66+
"init_delta": 2.,
67+
"threshold": 1e-6,
68+
},
69+
"NMPC-CGMRES":{
70+
},
71+
"NMPC-Newton":{
72+
},
4873
}
4974

5075
@staticmethod
@@ -86,4 +111,89 @@ def terminal_state_cost_fn(terminal_x, terminal_g_x):
86111
shape(pop_size, pred_len)
87112
"""
88113
return ((terminal_x - terminal_g_x)**2) \
89-
* np.diag(FirstOrderLagConfigModule.Sf)
114+
* np.diag(FirstOrderLagConfigModule.Sf)
115+
116+
@staticmethod
117+
def gradient_cost_fn_with_state(x, g_x, terminal=False):
    """ gradient of the quadratic state cost with respect to the state

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
        g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
        terminal (bool): when True the terminal weight Sf is used instead
            of the stage weight Q, and a new leading axis is added to
            the result

    Returns:
        l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
            or shape(1, state_size)
    """
    # only the diagonal of the weight matrix enters the cost
    if terminal:
        weight_diag = np.diag(FirstOrderLagConfigModule.Sf)
    else:
        weight_diag = np.diag(FirstOrderLagConfigModule.Q)

    l_x = 2. * (x - g_x) * weight_diag

    if terminal:
        return l_x[np.newaxis, :]

    return l_x
133+
134+
@staticmethod
135+
def gradient_cost_fn_with_input(x, u):
    """ gradient of the quadratic input cost with respect to the input

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
            (not used by this cost)
        u (numpy.ndarray): input, shape(pred_len, input_size)

    Returns:
        l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
    """
    # only the diagonal of R enters the cost
    r_diag = np.diag(FirstOrderLagConfigModule.R)
    l_u = 2. * u * r_diag

    return l_u
146+
147+
@staticmethod
148+
def hessian_cost_fn_with_state(x, g_x, terminal=False):
    """ hessian of the quadratic state cost with respect to the state

    The state cost is quadratic in (x - g_x), so its second derivative is
    the constant 2 * weight matrix; it depends on neither x nor g_x.

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
        g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
            (kept for interface compatibility, not used)
        terminal (bool): when True the terminal weight Sf is used instead
            of the stage weight Q

    Returns:
        l_xx (numpy.ndarray): hessian of cost,
            shape(pred_len, state_size, state_size) or
            shape(1, state_size, state_size)
    """
    if not terminal:
        (pred_len, _) = x.shape
        # one copy of 2Q per prediction step
        return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))

    # single terminal step: add a leading axis of length one
    return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
167+
168+
@staticmethod
169+
def hessian_cost_fn_with_input(x, u):
    """ hessian of the quadratic input cost with respect to the input

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
            (not used by this cost)
        u (numpy.ndarray): input, shape(pred_len, input_size)

    Returns:
        l_uu (numpy.ndarray): hessian of cost,
            shape(pred_len, input_size, input_size)
    """
    horizon, _ = u.shape

    # the hessian is the same 2R at every prediction step
    doubled_r = 2.*FirstOrderLagConfigModule.R
    l_uu = np.tile(doubled_r, (horizon, 1, 1))

    return l_uu
183+
184+
@staticmethod
185+
def hessian_cost_fn_with_input_state(x, u):
    """ mixed hessian of the cost with respect to the input and state

    Always returns zeros: this function reports no coupling between
    input and state in the cost.

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
        u (numpy.ndarray): input, shape(pred_len, input_size)

    Returns:
        l_ux (numpy.ndarray): hessian of cost,
            shape(pred_len, input_size, state_size)
    """
    _, n_state = x.shape
    horizon, n_input = u.shape

    l_ux = np.zeros((horizon, n_input, n_state))

    return l_ux

‎PythonLinearNonlinearControl/configs/two_wheeled.py‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ def __init__(self):
3939
"kappa": 0.9,
4040
"noise_sigma": 0.5,
4141
},
42+
"MPPIWilliams":{
43+
"popsize": 5000,
44+
"lambda": 1,
45+
"noise_sigma": 1.,
46+
},
4247
"iLQR":{
4348
"max_iter": 500,
4449
"init_mu": 1.,

‎PythonLinearNonlinearControl/controllers/ddp.py‎

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ def __init__(self, config, model):
2323
"""
2424
super(DDP, self).__init__(config, model)
2525

26-
if config.TYPE != "Nonlinear":
27-
raise ValueError("{} could be not applied to \
28-
this controller".format(model))
29-
3026
# model
3127
self.model = model
3228

@@ -296,6 +292,7 @@ def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol):
296292

297293
def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
298294
""" backward step of iLQR
295+
299296
Args:
300297
f_x (numpy.ndarray): gradient of model with respecto to state,
301298
shape(pred_len+1, state_size, state_size)
@@ -317,7 +314,6 @@ def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
317314
shape(pred_len, input_size, input_size)
318315
l_ux (numpy.ndarray): hessian of cost with respect
319316
to state and input, shape(pred_len, input_size, state_size)
320-
321317
Returns:
322318
k (numpy.ndarray): gain, shape(pred_len, input_size)
323319
K (numpy.ndarray): gain, shape(pred_len, input_size, state_size)
@@ -353,7 +349,8 @@ def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
353349

354350
def _Q(self, f_x, f_u, f_xx, f_ux, f_uu,
355351
l_x, l_u, l_xx, l_ux, l_uu, V_x, V_xx):
356-
"""Computes second order expansion.
352+
""" compute Q function valued
353+
357354
Args:
358355
f_x (numpy.ndarray): gradient of model with respecto to state,
359356
shape(state_size, state_size)

‎PythonLinearNonlinearControl/controllers/ilqr.py‎

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@ def __init__(self, config, model):
2121
"""
2222
super(iLQR, self).__init__(config, model)
2323

24-
if config.TYPE != "Nonlinear":
25-
raise ValueError("{} could be not applied to \
26-
this controller".format(model))
27-
2824
# model
2925
self.model = model
3026

‎PythonLinearNonlinearControl/controllers/make_controllers.py‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from .cem import CEM
33
from .random import RandomShooting
44
from .mppi import MPPI
5+
from .mppi_williams import MPPIWilliams
56
from .ilqr import iLQR
67
from .ddp import DDP
78

@@ -15,6 +16,8 @@ def make_controller(args, config, model):
1516
return RandomShooting(config, model)
1617
elif args.controller_type == "MPPI":
1718
return MPPI(config, model)
19+
elif args.controller_type == "MPPIWilliams":
20+
return MPPIWilliams(config, model)
1821
elif args.controller_type == "iLQR":
1922
return iLQR(config, model)
2023
elif args.controller_type == "DDP":
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
from logging import getLogger
2+
3+
import numpy as np
4+
import scipy.stats as stats
5+
6+
from .controller import Controller
7+
from ..envs.cost import calc_cost
8+
9+
logger = getLogger(__name__)
10+
11+
class MPPIWilliams(Controller):
    """ Model Predictive Path Integral for linear and nonlinear method

    Attributes:
        history_u (list[numpy.ndarray]): time history of optimal input
    Ref:
        G. Williams et al., "Information theoretic MPC
        for model-based reinforcement learning,"
        2017 IEEE International Conference on Robotics and Automation (ICRA),
        Singapore, 2017, pp. 1714-1721.
    """
    def __init__(self, config, model):
        """
        Args:
            config: configuration object providing PRED_LEN, INPUT_SIZE,
                INPUT_UPPER_BOUND, INPUT_LOWER_BOUND, the cost functions
                and opt_config["MPPIWilliams"]
            model: model used to roll out the sampled inputs
                (must provide predict_traj)
        """
        super(MPPIWilliams, self).__init__(config, model)

        # model
        self.model = model

        # general parameters
        self.pred_len = config.PRED_LEN
        self.input_size = config.INPUT_SIZE

        # mppi parameters
        self.pop_size = config.opt_config["MPPIWilliams"]["popsize"]
        self.lam = config.opt_config["MPPIWilliams"]["lambda"]
        self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"]
        self.opt_dim = self.input_size * self.pred_len

        # get bound, repeated for every step of the horizon
        self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
                                          (self.pred_len, 1))
        self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
                                          (self.pred_len, 1))

        # get cost func
        self.state_cost_fn = config.state_cost_fn
        self.terminal_state_cost_fn = config.terminal_state_cost_fn
        self.input_cost_fn = config.input_cost_fn

        # init mean: start from the middle of the input range
        self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
                                 + config.INPUT_LOWER_BOUND) / 2.,
                                self.pred_len)
        self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)

        # save
        self.history_u = [np.zeros(self.input_size)]

    def clear_sol(self):
        """ clear prev sol (reset it to the middle of the input range)
        """
        logger.debug("Clear Solution")
        self.prev_sol = \
            (self.input_upper_bounds + self.input_lower_bounds) / 2.
        self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)

    def calc_cost(self, curr_x, samples, g_xs):
        """ calculate the cost of input samples by using MPPI's eq

        Args:
            curr_x (numpy.ndarray): shape(state_size),
                current robot position
            samples (numpy.ndarray): shape(pop_size, pred_len, input_size),
                input samples
            g_xs (numpy.ndarray): shape(pred_len, state_size),
                goal states
        Returns:
            costs (numpy.ndarray): shape(pop_size, )
        """
        # get size
        pop_size = samples.shape[0]
        # every sample is compared against the same goal trajectory
        g_xs = np.tile(g_xs, (pop_size, 1, 1))

        # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
        pred_xs = self.model.predict_traj(curr_x, samples)

        # get particle cost
        # the input cost fn is passed as None; obtain_sol adds its own
        # lambda-weighted input term instead
        costs = calc_cost(pred_xs, samples, g_xs,
                          self.state_cost_fn, None,
                          self.terminal_state_cost_fn)

        return costs

    def obtain_sol(self, curr_x, g_xs):
        """ calculate the optimal inputs

        Args:
            curr_x (numpy.ndarray): current state, shape(state_size, )
            g_xs (numpy.ndarray): goal trajectory, shape(pred_len, state_size)
        Returns:
            opt_input (numpy.ndarray): optimal input, shape(input_size, )
        """
        # get noised inputs
        noise = np.random.normal(
            loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
                                    self.input_size)) * self.noise_sigma

        # clip actions
        noised_inputs = np.clip(
            self.prev_sol + noise,
            self.input_lower_bounds, self.input_upper_bounds)

        # use the perturbation that was actually applied after clipping,
        # so that the weights and the update are consistent with the
        # trajectories whose cost is evaluated and the solution stays
        # inside the input bounds
        noise = noised_inputs - self.prev_sol

        # calc cost
        costs = self.calc_cost(curr_x, noised_inputs, g_xs)

        # NOTE(review): the reference writes this term with the inverse
        # noise covariance; here it is divided by noise_sigma once -
        # confirm this scaling is intended
        costs += np.sum(np.sum(
            self.lam * self.prev_sol * noise / self.noise_sigma,
            axis=-1), axis=-1)

        # mppi update
        # subtracting the minimum cost keeps the exponent non-positive
        beta = np.min(costs)
        exp_costs = np.exp(- 1. / self.lam * (costs - beta))

        # eta is a scalar normaliser; the small constant avoids
        # a division by zero
        eta = np.sum(exp_costs, axis=0) + 1e-10

        # weight, weights.shape = (pop_size, )
        weights = exp_costs / eta

        # update inputs
        sol = self.prev_sol \
            + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)

        # update: shift the plan one step ahead for the next call
        self.prev_sol[:-1] = sol[1:]
        self.prev_sol[-1] = sol[-1]  # last use the terminal input

        # log
        self.history_u.append(sol[0])

        return sol[0]

    def __str__(self):
        return "MPPIWilliams"

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /