Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 91fa46f

Browse files
author
Shunichi09
authored
Merge pull request #4 from Shunichi09/develop
Develop
2 parents e716272 + f741ec6 commit 91fa46f

32 files changed

+1368
-32
lines changed

‎Environments.md‎

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Environments
2+
3+
| Name | Linear | Nonlinear | State Size | Input size |
4+
|:----------|:---------------:|:----------------:|:----------------:|:----------------:|
5+
| First Order Lag System | ✓ | x | 4 | 2 |
6+
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
7+
| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |
8+
| Cartpole (Swing up) | x | ✓ | 4 | 1 |
9+
10+
## FirstOrderLagEnv
11+
12+
### System equation.
13+
14+
<img src="assets/firstorderlag.png" width="550">
15+
16+
You can set an arbitrary time constant, tau. The default is 0.63 s.
17+
18+
### Cost.
19+
20+
<img src="assets/quadratic_score.png" width="300">
21+
22+
Q = diag[1., 1., 1., 1.],
23+
R = diag[1., 1.]
24+
25+
X_g denotes the goal states.
26+
27+
## TwoWheeledEnv
28+
29+
### System equation.
30+
31+
<img src="assets/twowheeled.png" width="300">
32+
33+
### Cost.
34+
35+
<img src="assets/quadratic_score.png" width="300">
36+
37+
Q = diag[5., 5., 1.],
38+
R = diag[0.1, 0.1]
39+
40+
X_g denotes the goal states.
41+
42+
## CartpoleEnv (Swing up)
43+
44+
### System equation.
45+
46+
<img src="assets/cartpole.png" width="600">
47+
48+
You can set arbitrary parameters: mc, mp, l and g.
49+
50+
Default settings are as follows:
51+
52+
mc = 1, mp = 0.2, l = 0.5, g = 9.81
53+
54+
### Cost.
55+
56+
<img src="assets/cartpole_score.png" width="300">
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
import numpy as np
2-
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
import numpy as np
2+
3+
class CartPoleConfigModule():
    """Configuration for the cart-pole task.

    Holds the task constants, the input bounds, the per-optimizer
    settings (``opt_config``) and the cost functions together with their
    analytic derivatives.

    The state cost implemented here is

        6 * x[0]**2 + 12 * (cos(x[2]) + 1)**2 + 0.1 * x[1]**2 + 0.1 * x[3]**2

    and the same expression is used for the terminal cost. The goal state
    arguments (``g_x`` / ``terminal_g_x``) are accepted for interface
    compatibility but are not used by this cost.

    NOTE(review): the state is presumably
    [cart position, cart velocity, pole angle, pole angular velocity];
    confirm against the environment implementation.
    """
    # parameters
    ENV_NAME = "CartPole-v0"
    TYPE = "Nonlinear"
    TASK_HORIZON = 500
    PRED_LEN = 50
    STATE_SIZE = 4
    INPUT_SIZE = 1
    DT = 0.02
    # cost parameters
    R = np.diag([0.01])
    # bounds
    INPUT_LOWER_BOUND = np.array([-3.])
    INPUT_UPPER_BOUND = np.array([3.])
    # parameters
    MP = 0.2
    MC = 1.
    L = 0.5
    G = 9.81

    def __init__(self):
        """Create the per-optimizer configuration dictionary."""
        # opt configs
        self.opt_config = {
            "Random": {
                "popsize": 5000
            },
            "CEM": {
                "popsize": 500,
                "num_elites": 50,
                "max_iters": 15,
                "alpha": 0.3,
                "init_var": 9.,
                "threshold": 0.001
            },
            "MPPI": {
                "beta": 0.6,
                "popsize": 5000,
                "kappa": 0.9,
                "noise_sigma": 0.5,
            },
            "MPPIWilliams": {
                "popsize": 5000,
                "lambda": 1.,
                "noise_sigma": 0.9,
            },
            "iLQR": {
                "max_iter": 500,
                "init_mu": 1.,
                "mu_min": 1e-6,
                "mu_max": 1e10,
                "init_delta": 2.,
                "threshold": 1e-6,
            },
            "DDP": {
                "max_iter": 500,
                "init_mu": 1.,
                "mu_min": 1e-6,
                "mu_max": 1e10,
                "init_delta": 2.,
                "threshold": 1e-6,
            },
            "NMPC-CGMRES": {
            },
            "NMPC-Newton": {
            },
        }

    @staticmethod
    def _state_cost(x):
        """Evaluate the state cost along the last axis of ``x``.

        Args:
            x (numpy.ndarray): state, shape(..., state_size)
        Returns:
            cost (numpy.ndarray): cost, shape(...) (last axis reduced)
        """
        # Term order is kept as in the original expression so floating
        # point results are unchanged.
        return (6. * (x[..., 0]**2)
                + 12. * ((np.cos(x[..., 2]) + 1.)**2)
                + 0.1 * (x[..., 1]**2)
                + 0.1 * (x[..., 3]**2))

    @staticmethod
    def input_cost_fn(u):
        """ input cost functions
        Args:
            u (numpy.ndarray): input, shape(pred_len, input_size)
                or shape(pop_size, pred_len, input_size)
        Returns:
            cost (numpy.ndarray): cost of input, shape(pred_len, input_size) or
                shape(pop_size, pred_len, input_size)
        """
        return (u**2) * np.diag(CartPoleConfigModule.R)

    @staticmethod
    def state_cost_fn(x, g_x):
        """ state cost function
        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
                or shape(pop_size, pred_len, state_size)
                or shape(state_size, )
            g_x (numpy.ndarray): goal state, same shape as x (unused by
                this cost)
        Returns:
            cost (numpy.ndarray): cost of state, shape(pred_len, 1) or
                shape(pop_size, pred_len, 1); a scalar when x is 1-D
        """
        x = np.asarray(x)
        cost = CartPoleConfigModule._state_cost(x)
        if x.ndim > 1:
            # batched input keeps a trailing axis of length 1
            return cost[..., np.newaxis]
        return cost

    @staticmethod
    def terminal_state_cost_fn(terminal_x, terminal_g_x):
        """ terminal state cost function
        Args:
            terminal_x (numpy.ndarray): terminal state,
                shape(state_size, ) or shape(pop_size, state_size)
            terminal_g_x (numpy.ndarray): terminal goal state,
                shape(state_size, ) or shape(pop_size, state_size)
                (unused by this cost)
        Returns:
            cost (numpy.ndarray): cost of state, a scalar for 1-D input or
                shape(pop_size, 1)
        """
        terminal_x = np.asarray(terminal_x)
        cost = CartPoleConfigModule._state_cost(terminal_x)
        if terminal_x.ndim > 1:
            return cost[..., np.newaxis]
        return cost

    @staticmethod
    def gradient_cost_fn_with_state(x, g_x, terminal=False):
        """ gradient of costs with respect to the state

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
                or shape(state_size, )
            g_x (numpy.ndarray): goal state, same shape as x (unused by
                this cost)
            terminal (bool): accepted for interface compatibility; the
                stage and terminal costs share the same expression, so
                the gradient is the same in both cases

        Returns:
            l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
                or shape(1, state_size) when x is 1-D
        """
        # A 1-D state is promoted to a single row so both documented
        # output shapes come from the same code path.
        x = np.atleast_2d(x)
        theta = x[:, 2]
        return np.stack(
            (12. * x[:, 0],
             0.2 * x[:, 1],
             # d/dtheta of 12 * (cos(theta) + 1)**2
             -24. * (np.cos(theta) + 1.) * np.sin(theta),
             0.2 * x[:, 3]),
            axis=1)

    @staticmethod
    def gradient_cost_fn_with_input(x, u):
        """ gradient of costs with respect to the input

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            u (numpy.ndarray): input, shape(pred_len, input_size)

        Returns:
            l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
        """
        # derivative of u**2 * diag(R), matching input_cost_fn
        return 2. * u * np.diag(CartPoleConfigModule.R)

    @staticmethod
    def hessian_cost_fn_with_state(x, g_x, terminal=False):
        """ hessian costs with respect to the state

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
                or shape(state_size, )
            g_x (numpy.ndarray): goal state, same shape as x (unused by
                this cost)
            terminal (bool): accepted for interface compatibility; the
                stage and terminal costs share the same expression, so
                the hessian is the same in both cases

        Returns:
            l_xx (numpy.ndarray): hessian of cost,
                shape(pred_len, state_size, state_size) or
                shape(1, state_size, state_size) when x is 1-D
        """
        x = np.atleast_2d(x)
        (n_steps, state_size) = x.shape
        theta = x[:, 2]

        # The cost is a sum of single-variable terms, so only the
        # diagonal entries are non-zero.
        l_xx = np.zeros((n_steps, state_size, state_size))
        l_xx[:, 0, 0] = 12.
        l_xx[:, 1, 1] = 0.2
        # second derivative of 12 * (cos(theta) + 1)**2
        l_xx[:, 2, 2] = 24. * ((np.sin(theta)**2)
                               - np.cos(theta) * (np.cos(theta) + 1.))
        l_xx[:, 3, 3] = 0.2
        return l_xx

    @staticmethod
    def hessian_cost_fn_with_input(x, u):
        """ hessian costs with respect to the input

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            u (numpy.ndarray): input, shape(pred_len, input_size)

        Returns:
            l_uu (numpy.ndarray): hessian of cost,
                shape(pred_len, input_size, input_size)
        """
        (pred_len, _) = u.shape
        # the input cost is quadratic, so the hessian is the constant 2 * R
        return np.tile(2. * CartPoleConfigModule.R, (pred_len, 1, 1))

    @staticmethod
    def hessian_cost_fn_with_input_state(x, u):
        """ hessian costs with respect to the state and input

        Args:
            x (numpy.ndarray): state, shape(pred_len, state_size)
            u (numpy.ndarray): input, shape(pred_len, input_size)

        Returns:
            l_ux (numpy.ndarray): hessian of cost,
                shape(pred_len, input_size, state_size)
        """
        (_, state_size) = x.shape
        (pred_len, input_size) = u.shape

        # the cost has no cross terms between state and input
        return np.zeros((pred_len, input_size, state_size))

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /