Commit 6f47ed1

Added all projects and lecture notes
1 parent 9d73134 commit 6f47ed1


45 files changed: +5,622 additions, 0 deletions

Assignments/A2C-Pong/a2c_agent.py

Lines changed: 170 additions & 0 deletions
from keras.layers import Dense, Input, Conv2D, Flatten
from keras.models import Model, load_model
from keras.optimizers import RMSprop, Adam
from keras.losses import Huber
import numpy as np
import gym
from collections import deque
import matplotlib.pyplot as plt
import keras.backend.tensorflow_backend as tfback  # needs standalone Keras (<= 2.3)
import keras.backend as K
import tensorflow as tf
import time

# Disabled TF2 compatibility workaround: monkey-patches Keras's GPU discovery
# so that standalone Keras runs on top of TF2. Uncomment if needed.
# def _get_available_gpus():
#     """Get a list of available gpu devices (formatted as strings).
#
#     # Returns
#         A list of available GPU devices.
#     """
#     # global _LOCAL_DEVICES
#     if tfback._LOCAL_DEVICES is None:
#         devices = tf.config.list_logical_devices()
#         tfback._LOCAL_DEVICES = [x.name for x in devices]
#     return [x for x in tfback._LOCAL_DEVICES if 'device:gpu' in x.lower()]
#
# tfback._get_available_gpus = _get_available_gpus


def get_actor_model(input_shape, num_actions, learning_rate):
    """Build the policy network. Returns two models that share weights:
    `model` is trained with the custom policy-gradient loss (which needs the
    advantages as a second input), `prediction` is used for action selection."""
    X_inp = Input(shape=input_shape)
    advantages = Input(shape=[1])
    # Convolutional front end, disabled in favour of a flat dense network:
    # X = Conv2D(32, 8, strides=(4, 4), data_format='channels_first',
    #            activation='relu')(X_inp)
    # X = Conv2D(16, 4, strides=(2, 2), data_format='channels_first',
    #            activation='relu')(X)
    X = Flatten(input_shape=input_shape)(X_inp)
    X = Dense(512, activation='relu', kernel_initializer='he_uniform')(X)
    X = Dense(num_actions, activation='softmax')(X)

    def pg_loss(y_true, y_pred):
        # Advantage-weighted negative log-likelihood. y_true is the one-hot
        # encoding of the taken action, so the product picks out log pi(a|s).
        clipped_y_pred = K.clip(y_pred, 1e-8, 1 - 1e-8)
        log_likelihood = y_true * K.log(clipped_y_pred)
        loss = K.sum(-log_likelihood * advantages)
        return loss

    model = Model(inputs=[X_inp, advantages], outputs=X)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=pg_loss)

    prediction = Model(inputs=X_inp, outputs=X)

    return model, prediction


def get_critic_model(input_shape, learning_rate):
    """Build the value network: same body as the actor, single linear output."""
    X_inp = Input(shape=input_shape)
    # X = Conv2D(32, 8, strides=(4, 4), data_format='channels_first',
    #            activation='relu')(X_inp)
    # X = Conv2D(16, 4, strides=(2, 2), data_format='channels_first',
    #            activation='relu')(X)
    X = Flatten(input_shape=input_shape)(X_inp)
    X = Dense(512, activation='relu', kernel_initializer='he_uniform')(X)
    X = Dense(1, activation='linear')(X)

    model = Model(inputs=X_inp, outputs=X)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=Huber(delta=1.5))

    return model


class A2CAgent(object):
    def __init__(self, env, train_flag=True, num_episodes=20000, actor_learning_rate=0.00025,
                 critic_learning_rate=0.00025, gamma=0.99, model_path=None, num_checkpoints=10):
        self.env = env
        self.actor_learning_rate = actor_learning_rate
        self.critic_learning_rate = critic_learning_rate
        self.gamma = gamma
        self.num_episodes = num_episodes
        self.num_checkpoints = num_checkpoints

        # Pong only needs UP and DOWN, actions 2 and 3 in the Atari action set.
        self.LEFT_ACTION = 2
        self.RIGHT_ACTION = 3
        self.action_space = [self.LEFT_ACTION, self.RIGHT_ACTION]

        self.num_actions = len(self.action_space)
        # self.num_actions = self.env.n_action
        self.model_path = model_path

        if train_flag:
            self.actor_model, self.prediction = get_actor_model(
                self.env.observation_shape, self.num_actions, self.actor_learning_rate)
            self.critic_model = get_critic_model(
                self.env.observation_shape, self.critic_learning_rate)
        else:
            assert model_path is not None, "Please pass model_path when train_flag is False"
            self.prediction = load_model(model_path)

    def get_discounted_rewards(self, reward, gamma):
        """Discounted returns, with the running sum reset at every non-zero
        reward (in Pong a point ends the rally), then normalized to zero mean
        and unit variance."""
        running_add = 0
        discounted_r = np.zeros_like(reward, dtype=np.float64)
        for i in reversed(range(len(reward))):
            if reward[i] != 0:
                running_add = 0
            running_add = running_add * gamma + reward[i]
            discounted_r[i] = running_add

        # Normalize the discounted rewards
        discounted_r -= np.mean(discounted_r)
        discounted_r /= np.std(discounted_r)
        return discounted_r

    def train(self, render=False):
        all_episode_scores = []
        best_score = float('-inf')
        for episode in range(self.num_episodes):
            states = []
            actions = []
            rewards = []
            state = self.env.reset()
            episode_score = 0
            t = 0
            while True:
                if render:
                    self.env.render()
                # Sample an action from the current policy.
                action_probabilities = self.prediction.predict(state)[0]
                action = np.random.choice(range(self.num_actions), p=action_probabilities)
                next_state, reward, done, info = self.env.step(self.action_space[action])
                states.append(state)
                ohe_action = np.zeros(self.num_actions, dtype=np.float64)
                ohe_action[action] = 1
                actions.append(ohe_action)
                rewards.append(reward)

                state = next_state
                episode_score += reward
                t += 1
                if done or t > 10000:
                    all_episode_scores.append(episode_score)
                    print("Episode {}/{} | Episode score : {} ({:.4})".format(
                        episode + 1, self.num_episodes, episode_score,
                        np.mean(all_episode_scores[-50:])))
                    if np.mean(all_episode_scores[-50:]) > best_score:
                        best_score = np.mean(all_episode_scores[-50:])
                        self.prediction.save(self.model_path)
                        print('Model Saved!')
                    break

            # One A2C update per episode over the whole rollout.
            states_batch = np.vstack(states)
            actions_batch = np.vstack(actions)
            discounted_rewards = self.get_discounted_rewards(rewards, self.gamma)
            values = self.critic_model.predict(states_batch)[:, 0]
            advantages = discounted_rewards - values
            self.actor_model.train_on_batch([states_batch, advantages], actions_batch)
            self.critic_model.train_on_batch(states_batch, discounted_rewards)
            if self.num_checkpoints != 0 and episode % (self.num_episodes // self.num_checkpoints) == 0:
                self.prediction.save('./saved_models/a2c-{:06d}.model'.format(episode))
        self.env.close()

    def test(self, render=True):
        for episode in range(self.num_episodes):
            state = self.env.reset()
            episode_score = 0
            while True:
                if render:
                    self.env.render()
                    time.sleep(0.001)
                # Act greedily with respect to the learned policy.
                action_probabilities = self.prediction.predict(state)[0]
                action = np.argmax(action_probabilities)
                next_state, reward, done, info = self.env.step(self.action_space[action])
                state = next_state
                episode_score += reward
                if done:
                    print("Episode {}/{} | Episode score : {}".format(
                        episode + 1, self.num_episodes, episode_score))
                    break
        self.env.close()
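A note on the reward-reset trick in get_discounted_rewards: it is specific to Pong, where the environment returns a reward of +1 or -1 exactly when a point is scored and 0 otherwise, so each rally is treated as its own unit for credit assignment. A minimal standalone sketch of that logic (plain NumPy, normalization omitted, and the reward sequence is a made-up toy example, not real environment output):

import numpy as np

def discount(rewards, gamma=0.99):
    # Same recurrence as A2CAgent.get_discounted_rewards, minus normalization.
    running_add = 0.0
    out = np.zeros(len(rewards))
    for i in reversed(range(len(rewards))):
        if rewards[i] != 0:
            running_add = 0.0  # a point was scored; the rally ends here
        running_add = running_add * gamma + rewards[i]
        out[i] = running_add
    return out

# Two rallies: the first lost (-1) after three frames, the second won (+1).
print(discount([0, 0, -1, 0, 1]))
# -> [-0.9801 -0.99   -1.      0.99    1.    ]

The frame on which a point is conceded gets the full -1, and earlier frames of the same rally get geometrically less blame; the winning rally's frames are credited the same way with positive sign.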

Assignments/A2C-Pong/main.py

Lines changed: 46 additions & 0 deletions
from a2c_agent import A2CAgent
from pong import PongEnv
import argparse

ENV = 'PongDeterministic-v4'
MODEL_PATH = './saved_models/a2c-best_model.model'
TRAIN_EPISODES = 1000
TEST_EPISODES = 10
GAMMA = 0.99
ACTOR_LEARNING_RATE = 0.00025
CRITIC_LEARNING_RATE = 0.00025
NUM_CHECKPOINTS = 10

env = PongEnv(ENV)


def train():
    model = A2CAgent(env=env, train_flag=True, num_episodes=TRAIN_EPISODES,
                     model_path=MODEL_PATH, actor_learning_rate=ACTOR_LEARNING_RATE,
                     critic_learning_rate=CRITIC_LEARNING_RATE, gamma=GAMMA,
                     num_checkpoints=NUM_CHECKPOINTS)
    model.train(render=False)


def test():
    model = A2CAgent(env=env, train_flag=False, num_episodes=TEST_EPISODES,
                     model_path=MODEL_PATH)
    model.test(render=True)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str, choices=['train', 'test'], default='train',
                        help='Train or test mode')
    args = parser.parse_args()

    if args.mode == 'test':
        test()
    elif args.mode == 'train':
        train()


if __name__ == "__main__":
    main()
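Assuming the three files above sit side by side and that a ./saved_models/ directory exists beforehand (the checkpoint paths are written into it, and Keras's model.save will most likely not create missing parent directories), the script would be driven as:

python main.py --mode train
python main.py --mode test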

Assignments/A2C-Pong/pong.py

Lines changed: 48 additions & 0 deletions
import numpy as np
import gym
import cv2


class PongEnv(object):
    """Wrapper around the Gym Pong environment that crops, downsamples,
    optionally binarizes, and stacks the last `stack_size` frames."""

    def __init__(self, env_name, frame_size=(80, 80),
                 binarize=True, stack_size=4):
        self.env = gym.make(env_name)
        self.frame_size = frame_size
        self.stack_size = stack_size
        self.binarize = binarize
        self.frame_stack = np.zeros((self.stack_size, self.frame_size[0], self.frame_size[1]),
                                    dtype=np.float64)
        self.n_action = self.env.action_space.n
        self.observation_shape = self.frame_stack.shape

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        processed_obs = self.process_obs(obs)
        return processed_obs, reward, done, info

    def reset(self):
        obs = self.env.reset()
        # Skip the first 20 frames with NOOP so episodes start past the intro.
        for i in range(20):
            obs, _, _, _ = self.env.step(0)
        self.frame_stack = np.zeros((self.stack_size, self.frame_size[0], self.frame_size[1]))
        processed_obs = self.process_obs(obs)
        return processed_obs

    def render(self):
        self.env.render()

    def close(self):
        self.env.close()

    def process_obs(self, obs):
        # Crop the score bar and borders, keep every second pixel:
        # 210x160x3 -> 80x80x3.
        clip_obs = obs[35:195:2, ::2, :]
        grayscale_obs = cv2.cvtColor(clip_obs, cv2.COLOR_RGB2GRAY)
        if grayscale_obs.shape != self.frame_size:
            grayscale_obs = cv2.resize(grayscale_obs, self.frame_size,
                                       interpolation=cv2.INTER_CUBIC)
        if self.binarize:
            grayscale_obs[grayscale_obs < 100] = 0
            grayscale_obs[grayscale_obs >= 100] = 255

        grayscale_obs = grayscale_obs.astype(np.float64) / 255.0
        # Newest frame goes to index 0; the oldest frame falls off the end.
        self.frame_stack = np.roll(self.frame_stack, shift=1, axis=0)
        self.frame_stack[0, :, :] = grayscale_obs
        return np.expand_dims(self.frame_stack, 0)
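The roll-then-overwrite pattern at the end of process_obs keeps the most recent frame at index 0 of the stack. A small sketch of that pattern on toy 2x2 "frames" (shapes chosen for readability, not the real 80x80 observations):

import numpy as np

stack = np.zeros((4, 2, 2))       # stack_size = 4, toy 2x2 frames
for t in range(1, 6):
    frame = np.full((2, 2), t)    # stand-in for the frame seen at time t
    stack = np.roll(stack, shift=1, axis=0)
    stack[0] = frame              # newest frame always lands at index 0

print(stack[:, 0, 0])             # -> [5. 4. 3. 2.], newest to oldest

After five steps the stack holds frames 5, 4, 3, 2; frame 1 has been rolled off the end, which gives the network a short motion history without unbounded memory.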

Assignments/Capstone/__init__.py

Whitespace-only changes.
4 binary files not shown.
