Commit 6f47ed1

Added all projects and lecture notes
1 parent 9d73134 commit 6f47ed1


45 files changed: +5,622 additions, 0 deletions

Assignments/A2C-Pong/a2c_agent.py

Lines changed: 170 additions & 0 deletions
from keras.layers import Dense, Input, Conv2D, Flatten
from keras.models import Model, load_model
from keras.optimizers import RMSprop, Adam
from keras.losses import Huber
import numpy as np
import gym
from collections import deque
import matplotlib.pyplot as plt
import keras.backend.tensorflow_backend as tfback  # needs standalone Keras (<= 2.3)
import keras.backend as K
import tensorflow as tf
import time

# Disabled TF2 compatibility workaround: monkey-patches Keras's GPU discovery
# so that standalone Keras runs on top of TF2. Uncomment if needed.
# def _get_available_gpus():
#     """Get a list of available gpu devices (formatted as strings).
#
#     # Returns
#         A list of available GPU devices.
#     """
#     # global _LOCAL_DEVICES
#     if tfback._LOCAL_DEVICES is None:
#         devices = tf.config.list_logical_devices()
#         tfback._LOCAL_DEVICES = [x.name for x in devices]
#     return [x for x in tfback._LOCAL_DEVICES if 'device:gpu' in x.lower()]
#
# tfback._get_available_gpus = _get_available_gpus


def get_actor_model(input_shape, num_actions, learning_rate):
    """Build the policy network. Returns two models that share weights:
    `model` is trained with the custom policy-gradient loss (which needs the
    advantages as a second input), `prediction` is used for action selection."""
    X_inp = Input(shape=input_shape)
    advantages = Input(shape=[1])
    # Convolutional front end, disabled in favour of a flat dense network:
    # X = Conv2D(32, 8, strides=(4, 4), data_format='channels_first',
    #            activation='relu')(X_inp)
    # X = Conv2D(16, 4, strides=(2, 2), data_format='channels_first',
    #            activation='relu')(X)
    X = Flatten(input_shape=input_shape)(X_inp)
    X = Dense(512, activation='relu', kernel_initializer='he_uniform')(X)
    X = Dense(num_actions, activation='softmax')(X)

    def pg_loss(y_true, y_pred):
        # Advantage-weighted negative log-likelihood. y_true is the one-hot
        # encoding of the taken action, so the product picks out log pi(a|s).
        clipped_y_pred = K.clip(y_pred, 1e-8, 1 - 1e-8)
        log_likelihood = y_true * K.log(clipped_y_pred)
        loss = K.sum(-log_likelihood * advantages)
        return loss

    model = Model(inputs=[X_inp, advantages], outputs=X)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=pg_loss)

    prediction = Model(inputs=X_inp, outputs=X)

    return model, prediction


def get_critic_model(input_shape, learning_rate):
    """Build the value network: same body as the actor, single linear output."""
    X_inp = Input(shape=input_shape)
    # X = Conv2D(32, 8, strides=(4, 4), data_format='channels_first',
    #            activation='relu')(X_inp)
    # X = Conv2D(16, 4, strides=(2, 2), data_format='channels_first',
    #            activation='relu')(X)
    X = Flatten(input_shape=input_shape)(X_inp)
    X = Dense(512, activation='relu', kernel_initializer='he_uniform')(X)
    X = Dense(1, activation='linear')(X)

    model = Model(inputs=X_inp, outputs=X)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=Huber(delta=1.5))

    return model


class A2CAgent(object):
    def __init__(self, env, train_flag=True, num_episodes=20000, actor_learning_rate=0.00025,
                 critic_learning_rate=0.00025, gamma=0.99, model_path=None, num_checkpoints=10):
        self.env = env
        self.actor_learning_rate = actor_learning_rate
        self.critic_learning_rate = critic_learning_rate
        self.gamma = gamma
        self.num_episodes = num_episodes
        self.num_checkpoints = num_checkpoints

        # Pong only needs UP and DOWN, actions 2 and 3 in the Atari action set.
        self.LEFT_ACTION = 2
        self.RIGHT_ACTION = 3
        self.action_space = [self.LEFT_ACTION, self.RIGHT_ACTION]

        self.num_actions = len(self.action_space)
        # self.num_actions = self.env.n_action
        self.model_path = model_path

        if train_flag:
            self.actor_model, self.prediction = get_actor_model(
                self.env.observation_shape, self.num_actions, self.actor_learning_rate)
            self.critic_model = get_critic_model(
                self.env.observation_shape, self.critic_learning_rate)
        else:
            assert model_path is not None, "Please pass model_path when train_flag is False"
            self.prediction = load_model(model_path)

    def get_discounted_rewards(self, reward, gamma):
        """Discounted returns, with the running sum reset at every non-zero
        reward (in Pong a point ends the rally), then normalized to zero mean
        and unit variance."""
        running_add = 0
        discounted_r = np.zeros_like(reward, dtype=np.float64)
        for i in reversed(range(len(reward))):
            if reward[i] != 0:
                running_add = 0
            running_add = running_add * gamma + reward[i]
            discounted_r[i] = running_add

        # Normalize the discounted rewards
        discounted_r -= np.mean(discounted_r)
        discounted_r /= np.std(discounted_r)
        return discounted_r

    def train(self, render=False):
        all_episode_scores = []
        best_score = float('-inf')
        for episode in range(self.num_episodes):
            states = []
            actions = []
            rewards = []
            state = self.env.reset()
            episode_score = 0
            t = 0
            while True:
                if render:
                    self.env.render()
                # Sample an action from the current policy.
                action_probabilities = self.prediction.predict(state)[0]
                action = np.random.choice(range(self.num_actions), p=action_probabilities)
                next_state, reward, done, info = self.env.step(self.action_space[action])
                states.append(state)
                ohe_action = np.zeros(self.num_actions, dtype=np.float64)
                ohe_action[action] = 1
                actions.append(ohe_action)
                rewards.append(reward)

                state = next_state
                episode_score += reward
                t += 1
                if done or t > 10000:
                    all_episode_scores.append(episode_score)
                    print("Episode {}/{} | Episode score : {} ({:.4})".format(
                        episode + 1, self.num_episodes, episode_score,
                        np.mean(all_episode_scores[-50:])))
                    if np.mean(all_episode_scores[-50:]) > best_score:
                        best_score = np.mean(all_episode_scores[-50:])
                        self.prediction.save(self.model_path)
                        print('Model Saved!')
                    break

            # One A2C update per episode over the whole rollout.
            states_batch = np.vstack(states)
            actions_batch = np.vstack(actions)
            discounted_rewards = self.get_discounted_rewards(rewards, self.gamma)
            values = self.critic_model.predict(states_batch)[:, 0]
            advantages = discounted_rewards - values
            self.actor_model.train_on_batch([states_batch, advantages], actions_batch)
            self.critic_model.train_on_batch(states_batch, discounted_rewards)
            if self.num_checkpoints != 0 and episode % (self.num_episodes // self.num_checkpoints) == 0:
                self.prediction.save('./saved_models/a2c-{:06d}.model'.format(episode))
        self.env.close()

    def test(self, render=True):
        for episode in range(self.num_episodes):
            state = self.env.reset()
            episode_score = 0
            while True:
                if render:
                    self.env.render()
                    time.sleep(0.001)
                # Act greedily with respect to the learned policy.
                action_probabilities = self.prediction.predict(state)[0]
                action = np.argmax(action_probabilities)
                next_state, reward, done, info = self.env.step(self.action_space[action])
                state = next_state
                episode_score += reward
                if done:
                    print("Episode {}/{} | Episode score : {}".format(
                        episode + 1, self.num_episodes, episode_score))
                    break
        self.env.close()
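A note on the reward-reset trick in get_discounted_rewards: it is specific to Pong, where the environment returns a reward of +1 or -1 exactly when a point is scored and 0 otherwise, so each rally is treated as its own unit for credit assignment. A minimal standalone sketch of that logic (plain NumPy, normalization omitted, and the reward sequence is a made-up toy example, not real environment output):

import numpy as np

def discount(rewards, gamma=0.99):
    # Same recurrence as A2CAgent.get_discounted_rewards, minus normalization.
    running_add = 0.0
    out = np.zeros(len(rewards))
    for i in reversed(range(len(rewards))):
        if rewards[i] != 0:
            running_add = 0.0  # a point was scored; the rally ends here
        running_add = running_add * gamma + rewards[i]
        out[i] = running_add
    return out

# Two rallies: the first lost (-1) after three frames, the second won (+1).
print(discount([0, 0, -1, 0, 1]))
# -> [-0.9801 -0.99   -1.      0.99    1.    ]

The frame on which a point is conceded gets the full -1, and earlier frames of the same rally get geometrically less blame; the winning rally's frames are credited the same way with positive sign.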

Assignments/A2C-Pong/main.py

Lines changed: 46 additions & 0 deletions
from a2c_agent import A2CAgent
from pong import PongEnv
import argparse

ENV = 'PongDeterministic-v4'
MODEL_PATH = './saved_models/a2c-best_model.model'
TRAIN_EPISODES = 1000
TEST_EPISODES = 10
GAMMA = 0.99
ACTOR_LEARNING_RATE = 0.00025
CRITIC_LEARNING_RATE = 0.00025
NUM_CHECKPOINTS = 10

env = PongEnv(ENV)


def train():
    model = A2CAgent(env=env, train_flag=True, num_episodes=TRAIN_EPISODES,
                     model_path=MODEL_PATH, actor_learning_rate=ACTOR_LEARNING_RATE,
                     critic_learning_rate=CRITIC_LEARNING_RATE, gamma=GAMMA,
                     num_checkpoints=NUM_CHECKPOINTS)
    model.train(render=False)


def test():
    model = A2CAgent(env=env, train_flag=False, num_episodes=TEST_EPISODES,
                     model_path=MODEL_PATH)
    model.test(render=True)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str, choices=['train', 'test'], default='train',
                        help='Train or test mode')
    args = parser.parse_args()

    if args.mode == 'test':
        test()
    elif args.mode == 'train':
        train()


if __name__ == "__main__":
    main()
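Assuming the three files above sit side by side and that a ./saved_models/ directory exists beforehand (the checkpoint paths are written into it, and Keras's model.save will most likely not create missing parent directories), the script would be driven as:

python main.py --mode train
python main.py --mode test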

Assignments/A2C-Pong/pong.py

Lines changed: 48 additions & 0 deletions
import numpy as np
import gym
import cv2


class PongEnv(object):
    """Wrapper around the Gym Pong environment that crops, downsamples,
    optionally binarizes, and stacks the last `stack_size` frames."""

    def __init__(self, env_name, frame_size=(80, 80),
                 binarize=True, stack_size=4):
        self.env = gym.make(env_name)
        self.frame_size = frame_size
        self.stack_size = stack_size
        self.binarize = binarize
        self.frame_stack = np.zeros((self.stack_size, self.frame_size[0], self.frame_size[1]),
                                    dtype=np.float64)
        self.n_action = self.env.action_space.n
        self.observation_shape = self.frame_stack.shape

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        processed_obs = self.process_obs(obs)
        return processed_obs, reward, done, info

    def reset(self):
        obs = self.env.reset()
        # Skip the first 20 frames with NOOP so episodes start past the intro.
        for i in range(20):
            obs, _, _, _ = self.env.step(0)
        self.frame_stack = np.zeros((self.stack_size, self.frame_size[0], self.frame_size[1]))
        processed_obs = self.process_obs(obs)
        return processed_obs

    def render(self):
        self.env.render()

    def close(self):
        self.env.close()

    def process_obs(self, obs):
        # Crop the score bar and borders, keep every second pixel:
        # 210x160x3 -> 80x80x3.
        clip_obs = obs[35:195:2, ::2, :]
        grayscale_obs = cv2.cvtColor(clip_obs, cv2.COLOR_RGB2GRAY)
        if grayscale_obs.shape != self.frame_size:
            grayscale_obs = cv2.resize(grayscale_obs, self.frame_size,
                                       interpolation=cv2.INTER_CUBIC)
        if self.binarize:
            grayscale_obs[grayscale_obs < 100] = 0
            grayscale_obs[grayscale_obs >= 100] = 255

        grayscale_obs = grayscale_obs.astype(np.float64) / 255.0
        # Newest frame goes to index 0; the oldest frame falls off the end.
        self.frame_stack = np.roll(self.frame_stack, shift=1, axis=0)
        self.frame_stack[0, :, :] = grayscale_obs
        return np.expand_dims(self.frame_stack, 0)
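The roll-then-overwrite pattern at the end of process_obs keeps the most recent frame at index 0 of the stack. A small sketch of that pattern on toy 2x2 "frames" (shapes chosen for readability, not the real 80x80 observations):

import numpy as np

stack = np.zeros((4, 2, 2))       # stack_size = 4, toy 2x2 frames
for t in range(1, 6):
    frame = np.full((2, 2), t)    # stand-in for the frame seen at time t
    stack = np.roll(stack, shift=1, axis=0)
    stack[0] = frame              # newest frame always lands at index 0

print(stack[:, 0, 0])             # -> [5. 4. 3. 2.], newest to oldest

After five steps the stack holds frames 5, 4, 3, 2; frame 1 has been rolled off the end, which gives the network a short motion history without unbounded memory.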

Assignments/Capstone/__init__.py

Whitespace-only changes.
4 binary files not shown.
