I am using the minimax algorithm (for now without alpha-beta pruning) for the AI in a tic-tac-toe game written in Python and NumPy. It works, but it is very slow, so I would like to optimize it.
A few rules for current purposes:
- The board is a 4x4 square.
- A player wins with 3 symbols (X or O) in a row, column or diagonal.
- An empty field is represented by 0, X by 1 and O by 2.
- There is no whole game loop for performance testing. The goal is to determine only the next move (player 1 must play (2, 1)) using minimax, like below:
[[1 0 0 1]       [[1 0 0 1]
 [0 2 0 0]  ->    [0 2 0 0]
 [0 0 1 0]        [0 1 1 0]
 [1 2 2 1]]       [1 2 2 1]]
Here is the whole program. Even though the game board is almost full, getting the optimal move takes 3 seconds. Most of the time is spent in the function search_sequence, followed by minimax. Is there any way to optimize it? The main parts of the program are described below.
search_sequence
The function accepts an array and a sequence (also an array) and checks whether the array includes the sequence.
# E.g. [1 2 3 4 5] includes [2 3 4] but not [5 4 3].
def search_sequence(arr, seq):
    r_seq = numpy.arange(seq.shape[0])
    # Build every window of len(seq) consecutive elements and compare to seq.
    M = (arr[numpy.arange(arr.shape[0] - seq.shape[0] + 1)[:, None] + r_seq] == seq).all(1)
    return M.any()
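For lines this short (length 3 or 4), NumPy's per-call overhead tends to dominate the actual work, so a plain-Python scan with an early exit may well be faster. A minimal sketch of that idea (search_sequence_py is a hypothetical name, not part of the original program):

# Plain-Python alternative with early exit; avoids NumPy call overhead,
# which dominates at these tiny array sizes. Hypothetical helper.
def search_sequence_py(arr, seq):
    n, m = len(arr), len(seq)
    for i in range(n - m + 1):
        if all(arr[i + j] == seq[j] for j in range(m)):
            return True
    return False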
get_diags
Accepts a 2D array and returns a list of all its diagonals and anti-diagonals.
# [[1 2 3]
#  [4 5 6] -> [[1] [4 2] [7 5 3] [8 6] [9] [3] [2 6] [1 5 9] [4 8] [7]]
#  [7 8 9]]
def get_diags(state):
    # Anti-diagonals, read off the vertically flipped board.
    diags = [state[::-1, :].diagonal(i) for i in range(-state.shape[0] + 1, state.shape[1])]
    # Main diagonals (top-left to bottom-right).
    diags.extend(state.diagonal(i) for i in range(state.shape[1] - 1, -state.shape[0], -1))
    return diags
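Since the board shape is fixed at 4x4, the flat indices of every diagonal never change and can be precomputed once; each later call then reduces to cheap fancy indexing. A sketch under that assumption (DIAG_INDICES and get_diags_fast are hypothetical names):

# Precompute the flat index of every cell on every diagonal once, by running
# get_diags over an index matrix whose entries are their own flat indices.
idx = numpy.arange(16).reshape(4, 4)
DIAG_INDICES = [d.copy() for d in get_diags(idx)]

def get_diags_fast(state):
    flat = state.ravel()
    return [flat[d] for d in DIAG_INDICES]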
Player.get_actions
Returns a list of all available actions. An action is a simple coordinate tuple:
# [[1 2 0]
#  [1 2 1] -> [(0, 1), (2, 0), (2, 2)]
#  [0 1 0]]
def get_actions(self, state):
    coords = numpy.asarray(numpy.where(state == 0)).T
    return [tuple(i) for i in coords]
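As a side note, numpy.argwhere already returns the coordinates in this transposed layout, so the same method can be written more directly (behaviour-preserving as far as I can tell, but worth verifying before relying on it):

def get_actions(self, state):
    # argwhere(state == 0) yields one [row, col] pair per empty cell.
    return list(map(tuple, numpy.argwhere(state == 0)))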
Player.apply_action, Player.undo_action
Applies an action to the game board by setting the value at the action's coordinates to the player's ID; undo_action resets it to 0.
def apply_action(self, state, action):
    state[action] = self.id
    return state

def undo_action(self, state, action):
    state[action] = 0
    return state
Player.is_win
Searches all rows, columns and diagonals for a sequence of three of the player's IDs.
# [[1 2 0]
#  [1 2 1] -> True (for player with id = 2)
#  [0 2 0]]
def is_win(self, state):
    # Rows and columns share the same loop index because the board is square.
    for i, row in enumerate(state):
        if search_sequence(row, self.win_seq):
            return True
        if search_sequence(state[:, i], self.win_seq):
            return True
    for diag in get_diags(state):
        if search_sequence(diag, self.win_seq):
            return True
    return False
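A bigger win than micro-optimizing search_sequence would be checking only the lines through the square that just changed: after a move at (r, c), only that row, that column, and the two diagonals through (r, c) can newly contain a winning run. A sketch of the idea (is_win_at is a hypothetical method, not in the original code):

def is_win_at(self, state, action):
    # Only the four lines through the last move can have changed.
    r, c = action
    n = state.shape[0]
    lines = [
        state[r, :],                               # row through the move
        state[:, c],                               # column through the move
        state.diagonal(c - r),                     # main diagonal through (r, c)
        state[::-1, :].diagonal(c - (n - 1 - r)),  # anti-diagonal through (r, c)
    ]
    return any(len(line) >= len(self.win_seq) and
               search_sequence(line, self.win_seq) for line in lines)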
Player.is_full
Checks whether there is no empty field (0) left on the game board, i.e. no move is available.
def is_full(self, state):
    fields = state == 0
    return not fields.any()
Player.minimax
Returns the score of the state tree: 0 = draw, -10 = loss, 10 = win, adjusted by depth so that faster wins score higher.
def minimax(self, state, depth, is_max):
    if self.is_win(state):
        return 10 - depth
    elif self.enemy.is_win(state):
        return -10 + depth
    elif self.is_full(state):
        return 0
    actions = self.get_actions(state)
    if is_max:
        best = -sys.maxsize
        for action in actions:
            val = self.minimax(self.apply_action(state, action), depth + 1, False)
            self.undo_action(state, action)
            best = max(best, val)
    else:
        best = sys.maxsize
        for action in actions:
            val = self.minimax(self.enemy.apply_action(state, action), depth + 1, True)
            self.undo_action(state, action)
            best = min(best, val)
    return best
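Since alpha-beta pruning is explicitly not in place yet, for reference here is a minimal sketch of how it could slot into this exact minimax (minimax_ab is a hypothetical name; the terminal checks are unchanged):

def minimax_ab(self, state, depth, is_max, alpha=-sys.maxsize, beta=sys.maxsize):
    if self.is_win(state):
        return 10 - depth
    if self.enemy.is_win(state):
        return -10 + depth
    if self.is_full(state):
        return 0
    if is_max:
        best = -sys.maxsize
        for action in self.get_actions(state):
            val = self.minimax_ab(self.apply_action(state, action), depth + 1, False, alpha, beta)
            self.undo_action(state, action)
            best = max(best, val)
            alpha = max(alpha, best)
            if beta <= alpha:  # the minimizer will never allow this branch
                break
    else:
        best = sys.maxsize
        for action in self.get_actions(state):
            val = self.minimax_ab(self.enemy.apply_action(state, action), depth + 1, True, alpha, beta)
            self.undo_action(state, action)
            best = min(best, val)
            beta = min(beta, best)
            if beta <= alpha:  # the maximizer already has a better option
                break
    return best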
Player.get_decision
Gets the optimal action for the next move.
def get_decision(self, state, enemy):
    self.enemy = enemy
    actions = self.get_actions(state)
    best_i = None
    best_val = -sys.maxsize
    for i, action in enumerate(actions):
        val = self.minimax(self.apply_action(state, action), 0, False)
        state[action] = 0
        if val > best_val:
            best_i = i
            best_val = val
    return actions[best_i]
1 Answer
Initially your code was taking about 19-20 seconds to complete. I added memoization; now it takes 2-3 seconds. Hope it helps. In case you have to rerun the program many times, I have saved the mydict object using pickle and reuse it on later runs. With the cached file, the program takes less than 1 second. Repl link for the code.
#!/usr/bin/python3
import numpy
import time
import sys
import pickle

start = time.time()

# Load the memoization cache from a previous run if it exists.
try:
    with open('mydict', 'rb') as f:
        print('using previous file')
        mydict = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError):
    mydict = {}

class Player:
    def __init__(self, id):
        self.id = id
        self.win_seq = numpy.array([id, id, id])

    def get_actions(self, state):
        coords = numpy.asarray(numpy.where(state == 0)).T
        return [tuple(i) for i in coords]

    def apply_action(self, state, action):
        state[action] = self.id
        return state

    def undo_action(self, state, action):
        state[action] = 0
        return state

    def is_win(self, state):
        for i, row in enumerate(state):
            if search_sequence(row, self.win_seq):
                return True
            if search_sequence(state[:, i], self.win_seq):
                return True
        for diag in get_diags(state):
            if search_sequence(diag, self.win_seq):
                return True
        return False

    def get_decision(self, state, enemy):
        self.enemy = enemy
        actions = self.get_actions(state)
        best_i = None
        best_val = -sys.maxsize
        for i, action in enumerate(actions):
            val = self.minimax(self.apply_action(state, action), 0, False)
            state[action] = 0
            if val > best_val:
                best_i = i
                best_val = val
        return actions[best_i]

    def minimax(self, state, depth, is_max):
        # Memoization: return the cached score for this board if we have one.
        try:
            return mydict[tuple(state.flatten())]
        except KeyError:
            pass
        if self.is_win(state):
            return 10 - depth
        elif self.enemy.is_win(state):
            return -10 + depth
        elif self.is_full(state):
            return 0
        actions = self.get_actions(state)
        if is_max:
            best = -sys.maxsize
            for action in actions:
                val = self.minimax(self.apply_action(state, action), depth + 1, False)
                self.undo_action(state, action)
                best = max(best, val)
        else:
            best = sys.maxsize
            for action in actions:
                val = self.minimax(self.enemy.apply_action(state, action), depth + 1, True)
                self.undo_action(state, action)
                best = min(best, val)
        mydict[tuple(state.flatten())] = best
        return best

    def is_full(self, state):
        fields = state == 0
        return not fields.any()

def search_sequence(arr, seq):
    r_seq = numpy.arange(seq.shape[0])
    M = (arr[numpy.arange(arr.shape[0] - seq.shape[0] + 1)[:, None] + r_seq] == seq).all(1)
    return M.any()

def get_diags(state):
    diags = [state[::-1, :].diagonal(i) for i in range(-state.shape[0] + 1, state.shape[1])]
    diags.extend(state.diagonal(i) for i in range(state.shape[1] - 1, -state.shape[0], -1))
    return diags

me = Player(1)
enemy = Player(2)
state = numpy.array([
    [1, 0, 0, 1],
    [0, 2, 0, 0],
    [0, 0, 1, 0],
    [1, 2, 2, 1]
])
decision = me.get_decision(state, enemy)
print(state)
print(decision)
print(me.apply_action(state, decision))
print(time.time() - start, "s")

# Persist the cache so later runs can reuse it.
with open('mydict', 'wb') as f:
    pickle.dump(mydict, f)
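One hedged refinement, not in the answer above: the cache key ignores whose turn it is, and the stored scores embed the depth at which a position was first reached, so identical boards hit at different depths reuse a slightly off score. A sketch of a turn-aware key using state.tobytes(), which is also cheaper to hash than a tuple of 16 Python ints (minimax_memo is a hypothetical name; the depth caveat still applies):

def minimax_memo(self, state, depth, is_max):
    # Key on the raw board bytes plus the side to move, so max- and
    # min-node scores are never mixed in the cache.
    key = (state.tobytes(), is_max)
    if key in mydict:
        return mydict[key]
    if self.is_win(state):
        return 10 - depth
    if self.enemy.is_win(state):
        return -10 + depth
    if self.is_full(state):
        return 0
    if is_max:
        mover, nxt, best, better = self, False, -sys.maxsize, max
    else:
        mover, nxt, best, better = self.enemy, True, sys.maxsize, min
    for action in self.get_actions(state):
        val = self.minimax_memo(mover.apply_action(state, action), depth + 1, nxt)
        self.undo_action(state, action)
        best = better(best, val)
    mydict[key] = best
    return best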
- Welcome to CodeReview! I like the rewrite, but can you review the OP code some more beyond the fact that it's slow and missing memoization? This is CodeReview after all. – konijn, Nov 5, 2019 at 12:51
- Thank you sir, for pointing that out. I just did it the way it is done on Stack Overflow. I am currently reading other answers on Code Review, and I am realizing that the way I have written this is not the right way to write answers here. – sleeping_coder, Nov 5, 2019 at 13:04