I am using the minimax algorithm (for now without alpha-beta pruning) for the AI in a tic-tac-toe game written in Python and NumPy. It works, but it is very slow, so I would like to optimize it.
A few rules for current purposes:
- The board is a 4x4 square.
- A player wins with 3 symbols (X or O) in a row, column or diagonal.
- An empty field is represented by 0, X by 1 and O by 2.
- There is no whole game loop for performance testing. The goal is to determine only the next move (player 1 must play (2, 1)) using minimax, like below:
[[1 0 0 1]       [[1 0 0 1]
 [0 2 0 0]  ->    [0 2 0 0]
 [0 0 1 0]        [0 1 1 0]
 [1 2 2 1]]       [1 2 2 1]]
Here is the whole program. Even though the game board is almost full, getting the optimal move takes 3 seconds. Most of the time is spent in the function search_sequence, followed by minimax. Is there any way to optimize it? The main parts of the program are described below.
search_sequence
The function accepts an array and a sequence (also an array) and checks whether the array includes the sequence.
# E.g. [1 2 3 4 5] includes [2 3 4] but not [5 4 3].
def search_sequence(arr, seq):
    r_seq = numpy.arange(seq.shape[0])
    # Build every window of len(seq) consecutive elements and compare to seq.
    M = (arr[numpy.arange(arr.shape[0] - seq.shape[0] + 1)[:, None] + r_seq] == seq).all(1)
    return M.any()
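For lines this short (length 3 or 4), NumPy's per-call overhead tends to dominate the actual work, so a plain-Python scan with an early exit may well be faster. A minimal sketch of that idea (search_sequence_py is a hypothetical name, not part of the original program):

# Plain-Python alternative with early exit; avoids NumPy call overhead,
# which dominates at these tiny array sizes. Hypothetical helper.
def search_sequence_py(arr, seq):
    n, m = len(arr), len(seq)
    for i in range(n - m + 1):
        if all(arr[i + j] == seq[j] for j in range(m)):
            return True
    return False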
get_diags
Accepts a 2D array and returns a list of all its diagonals and anti-diagonals.
# [[1 2 3]
#  [4 5 6] -> [[1] [4 2] [7 5 3] [8 6] [9] [3] [2 6] [1 5 9] [4 8] [7]]
#  [7 8 9]]
def get_diags(state):
    # Anti-diagonals, read off the vertically flipped board.
    diags = [state[::-1, :].diagonal(i) for i in range(-state.shape[0] + 1, state.shape[1])]
    # Main diagonals (top-left to bottom-right).
    diags.extend(state.diagonal(i) for i in range(state.shape[1] - 1, -state.shape[0], -1))
    return diags
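Since the board shape is fixed at 4x4, the flat indices of every diagonal never change and can be precomputed once; each later call then reduces to cheap fancy indexing. A sketch under that assumption (DIAG_INDICES and get_diags_fast are hypothetical names):

# Precompute the flat index of every cell on every diagonal once, by running
# get_diags over an index matrix whose entries are their own flat indices.
idx = numpy.arange(16).reshape(4, 4)
DIAG_INDICES = [d.copy() for d in get_diags(idx)]

def get_diags_fast(state):
    flat = state.ravel()
    return [flat[d] for d in DIAG_INDICES]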
Player.get_actions
Returns a list of all available actions. An action is a simple coordinate tuple:
# [[1 2 0]
#  [1 2 1] -> [(0, 1), (2, 0), (2, 2)]
#  [0 1 0]]
def get_actions(self, state):
    coords = numpy.asarray(numpy.where(state == 0)).T
    return [tuple(i) for i in coords]
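As a side note, numpy.argwhere already returns the coordinates in this transposed layout, so the same method can be written more directly (behaviour-preserving as far as I can tell, but worth verifying before relying on it):

def get_actions(self, state):
    # argwhere(state == 0) yields one [row, col] pair per empty cell.
    return list(map(tuple, numpy.argwhere(state == 0)))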
Player.apply_action, Player.undo_action
Applies an action to the game board by setting the value at the action's coordinates to the player's ID; undo_action resets it to 0.
def apply_action(self, state, action):
    state[action] = self.id
    return state

def undo_action(self, state, action):
    state[action] = 0
    return state
Player.is_win
Searches all rows, columns and diagonals for a sequence of three of the player's IDs.
# [[1 2 0]
#  [1 2 1] -> True (for player with id = 2)
#  [0 2 0]]
def is_win(self, state):
    # Rows and columns share the same loop index because the board is square.
    for i, row in enumerate(state):
        if search_sequence(row, self.win_seq):
            return True
        if search_sequence(state[:, i], self.win_seq):
            return True
    for diag in get_diags(state):
        if search_sequence(diag, self.win_seq):
            return True
    return False
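A bigger win than micro-optimizing search_sequence would be checking only the lines through the square that just changed: after a move at (r, c), only that row, that column, and the two diagonals through (r, c) can newly contain a winning run. A sketch of the idea (is_win_at is a hypothetical method, not in the original code):

def is_win_at(self, state, action):
    # Only the four lines through the last move can have changed.
    r, c = action
    n = state.shape[0]
    lines = [
        state[r, :],                               # row through the move
        state[:, c],                               # column through the move
        state.diagonal(c - r),                     # main diagonal through (r, c)
        state[::-1, :].diagonal(c - (n - 1 - r)),  # anti-diagonal through (r, c)
    ]
    return any(len(line) >= len(self.win_seq) and
               search_sequence(line, self.win_seq) for line in lines)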
Player.is_full
Checks whether there is no empty field (0) left on the game board, i.e. no move is available.
def is_full(self, state):
    fields = state == 0
    return not fields.any()
Player.minimax
Returns the score of the state tree: 0 = draw, -10 = loss, 10 = win, adjusted by depth so that faster wins score higher.
def minimax(self, state, depth, is_max):
    if self.is_win(state):
        return 10 - depth
    elif self.enemy.is_win(state):
        return -10 + depth
    elif self.is_full(state):
        return 0
    actions = self.get_actions(state)
    if is_max:
        best = -sys.maxsize
        for action in actions:
            val = self.minimax(self.apply_action(state, action), depth + 1, False)
            self.undo_action(state, action)
            best = max(best, val)
    else:
        best = sys.maxsize
        for action in actions:
            val = self.minimax(self.enemy.apply_action(state, action), depth + 1, True)
            self.undo_action(state, action)
            best = min(best, val)
    return best
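Since alpha-beta pruning is explicitly not in place yet, for reference here is a minimal sketch of how it could slot into this exact minimax (minimax_ab is a hypothetical name; the terminal checks are unchanged):

def minimax_ab(self, state, depth, is_max, alpha=-sys.maxsize, beta=sys.maxsize):
    if self.is_win(state):
        return 10 - depth
    if self.enemy.is_win(state):
        return -10 + depth
    if self.is_full(state):
        return 0
    if is_max:
        best = -sys.maxsize
        for action in self.get_actions(state):
            val = self.minimax_ab(self.apply_action(state, action), depth + 1, False, alpha, beta)
            self.undo_action(state, action)
            best = max(best, val)
            alpha = max(alpha, best)
            if beta <= alpha:  # the minimizer will never allow this branch
                break
    else:
        best = sys.maxsize
        for action in self.get_actions(state):
            val = self.minimax_ab(self.enemy.apply_action(state, action), depth + 1, True, alpha, beta)
            self.undo_action(state, action)
            best = min(best, val)
            beta = min(beta, best)
            if beta <= alpha:  # the maximizer already has a better option
                break
    return best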
Player.get_decision
Gets the optimal action for the next move.
def get_decision(self, state, enemy):
    self.enemy = enemy
    actions = self.get_actions(state)
    best_i = None
    best_val = -sys.maxsize
    for i, action in enumerate(actions):
        val = self.minimax(self.apply_action(state, action), 0, False)
        state[action] = 0
        if val > best_val:
            best_i = i
            best_val = val
    return actions[best_i]
1 Answer
Initially your code was taking about 19-20 seconds to complete. I added memoization; now it takes 2-3 seconds. Hope it helps. In case you have to rerun the program many times, I have saved the mydict object using pickle and reuse it on later runs. With the cached file, the program takes less than 1 second. Repl link for the code.
#!/usr/bin/python3
import numpy
import time
import sys
import pickle

start = time.time()

# Load the memoization cache from a previous run if it exists.
try:
    with open('mydict', 'rb') as f:
        print('using previous file')
        mydict = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError):
    mydict = {}

class Player:
    def __init__(self, id):
        self.id = id
        self.win_seq = numpy.array([id, id, id])

    def get_actions(self, state):
        coords = numpy.asarray(numpy.where(state == 0)).T
        return [tuple(i) for i in coords]

    def apply_action(self, state, action):
        state[action] = self.id
        return state

    def undo_action(self, state, action):
        state[action] = 0
        return state

    def is_win(self, state):
        for i, row in enumerate(state):
            if search_sequence(row, self.win_seq):
                return True
            if search_sequence(state[:, i], self.win_seq):
                return True
        for diag in get_diags(state):
            if search_sequence(diag, self.win_seq):
                return True
        return False

    def get_decision(self, state, enemy):
        self.enemy = enemy
        actions = self.get_actions(state)
        best_i = None
        best_val = -sys.maxsize
        for i, action in enumerate(actions):
            val = self.minimax(self.apply_action(state, action), 0, False)
            state[action] = 0
            if val > best_val:
                best_i = i
                best_val = val
        return actions[best_i]

    def minimax(self, state, depth, is_max):
        # Memoization: return the cached score for this board if we have one.
        try:
            return mydict[tuple(state.flatten())]
        except KeyError:
            pass
        if self.is_win(state):
            return 10 - depth
        elif self.enemy.is_win(state):
            return -10 + depth
        elif self.is_full(state):
            return 0
        actions = self.get_actions(state)
        if is_max:
            best = -sys.maxsize
            for action in actions:
                val = self.minimax(self.apply_action(state, action), depth + 1, False)
                self.undo_action(state, action)
                best = max(best, val)
        else:
            best = sys.maxsize
            for action in actions:
                val = self.minimax(self.enemy.apply_action(state, action), depth + 1, True)
                self.undo_action(state, action)
                best = min(best, val)
        mydict[tuple(state.flatten())] = best
        return best

    def is_full(self, state):
        fields = state == 0
        return not fields.any()

def search_sequence(arr, seq):
    r_seq = numpy.arange(seq.shape[0])
    M = (arr[numpy.arange(arr.shape[0] - seq.shape[0] + 1)[:, None] + r_seq] == seq).all(1)
    return M.any()

def get_diags(state):
    diags = [state[::-1, :].diagonal(i) for i in range(-state.shape[0] + 1, state.shape[1])]
    diags.extend(state.diagonal(i) for i in range(state.shape[1] - 1, -state.shape[0], -1))
    return diags

me = Player(1)
enemy = Player(2)
state = numpy.array([
    [1, 0, 0, 1],
    [0, 2, 0, 0],
    [0, 0, 1, 0],
    [1, 2, 2, 1]
])
decision = me.get_decision(state, enemy)
print(state)
print(decision)
print(me.apply_action(state, decision))
print(time.time() - start, "s")

# Persist the cache so later runs can reuse it.
with open('mydict', 'wb') as f:
    pickle.dump(mydict, f)
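One hedged refinement, not in the answer above: the cache key ignores whose turn it is, and the stored scores embed the depth at which a position was first reached, so identical boards hit at different depths reuse a slightly off score. A sketch of a turn-aware key using state.tobytes(), which is also cheaper to hash than a tuple of 16 Python ints (minimax_memo is a hypothetical name; the depth caveat still applies):

def minimax_memo(self, state, depth, is_max):
    # Key on the raw board bytes plus the side to move, so max- and
    # min-node scores are never mixed in the cache.
    key = (state.tobytes(), is_max)
    if key in mydict:
        return mydict[key]
    if self.is_win(state):
        return 10 - depth
    if self.enemy.is_win(state):
        return -10 + depth
    if self.is_full(state):
        return 0
    if is_max:
        mover, nxt, best, better = self, False, -sys.maxsize, max
    else:
        mover, nxt, best, better = self.enemy, True, sys.maxsize, min
    for action in self.get_actions(state):
        val = self.minimax_memo(mover.apply_action(state, action), depth + 1, nxt)
        self.undo_action(state, action)
        best = better(best, val)
    mydict[key] = best
    return best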
- Welcome to CodeReview! I like the rewrite, but can you review the OP code some more beyond the fact that it's slow and missing memoization? This is CodeReview after all. – konijn, Nov 5, 2019 at 12:51
- Thank you sir, for pointing that out. I just did it the way it is done on Stack Overflow. I am currently reading other answers on Code Review, and I am realizing that the way I have written this is not the right way to write answers here. – sleeping_coder, Nov 5, 2019 at 13:04