I've implemented the other features of the board game, and made a simple learning method that stores state values and makes optimal moves based on that.
I've used some OOP concepts, but I'm not sure if I've used them appropriately.
Edit:
I'm mainly interested in a review of the system design, the way objects are structured & handled, the way Jaipur
object is modified from within Player
class (in the make_optimal_move
method) when Jaipur
itself contains Player
objects.
agent_jaipur.py
import random
from enum import Enum, IntEnum, unique
from itertools import cycle, combinations, product
from collections import Counter
import numpy as np
import copy
import pickle
state_values = dict()
@unique
class Commodity(IntEnum):
CAMEL = 0
LEATHER = 1
SPICE = 2
SILK = 3
SILVER = 4
GOLD = 5
DIAMOND = 6
@classmethod
def is_costly(self, commodity):
return commodity in [self.DIAMOND, self.GOLD, self.SILVER]
class Jaipur:
def __init__(self, player1_type, player2_type, muted=False):
self.muted = muted
self.price_tokens = {
Commodity.DIAMOND: [5, 5, 5, 7, 7],
Commodity.GOLD: [5, 5, 5, 6, 6],
Commodity.SILVER: [5, 5, 5, 5, 5],
Commodity.SILK: [1, 1, 2, 2, 3, 3, 5],
Commodity.SPICE: [1, 1, 2, 2, 3, 3, 5],
Commodity.LEATHER: [1, 1, 1, 1, 1, 1, 2, 3, 4],
}
self._pile = [Commodity.DIAMOND] * 6 + [Commodity.GOLD] * 6 + [Commodity.SILVER] * 6 + \
[Commodity.SILK] * 8 + [Commodity.SPICE] * 8 + [Commodity.LEATHER] * 10 + \
[Commodity.CAMEL] * 8
random.shuffle(self._pile)
self.market = Counter()
for i in Commodity:
self.market[i] = 0
self.market[Commodity.CAMEL] = 3
for i in range(2):
self.market[self._pile.pop()] += 1
self._player1 = player1_type(tag='P1', game=self)
self._player2 = player2_type(tag='P2', game=self)
for i in range(5):
for _player in self._player1, self._player2:
commodity = self._pile.pop()
if commodity == Commodity.CAMEL:
_player.camel_count += 1
else:
_player.hand[commodity] += 1
self.winner = None
self._players_gen = cycle([self._player1, self._player2])
self.player_turn = next(self._players_gen)
def pile_size(self):
return len(self._pile)
def pick_commodity(self, commodity=None):
if sum(self.market.values()) == 0:
return (None, 0)
if commodity is not None and self.market[commodity] > 0:
picked_commodity = commodity
else:
market_list = []
for c in self.market:
if self.market[c] > 0:
market_list += [c] * self.market[c]
picked_commodity = random.choice(market_list)
pick_count = 0
# When player takes camel, all camels in market must be taken
if picked_commodity == Commodity.CAMEL:
market_camels = self.market[Commodity.CAMEL]
pick_count = market_camels
self.market[Commodity.CAMEL] = 0
for i in range(market_camels):
if self._pile:
self.market[self._pile.pop()] += 1
else:
pick_count = 1
self.market[picked_commodity] -= 1
if self._pile:
self.market[self._pile.pop()] += 1
return (picked_commodity, pick_count)
def pprint(self, s, c):
print(s, end=' ')
for i in c.keys():
if c[i] > 0:
print('%s: %d,'%(i, c[i]), end=' ')
print()
def print_game(self):
if self.muted:
return
print('price_tokens: ', self.price_tokens.values())
print('pile size:', self.pile_size())
self.pprint('market: ', self.market)
self.pprint('P1 hand: ', self._player1.hand)
self.pprint('P2 hand: ', self._player2.hand)
print('P1 camels:', self._player1.camel_count)
print('P2 camels:', self._player2.camel_count)
print('P1 tokens: ', self._player1.tokens)
print('P2 tokens: ', self._player2.tokens)
print('P1 score:', self._player1.score())
print('P2 score:', self._player2.score())
print('Winner is', self.winner)
print()
def play_game(self, learn, muted=False):
self.muted = muted
print('----------------- GAME STARTED -------------------')
self.print_game()
while self.winner is None:
if not self.muted:
print('---------------------', self.player_turn.tag, ' turn', '---------------------')
self.print_game()
self = self.switch_player(learn)
self.game_winner()
else:
print('----------------- GAME ENDED -------------------')
self.print_game()
print('P1 final score:', self._player1.final_score)
print('P2 final score:', self._player2.final_score)
print()
if isinstance(self._player1, Agent):
self._player1.learn_state(self._player1.get_state(), self.winner)
if isinstance(self._player2, Agent):
self._player2.learn_state(self._player2.get_state(), self.winner)
return self.winner
def switch_player(self, learn):
self = self.player_turn.make_move(self.winner, learn)
self.player_turn = next(self._players_gen)
return self
def game_winner(self):
# End game if 3 resources are sold completely
# Or if market goes less than 5
if len(['empty' for i in self.price_tokens.values() if not i]) >= 3 or (sum(self.market.values()) < 5):
self._player1.final_score = self._player1.score()
self._player2.final_score = self._player2.score()
if self._player1.camel_count > self._player2.camel_count:
self._player1.final_score += 5
elif self._player1.camel_count < self._player2.camel_count:
self._player2.final_score += 5
if self._player1.final_score > self._player2.final_score:
self.winner = self._player1.tag
elif self._player1.final_score < self._player2.final_score:
self.winner = self._player2.tag
else:
self.winner = self._player2.tag #TODO
return self.winner
class Player:
def __init__(self, tag, game):
self.tag = tag
self.camel_count = 0
self.hand = Counter()
for i in Commodity:
self.hand[i] = 0
self.tokens = []
self.final_score = 0
self._game = game
self.prev_state = self.get_state()
def hand_size(self):
return sum(self.hand.values())
def score(self):
return sum(self.tokens)
def get_state(self): #TODO
#return tuple((self.hand_size(), self.camel_count))
score = self.score() // 10
pile_size = self._game.pile_size() // 5
camel = self.camel_count // 4
# hand = tuple(self.hand.items())
hand = tuple(self.hand[i] for i in Commodity)
hand_size = self.hand_size()
# market = tuple(self._game.market.items())
market_costly = sum([self._game.market[i] for i in Commodity if Commodity.is_costly(i)])
market_non_costly = sum([self._game.market[i] for i in Commodity if (not Commodity.is_costly(i)) and (not i == Commodity.CAMEL)])
market_camel = sum([self._game.market[i] for i in Commodity if i == Commodity.CAMEL])
market = (market_costly, market_non_costly, market_camel)
state = tuple((score, pile_size, hand_size, camel, market))
return state
def get_possible_trades(self, give_commodities, take_commodities):
# print('give commodities', give_commodities)
# print('take commodities', take_commodities)
if len(give_commodities) < 2 or len(take_commodities) < 2:
return []
give_commodities = sorted(give_commodities)
take_commodities = sorted(take_commodities)
possible_trades = []
for trade_size in range(2, min(len(give_commodities), len(take_commodities)) + 1):
give_subsets = set(combinations(give_commodities, trade_size))
take_subsets = set(combinations(take_commodities, trade_size))
all_combinations = product(give_subsets, take_subsets)
for give, take in all_combinations:
if len(set(give).intersection(set(take))) == 0:
possible_trades += [(give, take)]
# print('possible trades')
# for i in possible_trades:
# print(i[0])
# print(i[1])
# print()
return possible_trades
def get_all_moves(self):
moves = [0, 1, 2] # TAKE, SELL, TRADE
take_commodities = [i for i in self._game.market if self._game.market[i] > 0]
sell_commodities = [i for i in self.hand if (self.hand[i] > 1) or (not Commodity.is_costly(i) and self.hand[i] > 0)]
all_moves = []
if self.hand_size() < 7:
all_moves += [(moves[0], i) for i in take_commodities]
all_moves += [(moves[1], i) for i in sell_commodities]
trade_give_commodities = []
for i in self.hand:
trade_give_commodities += [i] * self.hand[i]
trade_give_commodities += [Commodity.CAMEL] * self.camel_count
trade_take_commodities = []
for i in self._game.market:
if i != Commodity.CAMEL:
trade_take_commodities += [i] * self._game.market[i]
# TODO Enable trading
# possible_trades = self.get_possible_trades(trade_give_commodities, trade_take_commodities)
# all_moves += [(moves[2], i) for i in possible_trades]
return all_moves
def take(self, commodity=None):
# self._game.pprint('before taking:', self.hand)
if not self._game.muted:
print('taking..', commodity)
if self.hand_size() < 7:
taken, take_count = self._game.pick_commodity(commodity)
if taken == Commodity.CAMEL:
self.camel_count += take_count
else:
self.hand[taken] += take_count
# self._game.pprint('after taking:', self.hand)
def sell(self, commodity=None, count=0):
# print('before selling..', self.tokens)
if not self._game.muted:
print('selling..', commodity)
if commodity is None:
commodity = self.hand.most_common(1)[0][0]
if ((not Commodity.is_costly(commodity)) and self.hand[commodity] > 0) or self.hand[commodity] > 1:
count = self.hand[commodity] # TODO As of now sell all cards of this type
for i in range(count):
if self._game.price_tokens[commodity]:
self.tokens.append(self._game.price_tokens[commodity].pop())
self.hand[commodity] -= count
if count == 3:
self.tokens.append(random.randint(1, 4))
elif count == 4:
self.tokens.append(random.randint(4, 7))
elif count >= 5:
self.tokens.append(random.randint(7, 11))
# print('after selling...', self.tokens)
def trade(self, give=None, take=None):
# if not self._game.muted:
# print('trading..', (give, take))
if give == None or take == None:
return
if len(give) != len(take):
return
if len(give) < 2:
return
if(set(give).intersection(set(take))):
return
give = Counter(give)
take = Counter(take)
self.hand -= give
self._game.market += give
self._game.market -= take
self.hand += take
self.camel_count -= give[Commodity.CAMEL]
def make_move(self, winner, learn=False):
all_moves = self.get_all_moves()
# for i, move in enumerate(all_moves):
# print(i, move)
# move = int(input('Choose move..'))
move = random.choice(all_moves)
if move[0] == 0:
self.take(move[1])
elif move[0] == 1:
self.sell(move[1])
elif move[0] == 2:
self.trade(move[1][0], move[1][1])
return self._game
class Agent(Player):
def __init__(self, tag, game):
super().__init__(tag, game)
def make_move(self, winner, learn):
if learn:
self.learn_state(self.get_state(), winner)
if learn:
epsilon = 0.8
else:
epsilon = 1
p = random.uniform(0, 1)
if p < epsilon:
self._game = self.make_optimal_move()
else:
super().make_move(winner, learn)
return self._game
def make_optimal_move(self):
opt_self = None
v = -float('Inf')
all_moves = self.get_all_moves()
# print('all_moves')
# for i in all_moves:
# print(i)
for m, c in all_moves:
temp_self = copy.deepcopy(self)
if m == 0:
temp_self.take(c)
elif m == 1:
temp_self.sell(c)
elif m == 2:
temp_self.trade(c[0], c[1])
# print('after making move', m, c)
# temp_self._game.print_game()
# print()
temp_state = self.get_state()
v_temp = self.calc_value(temp_state)
# Encourage exploration
if v_temp is None:
v_temp = 1
if v_temp > v:
opt_self = copy.deepcopy(temp_self)
v = v_temp
elif v_temp == v:
toss = random.randint(0, 1)
if toss == 1:
opt_self = copy.deepcopy(temp_self)
self = copy.deepcopy(opt_self)
# print('Optimal self')
# opt_self._game.print_game()
# print()
# print('After making optimal move')
# self._game.print_game()
return self._game
def calc_value(self, state):
global state_values
if state in state_values.keys():
return state_values[state]
def learn_state(self, state, winner):
global state_values
# if winner is not None:
# state_values[state] = self.reward(winner)
if self.prev_state in state_values.keys():
v_s = state_values[self.prev_state]
else:
v_s = int(0)
R = self.reward(winner)
if state in state_values.keys() and winner is None:
v_s_tag = state_values[state]
else:
v_s_tag = int(0)
state_values[self.prev_state] = v_s + 0.5 * (R + v_s_tag - v_s)
self.prev_state = state
def reward(self, winner):
if winner is self.tag:
R = 1
elif winner is None:
R = 0
else:
R = -1
return R
def load_values():
global state_values
try:
f = open('state_values.pickle', 'rb')
state_values = pickle.load(f)
except:
state_values = dict()
def save_values():
global state_values
f = open('state_values.pickle', 'wb')
try:
os.remove(f)
except:
pass
pickle.dump(state_values, f)
def play_to_learn(episodes, muted=True):
load_values()
print(len(state_values))
for i in range(episodes):
print('Episode', i)
game = Jaipur(Agent, Player)
game.play_game(learn=True, muted=muted)
game = Jaipur(Player, Agent)
game.play_game(learn=True, muted=muted)
if i % 1000 == 0:
save_values()
save_values()
print(len(state_values))
count = 0
for i in state_values:
if state_values[i] not in (-0.5, 0, 0.5):
print(i, state_values[i])
count += 1
print(count)
# print(state_values)
def test(n=100):
load_values()
# print('----------------------------------------------------------------- Agent vs Agent')
# ava_p1_wins = 0
# for i in range(n):
# game = Jaipur(Agent, Agent)
# winner = game.play_game(learn=False, muted=True)
# if winner == 'P1':
# ava_p1_wins += 1
print('----------------------------------------------------------------- Agent vs Player')
avp_p1_wins = 0
for i in range(n):
game = Jaipur(Agent, Player)
winner = game.play_game(learn=False, muted=True)
if winner == 'P1':
avp_p1_wins += 1
print('----------------------------------------------------------------- Player vs Agent')
pva_p1_wins = 0
for i in range(n):
game = Jaipur(Player, Agent)
winner = game.play_game(learn=False, muted=True)
if winner == 'P1':
pva_p1_wins += 1
print('----------------------------------------------------------------- Player vs Player')
pvp_p1_wins = 0
for i in range(n):
game = Jaipur(Player, Player)
winner = game.play_game(learn=False, muted=True)
if winner == 'P1':
pvp_p1_wins += 1
print('----------------------------------------------------------------- Result')
# print('----------------------------------------------------------------- Agent vs Agent')
# print('Total:', n)
# print('P1:', ava_p1_wins)
# print('P2:', n - ava_p1_wins)
print('----------------------------------------------------------------- Agent vs Player')
print('Total:', n)
print('P1:', avp_p1_wins)
print('P2:', n - avp_p1_wins)
print('----------------------------------------------------------------- Player vs Agent')
print('Total:', n)
print('P1:', pva_p1_wins)
print('P2:', n - pva_p1_wins)
print('----------------------------------------------------------------- Player vs Player')
print('Total:', n)
print('P1:', pvp_p1_wins)
print('P2:', n - pvp_p1_wins)
def play():
# play_to_learn(10000, muted=True)
game = Jaipur(Player, Agent)
game.play_game(learn=False, muted=False)
test()
if __name__ == "__main__":
play()
The GitHub repository can be found here.
2 Answers 2
You code looks mostly good to me. Adding docstrings to it would definitly make it even better.
Comments about the Jaipur class
Smaller functions
It could be a good idea to write small functions to have a higher level of abstractions without any consideration for the implementation details.
For instance, you have self._pile.pop()
in various places. It may be easier to understand if it was moved in a def draw_card()
method (an additional idea could be to add an optional argument for the number of cards). That method could be called in the initialisation step as well.
Also, the if self._pile: self.market[self.draw_card()] += 1
could be moved in a add_card_to_market()
method.
Duplicated code
Another way to remove duplicated code could be to consider what is actually different from one situation to another. In the case "when player takes camel", only the way to get the number of cards picked is different. For every other aspect, the logic is the same. We could write:
# When player takes camel, all camels in market must be taken
pick_count = self.market[picked_commodity] if picked_commodity == Commodity.CAMEL else 1
self.market[picked_commodity] -= pick_count
for i in range(pick_count):
self.add_card_to_marker()
return (picked_commodity, pick_count)
Using the Python tools
In pick_commodity
, you iterate over the self.market
keys and then retrieve the associated values.
You could use https://docs.python.org/3.8/library/stdtypes.html#dict.items to iterate over both keys and values.
market_list = []
for c, n in self.market.items():
if n > 0:
market_list += [c] * n
Also, another aspect of the Counter class you are using is that you do not need to initialise things to 0.
The part with 'empty'
I must confess that the part len(['empty' for i in self.price_tokens.values() if not i]) >= 3
got me really puzzled. Where is this 'empty'
string coming from ?
Here, you build a list where only the length will be relevant, not its content. You could use None
as the content.
`len([None for i in self.price_tokens.values() if not i]) >= 3`
Another option would be to just use sum to get the same value:
`sum(not i for i in self.price_tokens.values()) >= 3`
Comments about the Player class
Iterating over a different object
In get_state
, you use for i in Commodity
in various places.
I think it would be more natural to iterate over the other object you are considering (self.hand
or self._game.market
).
For instance:
market_costly = sum([self._game.market[i] for i in Commodity if Commodity.is_costly(i)])
market_non_costly = sum([self._game.market[i] for i in Commodity if (not Commodity.is_costly(i)) and (not i == Commodity.CAMEL)])
market_camel = sum([self._game.market[i] for i in Commodity if i == Commodity.CAMEL])
would become
market_costly = sum(n for c, n in self._game.market.items() if Commodity.is_costly(c))
market_non_costly = sum(n for c, n in self._game.market.items() if not Commodity.is_costly(c) and i != Commodity.CAMEL)
market_camel = sum(n for c, n in self._game.market.items() if i == Commodity.CAMEL)
Even though that last line can be simplified considerably:
market_camel = self._game.market[Commodity.CAMEL]
I think your code too good to be True! I hope to code like you one day!
Even so, there always exists improvements, no matter how few.
Bug
In the save_values
function, there is a line os.remove(f)
which raises an error, but is caught by the except
statement, which just moves on to the next line of the code. Is import os
statement missing? I believe so.
Improvements
I don't believe there are any improvements for make_optimal_move
, but I will add them if I find any.
from enum import Enum, IntEnum, unique
import numpy as np
Enum
and np
is not used. Do you plan to use them later? Removing it is your wish.
In class Agent
, signature of method 'Agent.make_move()' doesn't match the signature of base method in the class 'Player'.
The signature of Agent.make_move()
is make_move(self, winner, learn)
while the signature of class Player
is make_move(self, winner, learn=False)
.
The signatures should always be the same.
Use ternary operators.
In the Agent.make_move
function
if learn:
epsilon = 0.8
else:
epsilon = 1
can be replaced with
epsilon = 0.8 if learn else 1
and in Agent.reward
function
if winner is self.tag:
R = 1
elif winner is None:
R = 0
else:
R = -1
can be replaced with
return 0 if winner is None else 1 if winner is self.tag else -1
According to PEP 8 rules, variables in functions should be lower case.
In Jaipur.pick_commodity
,
if sum(self.market.values()) == 0
can be replaced with if not sum(self.market.values())
and you can remove the redundant parenthesis in return (None, 0)
and return (picked_commodity, pick_count)
See idiomatic coding.
Also, pick_count = 0
can be removed. It is changed in the if
statement or else
statement anyway.
In the Player.get_state
method, hand
is not used, so you can remove it.
Define static methods with @staticmethod
Player.get_possible_trades
, Agent.calc_value
, and Jaipur.pprint
are static.
In Player.trade
, you are using ==
to compare give
and take
to None
. You should always use is
instead of ==
to compare a value and None
Remove the redundant parenthesis in the line if(set(give).intersection(set(take))):
Now,
if len(give) != len(take):
return
if len(give) < 2:
return
if set(give).intersection(set(take)):
return
can be replaced with
if len(give) != len(take) or len(give) < 2 or set(give).intersection(set(take)):
return
If I get more improvements, I'll be sure to edit them in!
Explore related questions
See similar questions with these tags.