10
\$\begingroup\$

Link to my previous question.

I've implemented the other features of the board game, and made a simple learning method that stores state values and makes optimal moves based on that.

I've used some OOP concepts, but I'm not sure if I've used them appropriately.

Edit:

I'm mainly interested in a review of the system design, the way objects are structured & handled, the way Jaipur object is modified from within Player class (in the make_optimal_move method) when Jaipur itself contains Player objects.

agent_jaipur.py

import random
from enum import Enum, IntEnum, unique
from itertools import cycle, combinations, product
from collections import Counter
import numpy as np
import copy
import pickle
state_values = dict()
@unique
class Commodity(IntEnum):
 CAMEL = 0
 LEATHER = 1
 SPICE = 2
 SILK = 3
 SILVER = 4
 GOLD = 5
 DIAMOND = 6
 @classmethod
 def is_costly(self, commodity):
 return commodity in [self.DIAMOND, self.GOLD, self.SILVER]
class Jaipur:
 def __init__(self, player1_type, player2_type, muted=False):
 self.muted = muted
 self.price_tokens = {
 Commodity.DIAMOND: [5, 5, 5, 7, 7],
 Commodity.GOLD: [5, 5, 5, 6, 6], 
 Commodity.SILVER: [5, 5, 5, 5, 5], 
 Commodity.SILK: [1, 1, 2, 2, 3, 3, 5], 
 Commodity.SPICE: [1, 1, 2, 2, 3, 3, 5], 
 Commodity.LEATHER: [1, 1, 1, 1, 1, 1, 2, 3, 4], 
 }
 self._pile = [Commodity.DIAMOND] * 6 + [Commodity.GOLD] * 6 + [Commodity.SILVER] * 6 + \
 [Commodity.SILK] * 8 + [Commodity.SPICE] * 8 + [Commodity.LEATHER] * 10 + \
 [Commodity.CAMEL] * 8
 random.shuffle(self._pile)
 self.market = Counter()
 for i in Commodity:
 self.market[i] = 0
 self.market[Commodity.CAMEL] = 3
 for i in range(2):
 self.market[self._pile.pop()] += 1
 self._player1 = player1_type(tag='P1', game=self)
 self._player2 = player2_type(tag='P2', game=self)
 for i in range(5):
 for _player in self._player1, self._player2:
 commodity = self._pile.pop()
 if commodity == Commodity.CAMEL:
 _player.camel_count += 1
 else:
 _player.hand[commodity] += 1
 self.winner = None
 self._players_gen = cycle([self._player1, self._player2]) 
 self.player_turn = next(self._players_gen)
 def pile_size(self):
 return len(self._pile)
 def pick_commodity(self, commodity=None):
 if sum(self.market.values()) == 0:
 return (None, 0)
 if commodity is not None and self.market[commodity] > 0:
 picked_commodity = commodity
 else:
 market_list = []
 for c in self.market:
 if self.market[c] > 0:
 market_list += [c] * self.market[c]
 picked_commodity = random.choice(market_list)
 pick_count = 0
 # When player takes camel, all camels in market must be taken
 if picked_commodity == Commodity.CAMEL:
 market_camels = self.market[Commodity.CAMEL]
 pick_count = market_camels 
 self.market[Commodity.CAMEL] = 0
 for i in range(market_camels):
 if self._pile:
 self.market[self._pile.pop()] += 1
 else:
 pick_count = 1
 self.market[picked_commodity] -= 1
 if self._pile:
 self.market[self._pile.pop()] += 1
 return (picked_commodity, pick_count)
 def pprint(self, s, c):
 print(s, end=' ')
 for i in c.keys():
 if c[i] > 0:
 print('%s: %d,'%(i, c[i]), end=' ')
 print()
 def print_game(self):
 if self.muted:
 return
 print('price_tokens: ', self.price_tokens.values())
 print('pile size:', self.pile_size())
 self.pprint('market: ', self.market)
 self.pprint('P1 hand: ', self._player1.hand)
 self.pprint('P2 hand: ', self._player2.hand)
 print('P1 camels:', self._player1.camel_count)
 print('P2 camels:', self._player2.camel_count)
 print('P1 tokens: ', self._player1.tokens)
 print('P2 tokens: ', self._player2.tokens)
 print('P1 score:', self._player1.score())
 print('P2 score:', self._player2.score())
 print('Winner is', self.winner)
 print()
 def play_game(self, learn, muted=False):
 self.muted = muted
 print('----------------- GAME STARTED -------------------')
 self.print_game()
 while self.winner is None:
 if not self.muted:
 print('---------------------', self.player_turn.tag, ' turn', '---------------------')
 self.print_game()
 self = self.switch_player(learn)
 self.game_winner()
 else:
 print('----------------- GAME ENDED -------------------')
 self.print_game()
 print('P1 final score:', self._player1.final_score)
 print('P2 final score:', self._player2.final_score)
 print()
 if isinstance(self._player1, Agent):
 self._player1.learn_state(self._player1.get_state(), self.winner)
 if isinstance(self._player2, Agent):
 self._player2.learn_state(self._player2.get_state(), self.winner)
 return self.winner
 def switch_player(self, learn):
 self = self.player_turn.make_move(self.winner, learn)
 self.player_turn = next(self._players_gen)
 return self
 def game_winner(self):
 # End game if 3 resources are sold completely
 # Or if market goes less than 5
 if len(['empty' for i in self.price_tokens.values() if not i]) >= 3 or (sum(self.market.values()) < 5):
 self._player1.final_score = self._player1.score()
 self._player2.final_score = self._player2.score()
 if self._player1.camel_count > self._player2.camel_count:
 self._player1.final_score += 5
 elif self._player1.camel_count < self._player2.camel_count:
 self._player2.final_score += 5
 if self._player1.final_score > self._player2.final_score:
 self.winner = self._player1.tag
 elif self._player1.final_score < self._player2.final_score:
 self.winner = self._player2.tag
 else:
 self.winner = self._player2.tag #TODO
 return self.winner
class Player:
 def __init__(self, tag, game):
 self.tag = tag
 self.camel_count = 0
 self.hand = Counter()
 for i in Commodity:
 self.hand[i] = 0
 self.tokens = []
 self.final_score = 0
 self._game = game
 self.prev_state = self.get_state()
 def hand_size(self):
 return sum(self.hand.values())
 def score(self):
 return sum(self.tokens)
 def get_state(self): #TODO
 #return tuple((self.hand_size(), self.camel_count))
 score = self.score() // 10
 pile_size = self._game.pile_size() // 5
 camel = self.camel_count // 4
 # hand = tuple(self.hand.items())
 hand = tuple(self.hand[i] for i in Commodity)
 hand_size = self.hand_size()
 # market = tuple(self._game.market.items())
 market_costly = sum([self._game.market[i] for i in Commodity if Commodity.is_costly(i)])
 market_non_costly = sum([self._game.market[i] for i in Commodity if (not Commodity.is_costly(i)) and (not i == Commodity.CAMEL)])
 market_camel = sum([self._game.market[i] for i in Commodity if i == Commodity.CAMEL])
 market = (market_costly, market_non_costly, market_camel)
 state = tuple((score, pile_size, hand_size, camel, market))
 return state
 def get_possible_trades(self, give_commodities, take_commodities):
 # print('give commodities', give_commodities)
 # print('take commodities', take_commodities)
 if len(give_commodities) < 2 or len(take_commodities) < 2:
 return []
 give_commodities = sorted(give_commodities)
 take_commodities = sorted(take_commodities)
 possible_trades = []
 for trade_size in range(2, min(len(give_commodities), len(take_commodities)) + 1):
 give_subsets = set(combinations(give_commodities, trade_size))
 take_subsets = set(combinations(take_commodities, trade_size))
 all_combinations = product(give_subsets, take_subsets)
 for give, take in all_combinations:
 if len(set(give).intersection(set(take))) == 0:
 possible_trades += [(give, take)]
 # print('possible trades')
 # for i in possible_trades:
 # print(i[0])
 # print(i[1])
 # print()
 return possible_trades
 def get_all_moves(self):
 moves = [0, 1, 2] # TAKE, SELL, TRADE
 take_commodities = [i for i in self._game.market if self._game.market[i] > 0]
 sell_commodities = [i for i in self.hand if (self.hand[i] > 1) or (not Commodity.is_costly(i) and self.hand[i] > 0)]
 all_moves = []
 if self.hand_size() < 7:
 all_moves += [(moves[0], i) for i in take_commodities]
 all_moves += [(moves[1], i) for i in sell_commodities]
 trade_give_commodities = []
 for i in self.hand:
 trade_give_commodities += [i] * self.hand[i]
 trade_give_commodities += [Commodity.CAMEL] * self.camel_count
 trade_take_commodities = []
 for i in self._game.market:
 if i != Commodity.CAMEL:
 trade_take_commodities += [i] * self._game.market[i]
 # TODO Enable trading 
 # possible_trades = self.get_possible_trades(trade_give_commodities, trade_take_commodities)
 # all_moves += [(moves[2], i) for i in possible_trades]
 return all_moves
 def take(self, commodity=None):
 # self._game.pprint('before taking:', self.hand)
 if not self._game.muted:
 print('taking..', commodity)
 if self.hand_size() < 7:
 taken, take_count = self._game.pick_commodity(commodity)
 if taken == Commodity.CAMEL:
 self.camel_count += take_count
 else:
 self.hand[taken] += take_count
 # self._game.pprint('after taking:', self.hand)
 def sell(self, commodity=None, count=0):
 # print('before selling..', self.tokens)
 if not self._game.muted:
 print('selling..', commodity)
 if commodity is None:
 commodity = self.hand.most_common(1)[0][0]
 if ((not Commodity.is_costly(commodity)) and self.hand[commodity] > 0) or self.hand[commodity] > 1:
 count = self.hand[commodity] # TODO As of now sell all cards of this type
 for i in range(count):
 if self._game.price_tokens[commodity]:
 self.tokens.append(self._game.price_tokens[commodity].pop())
 self.hand[commodity] -= count
 if count == 3:
 self.tokens.append(random.randint(1, 4))
 elif count == 4:
 self.tokens.append(random.randint(4, 7))
 elif count >= 5:
 self.tokens.append(random.randint(7, 11))
 # print('after selling...', self.tokens)
 def trade(self, give=None, take=None):
 # if not self._game.muted:
 # print('trading..', (give, take))
 if give == None or take == None:
 return
 if len(give) != len(take):
 return 
 if len(give) < 2:
 return 
 if(set(give).intersection(set(take))):
 return
 give = Counter(give)
 take = Counter(take)
 self.hand -= give
 self._game.market += give
 self._game.market -= take
 self.hand += take
 self.camel_count -= give[Commodity.CAMEL]
 def make_move(self, winner, learn=False):
 all_moves = self.get_all_moves()
 # for i, move in enumerate(all_moves):
 # print(i, move)
 # move = int(input('Choose move..'))
 move = random.choice(all_moves)
 if move[0] == 0:
 self.take(move[1])
 elif move[0] == 1:
 self.sell(move[1])
 elif move[0] == 2:
 self.trade(move[1][0], move[1][1])
 return self._game
class Agent(Player):
 def __init__(self, tag, game):
 super().__init__(tag, game)
 def make_move(self, winner, learn):
 if learn:
 self.learn_state(self.get_state(), winner)
 if learn:
 epsilon = 0.8
 else:
 epsilon = 1
 p = random.uniform(0, 1)
 if p < epsilon:
 self._game = self.make_optimal_move()
 else:
 super().make_move(winner, learn)
 return self._game
 def make_optimal_move(self):
 opt_self = None
 v = -float('Inf')
 all_moves = self.get_all_moves()
 # print('all_moves')
 # for i in all_moves:
 # print(i)
 for m, c in all_moves:
 temp_self = copy.deepcopy(self)
 if m == 0:
 temp_self.take(c)
 elif m == 1:
 temp_self.sell(c)
 elif m == 2:
 temp_self.trade(c[0], c[1])
 # print('after making move', m, c)
 # temp_self._game.print_game()
 # print()
 temp_state = self.get_state()
 v_temp = self.calc_value(temp_state)
 # Encourage exploration
 if v_temp is None:
 v_temp = 1
 if v_temp > v:
 opt_self = copy.deepcopy(temp_self)
 v = v_temp
 elif v_temp == v:
 toss = random.randint(0, 1)
 if toss == 1:
 opt_self = copy.deepcopy(temp_self)
 self = copy.deepcopy(opt_self)
 # print('Optimal self')
 # opt_self._game.print_game()
 # print()
 # print('After making optimal move')
 # self._game.print_game()
 return self._game
 def calc_value(self, state):
 global state_values
 if state in state_values.keys():
 return state_values[state]
 def learn_state(self, state, winner):
 global state_values
 # if winner is not None:
 # state_values[state] = self.reward(winner)
 if self.prev_state in state_values.keys():
 v_s = state_values[self.prev_state]
 else:
 v_s = int(0)
 R = self.reward(winner)
 if state in state_values.keys() and winner is None:
 v_s_tag = state_values[state]
 else:
 v_s_tag = int(0)
 state_values[self.prev_state] = v_s + 0.5 * (R + v_s_tag - v_s)
 self.prev_state = state
 def reward(self, winner):
 if winner is self.tag:
 R = 1
 elif winner is None:
 R = 0
 else:
 R = -1
 return R
def load_values():
 global state_values
 try:
 f = open('state_values.pickle', 'rb')
 state_values = pickle.load(f)
 except:
 state_values = dict()
def save_values():
 global state_values
 f = open('state_values.pickle', 'wb')
 try:
 os.remove(f)
 except:
 pass
 pickle.dump(state_values, f)
def play_to_learn(episodes, muted=True):
 load_values()
 print(len(state_values))
 for i in range(episodes):
 print('Episode', i)
 game = Jaipur(Agent, Player)
 game.play_game(learn=True, muted=muted)
 game = Jaipur(Player, Agent)
 game.play_game(learn=True, muted=muted)
 if i % 1000 == 0:
 save_values()
 save_values()
 print(len(state_values))
 count = 0
 for i in state_values:
 if state_values[i] not in (-0.5, 0, 0.5):
 print(i, state_values[i])
 count += 1
 print(count)
 # print(state_values)
def test(n=100):
 load_values()
 # print('----------------------------------------------------------------- Agent vs Agent')
 # ava_p1_wins = 0
 # for i in range(n):
 # game = Jaipur(Agent, Agent)
 # winner = game.play_game(learn=False, muted=True)
 # if winner == 'P1':
 # ava_p1_wins += 1
 print('----------------------------------------------------------------- Agent vs Player')
 avp_p1_wins = 0
 for i in range(n):
 game = Jaipur(Agent, Player)
 winner = game.play_game(learn=False, muted=True)
 if winner == 'P1':
 avp_p1_wins += 1
 print('----------------------------------------------------------------- Player vs Agent')
 pva_p1_wins = 0
 for i in range(n):
 game = Jaipur(Player, Agent)
 winner = game.play_game(learn=False, muted=True)
 if winner == 'P1':
 pva_p1_wins += 1
 print('----------------------------------------------------------------- Player vs Player')
 pvp_p1_wins = 0
 for i in range(n):
 game = Jaipur(Player, Player)
 winner = game.play_game(learn=False, muted=True)
 if winner == 'P1':
 pvp_p1_wins += 1
 print('----------------------------------------------------------------- Result')
# print('----------------------------------------------------------------- Agent vs Agent')
# print('Total:', n)
# print('P1:', ava_p1_wins)
# print('P2:', n - ava_p1_wins)
 print('----------------------------------------------------------------- Agent vs Player')
 print('Total:', n)
 print('P1:', avp_p1_wins)
 print('P2:', n - avp_p1_wins)
 print('----------------------------------------------------------------- Player vs Agent')
 print('Total:', n)
 print('P1:', pva_p1_wins)
 print('P2:', n - pva_p1_wins)
 print('----------------------------------------------------------------- Player vs Player')
 print('Total:', n)
 print('P1:', pvp_p1_wins)
 print('P2:', n - pvp_p1_wins)
def play():
 # play_to_learn(10000, muted=True)
 game = Jaipur(Player, Agent)
 game.play_game(learn=False, muted=False)
 test()
if __name__ == "__main__":
 play()

The GitHub repository can be found here.

asked Nov 19, 2019 at 21:43
\$\endgroup\$
0

2 Answers 2

6
+75
\$\begingroup\$

You code looks mostly good to me. Adding docstrings to it would definitly make it even better.


Comments about the Jaipur class

Smaller functions

It could be a good idea to write small functions to have a higher level of abstractions without any consideration for the implementation details.

For instance, you have self._pile.pop() in various places. It may be easier to understand if it was moved in a def draw_card() method (an additional idea could be to add an optional argument for the number of cards). That method could be called in the initialisation step as well.

Also, the if self._pile: self.market[self.draw_card()] += 1 could be moved in a add_card_to_market() method.

Duplicated code

Another way to remove duplicated code could be to consider what is actually different from one situation to another. In the case "when player takes camel", only the way to get the number of cards picked is different. For every other aspect, the logic is the same. We could write:

 # When player takes camel, all camels in market must be taken
 pick_count = self.market[picked_commodity] if picked_commodity == Commodity.CAMEL else 1
 self.market[picked_commodity] -= pick_count
 for i in range(pick_count):
 self.add_card_to_marker()
 return (picked_commodity, pick_count)

Using the Python tools

In pick_commodity, you iterate over the self.market keys and then retrieve the associated values. You could use https://docs.python.org/3.8/library/stdtypes.html#dict.items to iterate over both keys and values.

 market_list = []
 for c, n in self.market.items():
 if n > 0:
 market_list += [c] * n

Also, another aspect of the Counter class you are using is that you do not need to initialise things to 0.

The part with 'empty'

I must confess that the part len(['empty' for i in self.price_tokens.values() if not i]) >= 3 got me really puzzled. Where is this 'empty' string coming from ?

Here, you build a list where only the length will be relevant, not its content. You could use None as the content.

`len([None for i in self.price_tokens.values() if not i]) >= 3`

Another option would be to just use sum to get the same value:

`sum(not i for i in self.price_tokens.values()) >= 3`

Comments about the Player class

Iterating over a different object

In get_state, you use for i in Commodity in various places.

I think it would be more natural to iterate over the other object you are considering (self.hand or self._game.market).

For instance:

 market_costly = sum([self._game.market[i] for i in Commodity if Commodity.is_costly(i)])
 market_non_costly = sum([self._game.market[i] for i in Commodity if (not Commodity.is_costly(i)) and (not i == Commodity.CAMEL)])
 market_camel = sum([self._game.market[i] for i in Commodity if i == Commodity.CAMEL])

would become

 market_costly = sum(n for c, n in self._game.market.items() if Commodity.is_costly(c))
 market_non_costly = sum(n for c, n in self._game.market.items() if not Commodity.is_costly(c) and i != Commodity.CAMEL)
 market_camel = sum(n for c, n in self._game.market.items() if i == Commodity.CAMEL)

Even though that last line can be simplified considerably:

 market_camel = self._game.market[Commodity.CAMEL]
answered Nov 23, 2019 at 13:32
\$\endgroup\$
4
\$\begingroup\$

I think your code too good to be True! I hope to code like you one day!

Even so, there always exists improvements, no matter how few.

Bug

In the save_values function, there is a line os.remove(f) which raises an error, but is caught by the except statement, which just moves on to the next line of the code. Is import os statement missing? I believe so.

Improvements

I don't believe there are any improvements for make_optimal_move, but I will add them if I find any.


from enum import Enum, IntEnum, unique
import numpy as np

Enum and np is not used. Do you plan to use them later? Removing it is your wish.


In class Agent, signature of method 'Agent.make_move()' doesn't match the signature of base method in the class 'Player'.

The signature of Agent.make_move() is make_move(self, winner, learn) while the signature of class Player is make_move(self, winner, learn=False).

The signatures should always be the same.


Use ternary operators.

In the Agent.make_move function

if learn:
 epsilon = 0.8
else:
 epsilon = 1

can be replaced with

epsilon = 0.8 if learn else 1

and in Agent.reward function

if winner is self.tag:
 R = 1
elif winner is None:
 R = 0
else:
 R = -1

can be replaced with

return 0 if winner is None else 1 if winner is self.tag else -1

According to PEP 8 rules, variables in functions should be lower case.


In Jaipur.pick_commodity,
if sum(self.market.values()) == 0 can be replaced with if not sum(self.market.values())

and you can remove the redundant parenthesis in return (None, 0) and return (picked_commodity, pick_count)

See idiomatic coding.

Also, pick_count = 0 can be removed. It is changed in the if statement or else statement anyway.


In the Player.get_state method, hand is not used, so you can remove it.


Define static methods with @staticmethod

Player.get_possible_trades, Agent.calc_value, and Jaipur.pprint are static.


In Player.trade, you are using == to compare give and take to None. You should always use is instead of == to compare a value and None

Remove the redundant parenthesis in the line if(set(give).intersection(set(take))):

Now,

if len(give) != len(take):
 return 
if len(give) < 2:
 return 
if set(give).intersection(set(take)):
 return

can be replaced with

if len(give) != len(take) or len(give) < 2 or set(give).intersection(set(take)):
 return

If I get more improvements, I'll be sure to edit them in!

AlexV
7,3532 gold badges24 silver badges47 bronze badges
answered Nov 22, 2019 at 13:35
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.