Return to Question

Notice removed Draw attention by Community Bot

occurred Nov 30, 2019 at 14:02

Bounty Ended with SylvainD's answer chosen by Community Bot

occurred Nov 30, 2019 at 14:02

added 264 characters in body; edited tags; edited title

Source Link

edited Nov 25, 2019 at 12:05

Thirupathi Thangavel

edited Nov 25, 2019 at 12:05

Thirupathi Thangavel

System design - Jaipur board game learning agent

Edit:

I'm mainly interested in a review of the system design, the way objects are structured & handled, the way Jaipur object is modified from within Player class (in the make_optimal_move method) when Jaipur itself contains Player objects.

agent_jaipur.py

Jaipur board game learning agent

agent_jaipur.py

System design - Jaipur board game learning agent

Edit:

agent_jaipur.py

Notice added Draw attention by Thirupathi Thangavel

occurred Nov 22, 2019 at 12:01

Bounty Started worth 150 reputation by Thirupathi Thangavel

occurred Nov 22, 2019 at 12:01

Tweeted twitter.com/StackCodeReview/status/1196986635222695937

occurred Nov 20, 2019 at 3:00

Trim fluff, improve formatting, add github link from comments

Source Link

edited Nov 19, 2019 at 22:23

Ethan Bierlein

edited Nov 19, 2019 at 22:23

Ethan Bierlein

15.9k
4
60
146

Previous question: Implementing simplified Jaipur board gameLink to my previous question.

I've used some oopsOOP concepts, but I'm not sure if that's the standardised way to do itI've used them appropriately.

All suggestions are welcomeThe GitHub repository can be found here.

Previous question: Implementing simplified Jaipur board game

I've used some oops concepts, but I'm not sure if that's the standardised way to do it.

All suggestions are welcome.

Link to my previous question.

I've used some OOP concepts, but I'm not sure if I've used them appropriately.

The GitHub repository can be found here.

Source Link

asked Nov 19, 2019 at 21:43

Thirupathi Thangavel

asked Nov 19, 2019 at 21:43

Thirupathi Thangavel

Jaipur board game learning agent

Previous question: Implementing simplified Jaipur board game

I've implemented the other features of the board game, and made a simple learning method that stores state values and makes optimal moves based on that.

I've used some oops concepts, but I'm not sure if that's the standardised way to do it.

agent_jaipur.py

import random
from enum import Enum, IntEnum, unique
from itertools import cycle, combinations, product
from collections import Counter
import numpy as np
import copy
import pickle
state_values = dict()
@unique
class Commodity(IntEnum):
 CAMEL = 0
 LEATHER = 1
 SPICE = 2
 SILK = 3
 SILVER = 4
 GOLD = 5
 DIAMOND = 6
 @classmethod
 def is_costly(self, commodity):
 return commodity in [self.DIAMOND, self.GOLD, self.SILVER]
class Jaipur:
 def __init__(self, player1_type, player2_type, muted=False):
 self.muted = muted
 self.price_tokens = {
 Commodity.DIAMOND: [5, 5, 5, 7, 7],
 Commodity.GOLD: [5, 5, 5, 6, 6], 
 Commodity.SILVER: [5, 5, 5, 5, 5], 
 Commodity.SILK: [1, 1, 2, 2, 3, 3, 5], 
 Commodity.SPICE: [1, 1, 2, 2, 3, 3, 5], 
 Commodity.LEATHER: [1, 1, 1, 1, 1, 1, 2, 3, 4], 
 }
 self._pile = [Commodity.DIAMOND] * 6 + [Commodity.GOLD] * 6 + [Commodity.SILVER] * 6 + \
 [Commodity.SILK] * 8 + [Commodity.SPICE] * 8 + [Commodity.LEATHER] * 10 + \
 [Commodity.CAMEL] * 8
 random.shuffle(self._pile)
 self.market = Counter()
 for i in Commodity:
 self.market[i] = 0
 self.market[Commodity.CAMEL] = 3
 for i in range(2):
 self.market[self._pile.pop()] += 1
 self._player1 = player1_type(tag='P1', game=self)
 self._player2 = player2_type(tag='P2', game=self)
 for i in range(5):
 for _player in self._player1, self._player2:
 commodity = self._pile.pop()
 if commodity == Commodity.CAMEL:
 _player.camel_count += 1
 else:
 _player.hand[commodity] += 1
 self.winner = None
 self._players_gen = cycle([self._player1, self._player2]) 
 self.player_turn = next(self._players_gen)
 def pile_size(self):
 return len(self._pile)
 def pick_commodity(self, commodity=None):
 if sum(self.market.values()) == 0:
 return (None, 0)
 if commodity is not None and self.market[commodity] > 0:
 picked_commodity = commodity
 else:
 market_list = []
 for c in self.market:
 if self.market[c] > 0:
 market_list += [c] * self.market[c]
 picked_commodity = random.choice(market_list)
 
 pick_count = 0
 # When player takes camel, all camels in market must be taken
 if picked_commodity == Commodity.CAMEL:
 market_camels = self.market[Commodity.CAMEL]
 pick_count = market_camels 
 self.market[Commodity.CAMEL] = 0
 for i in range(market_camels):
 if self._pile:
 self.market[self._pile.pop()] += 1
 else:
 pick_count = 1
 self.market[picked_commodity] -= 1
 if self._pile:
 self.market[self._pile.pop()] += 1
 return (picked_commodity, pick_count)
 def pprint(self, s, c):
 print(s, end=' ')
 for i in c.keys():
 if c[i] > 0:
 print('%s: %d,'%(i, c[i]), end=' ')
 print()
 def print_game(self):
 if self.muted:
 return
 print('price_tokens: ', self.price_tokens.values())
 print('pile size:', self.pile_size())
 self.pprint('market: ', self.market)
 self.pprint('P1 hand: ', self._player1.hand)
 self.pprint('P2 hand: ', self._player2.hand)
 print('P1 camels:', self._player1.camel_count)
 print('P2 camels:', self._player2.camel_count)
 print('P1 tokens: ', self._player1.tokens)
 print('P2 tokens: ', self._player2.tokens)
 print('P1 score:', self._player1.score())
 print('P2 score:', self._player2.score())
 print('Winner is', self.winner)
 print()
 def play_game(self, learn, muted=False):
 self.muted = muted
 print('----------------- GAME STARTED -------------------')
 self.print_game()
 while self.winner is None:
 if not self.muted:
 print('---------------------', self.player_turn.tag, ' turn', '---------------------')
 self.print_game()
 self = self.switch_player(learn)
 self.game_winner()
 else:
 print('----------------- GAME ENDED -------------------')
 self.print_game()
 print('P1 final score:', self._player1.final_score)
 print('P2 final score:', self._player2.final_score)
 print()
 
 if isinstance(self._player1, Agent):
 self._player1.learn_state(self._player1.get_state(), self.winner)
 
 if isinstance(self._player2, Agent):
 self._player2.learn_state(self._player2.get_state(), self.winner)
 return self.winner
 def switch_player(self, learn):
 self = self.player_turn.make_move(self.winner, learn)
 self.player_turn = next(self._players_gen)
 return self
 def game_winner(self):
 # End game if 3 resources are sold completely
 # Or if market goes less than 5
 if len(['empty' for i in self.price_tokens.values() if not i]) >= 3 or (sum(self.market.values()) < 5):
 self._player1.final_score = self._player1.score()
 self._player2.final_score = self._player2.score()
 if self._player1.camel_count > self._player2.camel_count:
 self._player1.final_score += 5
 elif self._player1.camel_count < self._player2.camel_count:
 self._player2.final_score += 5
 if self._player1.final_score > self._player2.final_score:
 self.winner = self._player1.tag
 elif self._player1.final_score < self._player2.final_score:
 self.winner = self._player2.tag
 else:
 self.winner = self._player2.tag #TODO
 return self.winner
class Player:
 def __init__(self, tag, game):
 self.tag = tag
 self.camel_count = 0
 self.hand = Counter()
 for i in Commodity:
 self.hand[i] = 0
 self.tokens = []
 self.final_score = 0
 self._game = game
 self.prev_state = self.get_state()
 def hand_size(self):
 return sum(self.hand.values())
 def score(self):
 return sum(self.tokens)
 def get_state(self): #TODO
 #return tuple((self.hand_size(), self.camel_count))
 score = self.score() // 10
 pile_size = self._game.pile_size() // 5
 camel = self.camel_count // 4
 # hand = tuple(self.hand.items())
 hand = tuple(self.hand[i] for i in Commodity)
 hand_size = self.hand_size()
 # market = tuple(self._game.market.items())
 market_costly = sum([self._game.market[i] for i in Commodity if Commodity.is_costly(i)])
 market_non_costly = sum([self._game.market[i] for i in Commodity if (not Commodity.is_costly(i)) and (not i == Commodity.CAMEL)])
 market_camel = sum([self._game.market[i] for i in Commodity if i == Commodity.CAMEL])
 market = (market_costly, market_non_costly, market_camel)
 state = tuple((score, pile_size, hand_size, camel, market))
 return state
 def get_possible_trades(self, give_commodities, take_commodities):
 # print('give commodities', give_commodities)
 # print('take commodities', take_commodities)
 if len(give_commodities) < 2 or len(take_commodities) < 2:
 return []
 give_commodities = sorted(give_commodities)
 take_commodities = sorted(take_commodities)
 possible_trades = []
 for trade_size in range(2, min(len(give_commodities), len(take_commodities)) + 1):
 give_subsets = set(combinations(give_commodities, trade_size))
 take_subsets = set(combinations(take_commodities, trade_size))
 all_combinations = product(give_subsets, take_subsets)
 for give, take in all_combinations:
 if len(set(give).intersection(set(take))) == 0:
 possible_trades += [(give, take)]
 # print('possible trades')
 # for i in possible_trades:
 # print(i[0])
 # print(i[1])
 # print()
 return possible_trades
 def get_all_moves(self):
 moves = [0, 1, 2] # TAKE, SELL, TRADE
 take_commodities = [i for i in self._game.market if self._game.market[i] > 0]
 sell_commodities = [i for i in self.hand if (self.hand[i] > 1) or (not Commodity.is_costly(i) and self.hand[i] > 0)]
 all_moves = []
 if self.hand_size() < 7:
 all_moves += [(moves[0], i) for i in take_commodities]
 all_moves += [(moves[1], i) for i in sell_commodities]
 trade_give_commodities = []
 for i in self.hand:
 trade_give_commodities += [i] * self.hand[i]
 trade_give_commodities += [Commodity.CAMEL] * self.camel_count
 trade_take_commodities = []
 for i in self._game.market:
 if i != Commodity.CAMEL:
 trade_take_commodities += [i] * self._game.market[i]
 # TODO Enable trading 
 # possible_trades = self.get_possible_trades(trade_give_commodities, trade_take_commodities)
 # all_moves += [(moves[2], i) for i in possible_trades]
 return all_moves
 def take(self, commodity=None):
 # self._game.pprint('before taking:', self.hand)
 if not self._game.muted:
 print('taking..', commodity)
 if self.hand_size() < 7:
 taken, take_count = self._game.pick_commodity(commodity)
 if taken == Commodity.CAMEL:
 self.camel_count += take_count
 else:
 self.hand[taken] += take_count
 # self._game.pprint('after taking:', self.hand)
 def sell(self, commodity=None, count=0):
 # print('before selling..', self.tokens)
 if not self._game.muted:
 print('selling..', commodity)
 if commodity is None:
 commodity = self.hand.most_common(1)[0][0]
 if ((not Commodity.is_costly(commodity)) and self.hand[commodity] > 0) or self.hand[commodity] > 1:
 count = self.hand[commodity] # TODO As of now sell all cards of this type
 for i in range(count):
 if self._game.price_tokens[commodity]:
 self.tokens.append(self._game.price_tokens[commodity].pop())
 self.hand[commodity] -= count
 if count == 3:
 self.tokens.append(random.randint(1, 4))
 elif count == 4:
 self.tokens.append(random.randint(4, 7))
 elif count >= 5:
 self.tokens.append(random.randint(7, 11))
 # print('after selling...', self.tokens)
 def trade(self, give=None, take=None):
 # if not self._game.muted:
 # print('trading..', (give, take))
 if give == None or take == None:
 return
 
 if len(give) != len(take):
 return 
 if len(give) < 2:
 return 
 if(set(give).intersection(set(take))):
 return
 give = Counter(give)
 take = Counter(take)
 self.hand -= give
 self._game.market += give
 self._game.market -= take
 self.hand += take
 self.camel_count -= give[Commodity.CAMEL]
 
 def make_move(self, winner, learn=False):
 all_moves = self.get_all_moves()
 # for i, move in enumerate(all_moves):
 # print(i, move)
 # move = int(input('Choose move..'))
 move = random.choice(all_moves)
 if move[0] == 0:
 self.take(move[1])
 elif move[0] == 1:
 self.sell(move[1])
 elif move[0] == 2:
 self.trade(move[1][0], move[1][1])
 return self._game
class Agent(Player):
 def __init__(self, tag, game):
 super().__init__(tag, game)
 def make_move(self, winner, learn):
 if learn:
 self.learn_state(self.get_state(), winner)
 
 if learn:
 epsilon = 0.8
 else:
 epsilon = 1
 p = random.uniform(0, 1)
 if p < epsilon:
 self._game = self.make_optimal_move()
 else:
 super().make_move(winner, learn)
 return self._game
 def make_optimal_move(self):
 opt_self = None
 v = -float('Inf')
 all_moves = self.get_all_moves()
 # print('all_moves')
 # for i in all_moves:
 # print(i)
 for m, c in all_moves:
 temp_self = copy.deepcopy(self)
 if m == 0:
 temp_self.take(c)
 elif m == 1:
 temp_self.sell(c)
 elif m == 2:
 temp_self.trade(c[0], c[1])
 # print('after making move', m, c)
 # temp_self._game.print_game()
 # print()
 
 temp_state = self.get_state()
 v_temp = self.calc_value(temp_state)
 # Encourage exploration
 if v_temp is None:
 v_temp = 1
 if v_temp > v:
 opt_self = copy.deepcopy(temp_self)
 v = v_temp
 elif v_temp == v:
 toss = random.randint(0, 1)
 if toss == 1:
 opt_self = copy.deepcopy(temp_self)
 self = copy.deepcopy(opt_self)
 # print('Optimal self')
 # opt_self._game.print_game()
 # print()
 # print('After making optimal move')
 # self._game.print_game()
 return self._game
 def calc_value(self, state):
 global state_values
 if state in state_values.keys():
 return state_values[state]
 def learn_state(self, state, winner):
 global state_values
 # if winner is not None:
 # state_values[state] = self.reward(winner)
 if self.prev_state in state_values.keys():
 v_s = state_values[self.prev_state]
 else:
 v_s = int(0)
 R = self.reward(winner)
 if state in state_values.keys() and winner is None:
 v_s_tag = state_values[state]
 else:
 v_s_tag = int(0)
 state_values[self.prev_state] = v_s + 0.5 * (R + v_s_tag - v_s)
 self.prev_state = state
 def reward(self, winner):
 if winner is self.tag:
 R = 1
 elif winner is None:
 R = 0
 else:
 R = -1
 return R
def load_values():
 global state_values
 try:
 f = open('state_values.pickle', 'rb')
 state_values = pickle.load(f)
 except:
 state_values = dict()
def save_values():
 global state_values
 f = open('state_values.pickle', 'wb')
 try:
 os.remove(f)
 except:
 pass
 pickle.dump(state_values, f)
def play_to_learn(episodes, muted=True):
 load_values()
 print(len(state_values))
 for i in range(episodes):
 print('Episode', i)
 game = Jaipur(Agent, Player)
 game.play_game(learn=True, muted=muted)
 game = Jaipur(Player, Agent)
 game.play_game(learn=True, muted=muted)
 if i % 1000 == 0:
 save_values()
 save_values()
 
 print(len(state_values))
 count = 0
 for i in state_values:
 if state_values[i] not in (-0.5, 0, 0.5):
 print(i, state_values[i])
 count += 1
 print(count)
 # print(state_values)
def test(n=100):
 load_values()
 # print('----------------------------------------------------------------- Agent vs Agent')
 # ava_p1_wins = 0
 # for i in range(n):
 # game = Jaipur(Agent, Agent)
 # winner = game.play_game(learn=False, muted=True)
 # if winner == 'P1':
 # ava_p1_wins += 1
 print('----------------------------------------------------------------- Agent vs Player')
 avp_p1_wins = 0
 for i in range(n):
 game = Jaipur(Agent, Player)
 winner = game.play_game(learn=False, muted=True)
 if winner == 'P1':
 avp_p1_wins += 1
 print('----------------------------------------------------------------- Player vs Agent')
 pva_p1_wins = 0
 for i in range(n):
 game = Jaipur(Player, Agent)
 winner = game.play_game(learn=False, muted=True)
 if winner == 'P1':
 pva_p1_wins += 1
 print('----------------------------------------------------------------- Player vs Player')
 pvp_p1_wins = 0
 for i in range(n):
 game = Jaipur(Player, Player)
 winner = game.play_game(learn=False, muted=True)
 if winner == 'P1':
 pvp_p1_wins += 1
 print('----------------------------------------------------------------- Result')
# print('----------------------------------------------------------------- Agent vs Agent')
# print('Total:', n)
# print('P1:', ava_p1_wins)
# print('P2:', n - ava_p1_wins)
 print('----------------------------------------------------------------- Agent vs Player')
 print('Total:', n)
 print('P1:', avp_p1_wins)
 print('P2:', n - avp_p1_wins)
 print('----------------------------------------------------------------- Player vs Agent')
 print('Total:', n)
 print('P1:', pva_p1_wins)
 print('P2:', n - pva_p1_wins)
 print('----------------------------------------------------------------- Player vs Player')
 print('Total:', n)
 print('P1:', pvp_p1_wins)
 print('P2:', n - pvp_p1_wins)
def play():
 # play_to_learn(10000, muted=True)
 game = Jaipur(Player, Agent)
 game.play_game(learn=False, muted=False)
 test()
if __name__ == "__main__":
 play()

All suggestions are welcome.

python python-3.x object-oriented game

lang-py