Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f4192fe

Browse files
author
YuHang
committed
GTP use NN direct prediction
1 parent cb05575 commit f4192fe

File tree

2 files changed

+30
-18
lines changed

2 files changed

+30
-18
lines changed

‎model/APV_MCTS.py‎

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,16 @@ def shift_node(self,move,pos_to_shift=None):
183183

184184
def suggest_move(self, position):
185185

186-
move_prob = self.suggest_move_prob(position)
186+
"""Use MCTS guided by NN"""
187+
#move_prob = self.suggest_move_prob(position)
188+
189+
"""Use direct NN predition (pretty weak)"""
190+
move_probs,value = self.api.run_many(bulk_extract_features([position]))
191+
move_prob = move_probs[0]
192+
idx = np.argmax(move_prob)
193+
greedy_move = divmod(idx,go.N)
194+
prob = move_prob[idx]
195+
logger.debug(f'Greedy move is: {greedy_move} with prob {prob:.3f}')
187196

188197
on_board_move_prob = np.reshape(move_prob[:-1],(go.N,go.N))
189198
if position.n < 30:
@@ -192,8 +201,11 @@ def suggest_move(self, position):
192201
move = select_most_likely(position, on_board_move_prob)
193202

194203
player = 'B' if position.to_play==1 else 'W'
195-
win_rate = self.children[move].Q/2+0.5
196-
logger.info(f'Win rate for player {player} is {win_rate:2f}')
204+
"""Use MCTS guided by NN average win ratio"""
205+
#win_rate = self.children[move].Q/2+0.5
206+
"""Use direct NN value prediction (almost always 50/50)"""
207+
win_rate = value[0,0]/2+0.5
208+
logger.info(f'Win rate for player {player} is {win_rate:.4f}')
197209

198210
return move
199211

@@ -206,12 +218,6 @@ def suggest_move_prob(self, position):
206218
logger.debug(f'Expadning Root Node...')
207219

208220
move_probs,_ = self.api.run_many(bulk_extract_features([position]))
209-
'''
210-
move_prob = move_probs[0]
211-
greedy_move = divmod(np.argmax(move_prob),go.N)
212-
prob = move_prob[np.argmax(move_prob)]
213-
logger.debug(f'Greedy move is: {greedy_move} with prob {prob} at game step {position.n}')
214-
'''
215221
self.position = position
216222
self.expand(move_probs[0])
217223

‎model/APV_MCTS_C.pyx‎

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,16 @@ class MCTSPlayerMixin(object):
183183

184184
def suggest_move(self, position):
185185

186-
move_prob = self.suggest_move_prob(position)
186+
"""Use MCTS guided by NN"""
187+
#move_prob = self.suggest_move_prob(position)
188+
189+
"""Use direct NN predition (pretty weak)"""
190+
move_probs,value = self.api.run_many(bulk_extract_features([position]))
191+
move_prob = move_probs[0]
192+
idx = np.argmax(move_prob)
193+
greedy_move = divmod(idx,go.N)
194+
prob = move_prob[idx]
195+
logger.debug(f'Greedy move is: {greedy_move} with prob {prob:.3f}')
187196

188197
on_board_move_prob = np.reshape(move_prob[:-1],(go.N,go.N))
189198
if position.n < 30:
@@ -192,8 +201,11 @@ class MCTSPlayerMixin(object):
192201
move = select_most_likely(position, on_board_move_prob)
193202

194203
player = 'B' if position.to_play==1 else 'W'
195-
win_rate = self.children[move].Q/2+0.5
196-
logger.info(f'Win rate for player {player} is {win_rate:2f}')
204+
"""Use MCTS guided by NN average win ratio"""
205+
#win_rate = self.children[move].Q/2+0.5
206+
"""Use direct NN value prediction (almost always 50/50)"""
207+
win_rate = value[0,0]/2+0.5
208+
logger.info(f'Win rate for player {player} is {win_rate:.4f}')
197209

198210
return move
199211

@@ -206,12 +218,6 @@ class MCTSPlayerMixin(object):
206218
logger.debug(f'Expadning Root Node...')
207219

208220
move_probs,_ = self.api.run_many(bulk_extract_features([position]))
209-
'''
210-
move_prob = move_probs[0]
211-
greedy_move = divmod(np.argmax(move_prob),go.N)
212-
prob = move_prob[np.argmax(move_prob)]
213-
logger.debug(f'Greedy move is: {greedy_move} with prob {prob} at game step {position.n}')
214-
'''
215221
self.position = position
216222
self.expand(move_probs[0])
217223

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /