The code in this article implements a Reversi (Othello) AI based on Monte Carlo tree search.
The algorithm repeats the four steps of selection, expansion, simulation, and backpropagation to obtain an approximately optimal move.
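In the selection step, the implementation below scores each child node with a UCB1-style value. Writing a child's accumulated reward as $Q_i$, its visit count as $n_i$, the parent's visit count as $N$, and the exploration constant as $c$ (set to 1.2 in the code), the score is:

$$\mathrm{UCB}_i = \frac{Q_i}{n_i} + c\,\sqrt{\frac{2\ln N}{n_i}}$$

The child with the largest score is followed; with $c = 0$ the same formula reduces to picking the child with the best average reward, which is how the final move is chosen.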
```python
from copy import deepcopy
import math
import random


class AIPlayer:
    """
    AI player
    """

    def __init__(self, color):
        """
        Initialize the player.
        :param color: side to move, 'X' - black, 'O' - white
        """
        self.color = color        # side this AI plays
        self.max_times = 200      # maximum number of MCTS iterations
        self.c = 1.2              # exploration constant c in the UCB formula

    def get_move(self, board):
        """
        Get the best move for the current board state.
        :param board: the board
        :return: action, the best move, e.g. 'A1'
        """
        if self.color == 'X':
            player_name = 'Black'
        else:
            player_name = 'White'
        print("Please wait, the opponent {}-{} is thinking...".format(player_name, self.color))

        board_state = deepcopy(board)  # deep-copy the board, since the search mutates it
        root = Node(state=board_state, color=self.color)
        action = self.mcts(root)       # Monte Carlo tree search

        return action

    # Monte Carlo tree search
    def mcts(self, root):
        for t in range(self.max_times):
            leaf_node = self.select_expand_node(root)   # selection and expansion
            reward = self.simulate(leaf_node)           # simulation
            self.back_propagate(leaf_node, reward)      # backpropagation
        best_child = self.select(root, 0)               # pick the child with the best average reward
        return best_child.action

    # selection and expansion
    def select_expand_node(self, node):
        while not self.game_over(node.state):   # stop once the game is over at this node
            if len(node.children) == 0:         # node has never been expanded
                return self.expand(node)        # expand an arbitrary child
            else:
                if not node.full_expand():      # node is not fully expanded yet
                    return self.expand(node)
                else:
                    node = self.select(node, self.c)  # fully expanded: descend to the best child
        return node

    # selection
    def select(self, node, c):
        max_ucb = -float('inf')
        select_children = []
        # among all children, pick the one with the largest UCB value
        for child in node.children:
            if child.visits == 0:
                select_children = [child]
                break
            ucb = child.reward / child.visits + c * math.sqrt(2.0 * math.log(node.visits) / float(child.visits))
            if ucb == max_ucb:
                select_children.append(child)
            if ucb > max_ucb:
                select_children = [child]
                max_ucb = ucb
        if len(select_children) == 0:
            return node.parent
        return random.choice(select_children)  # break ties between equal UCB values at random

    # expansion
    def expand(self, node):
        # legal moves from this node
        legal_actions = list(node.state.get_legal_actions(node.color))
        if len(legal_actions) == 0:  # no legal move
            return node.parent
        action = random.choice(legal_actions)  # pick a legal move at random
        expanded_actions = [child.action for child in node.children]
        while action in expanded_actions:      # resample until the move has no child yet
            action = random.choice(legal_actions)
        # play the move on a copy of the board
        new_state = deepcopy(node.state)
        new_state._move(action, node.color)
        # the child node is played by the other side
        if node.color == 'X':
            new_color = 'O'
        else:
            new_color = 'X'
        # create the new node
        node.add_child(new_state, action=action, color=new_color)
        return node.children[-1]  # return the newly expanded node

    # simulation (random rollout), returns a reward
    def simulate(self, node):
        board = deepcopy(node.state)  # deep-copy the board, since the rollout mutates it
        color = node.color
        count = 0
        while not self.game_over(board):  # play random moves until the game ends
            legal_actions = list(board.get_legal_actions(color))
            if not len(legal_actions) == 0:  # if there is a legal move
                action = random.choice(legal_actions)  # pick one at random
                board._move(action, color)
            count = count + 1
            # switch sides
            if color == 'X':
                color = 'O'
            else:
                color = 'X'
            if count >= 50:  # cap on rollout length
                break
        winner, win_count = board.get_winner()  # winner and the margin in discs
        # the winner gets 10 points, plus 1 point per disc of margin
        reward = 0
        if not winner == 2:
            reward = 10 + win_count
        if (self.color == 'X' and winner == 1) or (self.color == 'O' and winner == 0):
            reward = -reward
        return reward

    # backpropagation
    def back_propagate(self, node, reward):
        while node is not None:
            node.visits += 1
            if node.color == self.color:
                node.reward += reward
            else:
                node.reward -= reward
            node = node.parent
        return 0

    # check whether the game is over
    def game_over(self, state):
        # the game ends when neither player has a legal move
        now_loc = list(state.get_legal_actions('X'))
        next_loc = list(state.get_legal_actions('O'))
        over = len(now_loc) == 0 and len(next_loc) == 0  # True/False
        return over
```