当前位置:   article > 正文

蒙特卡洛树算法实现mini AlphaGo Reversi_黑白棋(mini alphago) 代码

黑白棋(mini alphago) 代码

本文代码参考蒙特卡洛树实现黑白棋

设计思想

采用蒙特卡洛树算法,即采用选择-拓展-模拟-反向传播,获取下棋的近似最优解。

代码内容

  1. from copy import deepcopy
  2. import math
  3. import random
  4. class AIPlayer:
  5. """
  6. AI 玩家
  7. """
  8. def __init__(self, color):
  9. """
  10. 玩家初始化
  11. :param color: 下棋方,'X' - 黑棋,'O' - 白棋
  12. """
  13. self.color = color #下棋方
  14. self.max_times = 200 #最大迭代次数
  15. self.c = 1.2 #UCB值中的参数c
  16. def get_move(self, board):
  17. """
  18. 根据当前棋盘状态获取最佳落子位置
  19. :param board: 棋盘
  20. :return: action 最佳落子位置, e.g. 'A1'
  21. """
  22. if self.color == 'X':
  23. player_name = '黑棋'
  24. else:
  25. player_name = '白棋'
  26. print("请等一会,对方 {}-{} 正在思考中...".format(player_name, self.color))
  27. board_state = deepcopy(board) #深拷贝棋盘,因为算法会改变棋盘状态
  28. root = Node(state = board_state, color=self.color)
  29. action = self.mcts(root) #蒙特卡洛树搜索
  30. return action
  31. #蒙特卡洛树搜索
  32. def mcts(self, root):
  33. for t in range(self.max_times):
  34. leave_node = self.select_expand_node(root) #选择并拓展
  35. reward = self.simulate(leave_node) #模拟
  36. self.back_propagate(leave_node, reward) #反向传播
  37. best_child = self.select(root, 0) #选择胜率最大的子节点
  38. return best_child.action
  39. #选择并拓展
  40. def select_expand_node(self, node):
  41. while not self.game_over(node.state): #当前节点不是叶子节点
  42. if len(node.children) == 0: #当前节点没有被拓展过
  43. return self.expand(node) #拓展任意节点
  44. else:
  45. if not node.full_expand(): #当前节点没有被完全拓展
  46. return self.expand(node)
  47. else:
  48. node = self.select(node, self.c) #若被完全拓展,继续选择节点
  49. return node
  50. #选择
  51. def select(self, node, c):
  52. max_ucb = -float('inf')
  53. select_children = []
  54. #在所有的子节点中,选择UCB值最大的子节点
  55. for child in node.children:
  56. if child.visits == 0:
  57. select_children = [child]
  58. break
  59. ucb = child.reward / child.visits + c * math.sqrt(2.0 * math.log(node.visits) / float(child.visits))
  60. if ucb == max_ucb:
  61. select_children.append(child)
  62. if ucb > max_ucb:
  63. select_children = [child]
  64. max_ucb = ucb
  65. if len(select_children) == 0:
  66. return node.parent
  67. return random.choice(select_children) #如果有多个UCB值相同的子节点,选择任意一个
  68. #拓展
  69. def expand(self, node):
  70. #下一步合法行动
  71. legal_actions = list(node.state.get_legal_actions(node.color))
  72. if len(legal_actions) == 0: #如果没有合法行动
  73. return node.parent
  74. action = random.choice(legal_actions) #随机选择一个合法行动
  75. expanded_actions = [child.action for child in node.children]
  76. while action in expanded_actions: #选择一个没有被拓展过的子节点
  77. action = random.choice(legal_actions)
  78. #下棋并更新棋盘
  79. new_state = deepcopy(node.state)
  80. new_state._move(action, node.color)
  81. #子节点下棋方相反
  82. if node.color == 'X':
  83. new_color = 'O'
  84. else:
  85. new_color = 'X'
  86. #新建节点
  87. node.add_child(new_state, action=action, color=new_color)
  88. return node.children[-1] #返回最新拓展的节点
  89. #模拟并返回奖励
  90. def simulate(self, node):
  91. board = deepcopy(node.state) #深拷贝棋盘,因为模拟会改变棋盘
  92. color = node.color
  93. count = 0
  94. while not self.game_over(board): #模拟到游戏结束为止
  95. legal_actions = list(node.state.get_legal_actions(color))
  96. if not len(legal_actions) == 0: #若有合法行动
  97. action = random.choice(legal_actions) #任意选择一个合法行动
  98. board._move(action, color)
  99. count = count + 1
  100. #交换下棋方
  101. if color == 'X':
  102. color = 'O'
  103. else:
  104. color = 'X'
  105. if count >= 50: #最大模拟次数
  106. break
  107. winner, win_count = board.get_winner() #获得赢家和领先几个棋子
  108. #赢家+10分,每领先一个棋子多1分
  109. reward = 0
  110. if not winner == 2:
  111. reward = 10 + win_count
  112. if (self.color == 'X' and winner == 1) or (self.color == 'O' and winner == 0):
  113. reward = - reward
  114. return reward
  115. #反向传播
  116. def back_propagate(self, node, reward):
  117. while node is not None:
  118. node.visits += 1
  119. if node.color == self.color:
  120. node.reward += reward
  121. else:
  122. node.reward -= reward
  123. node = node.parent
  124. return 0
  125. #判断游戏是否结束
  126. def game_over(self, state):
  127. #如果两个选手都没有合法的下棋位置,则比赛停止
  128. now_loc = list(state.get_legal_actions('X'))
  129. next_loc = list(state.get_legal_actions('O'))
  130. over = len(now_loc) == 0 and len(next_loc) == 0 #返回值 True/False
  131. return over

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家小花儿/article/detail/612502
推荐阅读
相关标签
  

闽ICP备14008679号