1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
| import math import random import copy from abc import ABC, abstractmethod
class GameState(ABC): @abstractmethod def get_legal_actions(self): pass @abstractmethod def apply_action(self, action): pass @abstractmethod def is_terminal(self): pass
class TicTacToe(GameState): def __init__(self, board=None, current_player=1): self.board = board if board is not None else [0] * 9 self.current_player = current_player
def get_legal_actions(self): """返回所有空位的索引""" return [i for i, x in enumerate(self.board) if x == 0]
def apply_action(self, action): """落子,并返回切换了玩家的新状态""" new_board = copy.deepcopy(self.board) new_board[action] = self.current_player return TicTacToe(new_board, current_player=-self.current_player)
def get_winner(self): """检查是否有玩家获胜,返回 1, -1, 或 None""" lines = [ (0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7), (2, 5, 8), (0, 4, 8), (2, 4, 6) ] for a, b, c in lines: if self.board[a] != 0 and self.board[a] == self.board[b] == self.board[c]: return self.board[a] return None
def is_terminal(self): """判断游戏是否结束(有人赢了,或者没空位了)""" return self.get_winner() is not None or len(self.get_legal_actions()) == 0
def get_result(self, player): """ 核心方法:返回特定 player 在当前终局下的得分。 赢=1.0,输=0.0,平局=0.5 """ winner = self.get_winner() if winner is None: return 0.5 if winner == player: return 1.0 else: return 0.0
def __str__(self): symbols = {1: 'X', -1: 'O', 0: '.'} res = "" for i in range(3): res += " ".join([symbols[self.board[i*3 + j]] for j in range(3)]) + "\n" return res
class MCTSNode: def __init__(self, state: TicTacToe, parent=None, action=None): self.state = state self.parent = parent self.action = action self.children = [] self.untried_actions = state.get_legal_actions() self.visits = 0 self.wins = 0.0
def is_fully_expanded(self): return len(self.untried_actions) == 0
def best_child(self, c_param=1.414): """使用 UCT 公式选择最优子节点""" choices_weights = [] for child in self.children: exploit = child.wins / child.visits explore = c_param * math.sqrt((2 * math.log(self.visits) / child.visits)) choices_weights.append(exploit + explore) return self.children[choices_weights.index(max(choices_weights))]
def mcts_search(root_state: TicTacToe, iterations: int = 2000): root_node = MCTSNode(state=root_state)
for _ in range(iterations): node = root_node
while node.is_fully_expanded() and not node.state.is_terminal(): node = node.best_child()
if not node.state.is_terminal(): action = random.choice(node.untried_actions) node.untried_actions.remove(action) next_state = node.state.apply_action(action) child_node = MCTSNode(state=next_state, parent=node, action=action) node.children.append(child_node) node = child_node
current_rollout_state = node.state while not current_rollout_state.is_terminal(): possible_actions = current_rollout_state.get_legal_actions() action = random.choice(possible_actions) current_rollout_state = current_rollout_state.apply_action(action)
while node is not None: node.visits += 1 if node.parent is not None: player_who_made_the_move = node.parent.state.current_player node.wins += current_rollout_state.get_result(player_who_made_the_move) node = node.parent
best_final_node = max(root_node.children, key=lambda c: c.visits) return best_final_node.action
def play_game(): game = TicTacToe() print("=====================================") print("欢迎来到 MCTS 井字棋对战!") print("AI 执 X (先手),你执 O (后手)。") print("棋盘的位置索引 (0-8) 如下所示:") print(" 0 | 1 | 2 ") print("---+---+---") print(" 3 | 4 | 5 ") print("---+---+---") print(" 6 | 7 | 8 ") print("=====================================\n") print("初始棋盘:") print(game)
while not game.is_terminal(): if game.current_player == 1: print("AI (MCTS) 正在思考中... (这可能需要一两秒)") best_move = mcts_search(game, iterations=2000) print(f"AI 落子在位置: {best_move}\n") game = game.apply_action(best_move) else: legal_moves = game.get_legal_actions() while True: try: move = input(f"轮到你了,请输入你要落子的位置 {legal_moves}: ") move = int(move) if move in legal_moves: break else: print("⚠ 错误:该位置已有棋子或超出范围,请重新输入。") except ValueError: print("⚠ 错误:请输入一个有效的数字。") print(f"你落子在位置: {move}\n") game = game.apply_action(move)
print(game)
print("=====================================") winner = game.get_winner() if winner == 1: print("🤖 游戏结束:AI (X) 获胜!(MCTS 确实很难被击败哦)") elif winner == -1: print("🎉 游戏结束:恭喜你 (O) 获胜!(这说明你找到了算法的盲区!)") else: print("🤝 游戏结束:平局!(面对完美的对手,平局已经是最好的结果了)") print("=====================================")
if __name__ == "__main__": play_game()
|