minimax_tictactoe.py

"""
 minimax_tictactoe.py

 A demonstration of the minimax algorithm 
 which searches the tree of possible moves
 in a two player game - tic tac toe in this case.

 See for example https://en.wikipedia.org/wiki/Minimax .

 The interesting game search pieces are
    minimax(board)       starts move search; calls max_value or min_value
    max_value(board)     calls min_value
    min_value(board)     calls max_value

 An older version of this code used alpha/beta search
 (minmax in one function, with tree pruning) 
 which is faster but harder to understand. I've left
 the code for that here but commented it out.

 Sample session :

    $ ./tictactoe.py
     === Tic Tac Toe === 
     Inputs may be abbreviated by their first letter. 
     Type 'quit' at any prompt to quit. 
     X always goes first.
    
     == Playing a new game. ==
     Player X is user, random, or smart ? smart
     Player O is user, random, or smart ? random
    
         |   |            a | b | c 
      ---+---+---        ---+---+---
         |   |            d | e | f 
      ---+---+---        ---+---+---
         |   |            g | h | i 
    
     SmartPlayer X chooses i.
    
         |   |            a | b | c 
      ---+---+---        ---+---+---
         |   |            d | e | f 
      ---+---+---        ---+---+---
         |   | X          g | h | i 
    
     RandomPlayer O chooses g.
    
         |   |            a | b | c 
      ---+---+---        ---+---+---
         |   |            d | e | f 
      ---+---+---        ---+---+---
       O |   | X          g | h | i 
    
     SmartPlayer X chooses a.
    
       X |   |            a | b | c 
      ---+---+---        ---+---+---
         |   |            d | e | f 
      ---+---+---        ---+---+---
       O |   | X          g | h | i 
    
     RandomPlayer O chooses c.
    
       X |   | O          a | b | c 
      ---+---+---        ---+---+---
         |   |            d | e | f 
      ---+---+---        ---+---+---
       O |   | X          g | h | i 
    
     SmartPlayer X chooses e.
    
       X |   | O          a | b | c 
      ---+---+---        ---+---+---
         | X |            d | e | f 
      ---+---+---        ---+---+---
       O |   | X          g | h | i 
    
     X wins!
    
     == Playing a new game. ==
     Player X is user, random, or smart ? quit
     Bye. 
  
 history :
   * Oct 2006 : created with simple syntax for an intro programming course
   * Feb 2010 : cleaned up; alphabeta search added for programming workshop
   * Mar 2019 : converted python2.x to python3.5; 
                added minimax code - cruder but simpler than alphabeta
   * Dec 2019 : minor edits; commented out alphabeta

 Jim Mahoney | cs.marlboro.college | MIT License
"""

#
# Board coordinates are(row,column) points           0,0 | 0,1 | 0,2
#                                                   -----+-----+-----
#                                                    1,0 | 1,1 | 1,2
#                                                   -----+-----+-----
#                                                    2,0 | 2,1 | 2,2
#
# or letters for easier user input.                  a | b | c
#                                                   ---+---+---
#                                                    d | e | f
#                                                   ---+---+---
#                                                    g | h | i
#
# The symbols on the board are
#   'X'    first player,
#   'O'    second player, or
#   ' '    empty.
#

import sys, random
infinity = float('inf')

def letter2point(letter):
    """ Convert a letter 'a'...'i' to a point (0,0) to (2,2). 
        >>> letter2point('a')
        (0, 0)
        >>> letter2point('b')
        (0, 1)
        >>> letter2point('d')
        (1, 0)
    """
    zeroToEight = ord(letter) - ord('a')
    assert(0 <= zeroToEight <= 8)
    row = int(zeroToEight / 3)      #  0, 0, 0, 1, 1, 1, 2, 2, 2
    column = zeroToEight % 3        #  0, 1, 2, 0, 1, 2, 0, 1, 2
    return (row, column)

def point2letter(point):
    """ Convert a point (0,0) to (2,2) to a letter 'a' ... 'i'. 
        >>> point2letter((0,0))
        'a'
        >>> point2letter((2,2))
        'i'
    """
    (row, column) = point
    return chr(ord('a') + row*3 + column)

def quit():
    print(" Bye. ")
    sys.exit()

def ask(question, legalResponses=()):
    """ Get and return a user entered string.
        If the first letter is 'q' (for 'quit'), quit the program.
        A list of legal responses (first letters) may be specified;
        if so, keep asking until one of those is seen. """
    while True:
        try:
            answer = input(question)
        except EOFError:                 # user types control-d
            quit()
        if (answer):
            firstChar = answer[0]
        else:
            firstChar = ""
        if (firstChar == 'q'):
            quit()
        elif (firstChar in legalResponses or not legalResponses):
            return answer
        else:
            print("  Oops: first letter isn't in {}; please try again.".format(
                str(legalResponses + ('q',))))

def minimax(board):
    """ Return best move using minimax algorithm """
    values_moves = []
    for move in board.possibleMoves():
        board.doMove(move)
        if board.whoseMove == 'X':
            value = max_value(board)
        else:
            value = min_value(board)
        board.undoMove(move)
        values_moves.append((value, move))
    if board.whoseMove == 'X':
        (best_value, best_move) = max(values_moves)
    else:
        (best_value, best_move) = min(values_moves)
    return best_move

def max_value(board):
    """ Return value of position if next move is X  """
    # mututal recursion with min_value
    if board.isTerminalPosition():
        return board.value()
    else:
        values = []
        for move in board.possibleMoves():
            board.doMove(move)
            values.append(min_value(board))
            board.undoMove(move)
        return max(values)

def min_value(board):
    """ Return value of position if next move is O """
    if board.isTerminalPosition():
        return board.value()
    else:
        values = []
        for move in board.possibleMoves():
            board.doMove(move)
            values.append(max_value(board))
            board.undoMove(move)
        return min(values)

class RandomPlayer:
    """ A computer player that makes random moves. """

    def __init__(self, symbol, verbose=True):
        """ symbol is X (first player) or O (second player) """
        self.symbol = symbol
        self.verbose = verbose

    def printMove(self, move):
        if self.verbose:
            print(" {} {} chooses {}.\n".format(
                self.__class__.__name__, self.symbol, point2letter(move)))

    def getMove(self, board):
        """ Return this player's next move as a (row,column) point. """
        move = random.choice(board.possibleMoves())
        self.printMove(move)
        return move

class InteractivePlayer (RandomPlayer):
    """ A person typing in moves from the console. """
    legalLetters = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i')

    def getMove(self, board):
        """ Return the (row,column) point taken from user's input. """
        while True:
            prompt = " %s : Where do you want to move? " % self.symbol
            move = letter2point(ask(prompt, self.legalLetters))
            if (board[move] != ' '):
                print("  Oops: that place is occupied.  Try again.")
            else:
                print()
                return move

class SmartPlayer (RandomPlayer):
    """ A smart computer player using minimax search. """
    gameStartRandomMoves = 1     # for variety on opening move(s)

    def getMove(self, board):
        nMoves = board.nMovesMade()
        if nMoves < self.gameStartRandomMoves:
            return RandomPlayer.getMove(self, board)
        else:
            return minimax(board)
        
class Board (object):
    """ The game implementation: board, moves, and search engine.
        >>> board = Board()
        >>> board[(1,1)]         # board access with a point ...
        ' '
        >>> board[1,1]           # ... or with two integers.
        ' '
        >>> board.doMove((1,1))  # 'X' (always first player)
        >>> board[1,1]
        'X'
        >>> board.getWinner()
        '?'
        >>> board.isTerminalPosition()
        False
        >>> print(board)
             |   |            a | b | c 
          ---+---+---        ---+---+---
             | X |            d | e | f 
          ---+---+---        ---+---+---
             |   |            g | h | i 
        >>> board.doMove((0,1)) # 'O'    Again, argument can be point ...
        >>> board.doMove(2,2)   # 'X'    ... or two integers.
        >>> board.doMove(1,0)   # 'O'
        >>> board.doMove(0,0)   # 'X'
        >>> print(board)
           X | O |            a | b | c 
          ---+---+---        ---+---+---
           O | X |            d | e | f 
          ---+---+---        ---+---+---
             |   | X          g | h | i 
        >>> board.getWinner()            # X has won.
        'X'
        >>> board.isTerminalPosition()
        True
        >>> board.value()                # positive value => good for X
        50
        >>> board.nMovesMade()
        5
        >>> board.possibleMoves()
        [(0, 2), (1, 2), (2, 0), (2, 1)]
    """                               # the coords of ways to win :
    lines = (((0,0), (0,1), (0,2)),   #     3 horizontal
             ((1,0), (1,1), (1,2)),
             ((2,0), (2,1), (2,2)),
             ((0,0), (1,0), (2,0)),   #     3 vertical
             ((0,1), (1,1), (2,1)),
             ((0,2), (1,2), (2,2)),
             ((0,0), (1,1), (2,2)),   #     2 diagonal
             ((0,2), (1,1), (2,0)))

    def __init__(self):
        self.grid = [[' ']*3 for i in range(3)]
        self.movesMade = []
        self.whoseMove = 'X'
        self._winner = None  # cache for getWinner() result.

    def __str__(self):
        """ Return current board with X's and O's as a string. """
        return self.row2string(0) + "          a | b | c \n"  + \
                        "  ---+---+---        ---+---+---\n"  + \
               self.row2string(1) + "          d | e | f \n"  + \
                        "  ---+---+---        ---+---+---\n"  + \
               self.row2string(2) + "          g | h | i "

    def __getitem__(self, coord, secondArg=None):
        """ Return symbol on board via board[(row,col)] or board[row,col]. """
        if secondArg != None:
            (row, column) = (coord, secondArg)
        else:
            (row, column) = coord
        return self.grid[row][column]

    def nMovesMade(self):
        return len(self.movesMade)

    def row2string(self, row):
        """ Return one row of the board as a string. """
        return "   " + self[row,0] + " | " + self[row,1] + " | " + self[row,2]

    def flipWhoseMove(self):
        self.whoseMove = {'X':'O', 'O':'X'}[self.whoseMove]
        self._winner = None

    def getWinner(self):
        """ Return 'X', 'O', 'tie', or (if the game isn't done), '?'. """
        if self._winner: return self._winner
        for line in self.lines:
            marks = list(self[line[i]] for i in (0,1,2))
            if (marks[0] in ('X','O')) and (marks[0] == marks[1] == marks[2]):
                self._winner = marks[0]
                return self._winner
        if (self.nMovesMade() == 9):
            self._winner = 'tie'
        else:
            self._winner = '?'
        return self._winner

#    def alphabeta(self, alpha, beta, alphaHistory, betaHistory):
#        """ An implementation of an alpha/beta game tree search. """
#        # A pruned min/max search for the best move.
#        # * Positive values are good for this player;
#        #   negative ones are good for the other player.
#        # * Alpha is this player's best previous result;
#        #   beta is the other player's best previous result.
#        # * Also returned is the move histories matching the alpha/beta values.
#        # * The search ends at "terminal nodes", typically
#        #   where the game is finished or the search depth is exceeded;
#        #   at which point game.isTerminalPosition() is True 
#        #   and game.value() should give a reasonable value without a search.
#        # * The search starts with
#        #     (value, history) = alphabeta(gameStart, -inf, inf, [], [])
#        # * Based on http://en.wikipedia.org/wiki/Alpha-beta_pruning 2/28/10
#        if self.isTerminalPosition():
#            return (self.value(), self.getHistory())
#        for move in self.possibleMoves():
#            self.doMove(move)
#            (value, history) = self.alphabeta(-beta, -alpha, 
#                                               betaHistory, alphaHistory)
#            self.undoMove(move)
#            value = -value                      # other player's point of view
#            if value > alpha:                   # min/max search
#                (alpha, alphaHistory) = (value, history)
#            if beta <= alpha:                   # alpha/beta tree pruning
#                break
#        return (alpha, alphaHistory)

    def doMove(self, move, secondArg=None):
        """ Place the next player's symbol at move (row,column). """
        if secondArg != None:
            (row, column) = (move, secondArg)
        else:
            (row, column) = move
        self.grid[row][column] = self.whoseMove
        self.movesMade.append((row,column))
        self.flipWhoseMove()

    def undoMove(self, move):
        """ Remove the given last move from the board. """
        (row, column) = move
        self.grid[row][column] = ' '
        self.movesMade.pop()
        self.flipWhoseMove()

    def isTerminalPosition(self):
        """ Return True if the search shouldn't continue past this position. """
        # Tic-Tac-Toe is a simple game; we'll just search all the way to the end.
        return self.getWinner() != '?'

    def getHistory(self):
        """ Return move history. """
        return list(self.movesMade)  # copy it so changes don't propagate out.

    def value(self):
        """ Return the value of a board position """
        # (This really only makes sense for terminal positions.)
        # Positive is good for X.
        # Negative is good for O;
        # Winning sooner is better. 
        winner = self.getWinner()
        score = 10 * (10 - self.nMovesMade())
        if winner == 'X':
            return score
        if winner == 'O':
            return -score
        else:
            return 0

    def possibleMoves(self):
        """ Return list of remaining legal move coordinates. """
        # Re-ordering this to put moves more likely be be good earlier
        # will improve the runtime of the alpha/beta pruning algorithm.
        return [(i, j) for i in (0,1,2)
                       for j in (0,1,2) if self[i, j] == ' ']

class Game (object):
    """ Choose players interactively and play one game, printing each move. """

    def __init__(self, players=[]):
        """ Start a new game with the given players,
            or prompt for the players if not provided."""
        print()
        print(" == Playing a new game. ==")
        if (players):
            self.players = players
        else:
            self.players = [self.askForPlayer('X'), self.askForPlayer('O')]
        self.whoseTurn = 0
        self.board = Board()

    def askForPlayer(self, symbol):
        """ Return a player based on input choice. """
        choices = "user, random, or smart"
        letters = ('u','s','r')
        answer = ask(" Player %s is %s ? " % (symbol, choices), letters)
        return {'u': InteractivePlayer, 
                's': SmartPlayer,
                'r': RandomPlayer,
                }[answer[0]](symbol)

    def play(self):
        """ Play one game, printing each move to stdout. """
        print()
        winner = '?'
        while (winner == '?'):
            print(self.board)
            print()
            player = self.players[self.whoseTurn]
            nextMove = player.getMove(self.board)
            self.board.doMove(nextMove)
            winner = self.board.getWinner()
            self.whoseTurn = (self.whoseTurn + 1) % 2
        if (winner in ('X', 'O')):
            print(self.board)
            print()
            print(" {} wins!".format(winner))
        else:
            print(self.board)
            print()
            print(" Tie game.")

class Referee:
    """ Run a series of games. """

    def run(self):
        print(" === Tic Tac Toe === ")
        print(" Inputs may be abbreviated by their first letter. ")
        print(" Type 'quit' at any prompt to quit. ")
        print(" X always goes first.")
        while True:
            Game().play()


if __name__ == '__main__':
    import doctest
    doctest.testmod()
    Referee().run()