"""recursive_descent.py An example of a recursive descent parser using simple grammar, "fully parenthesized expression" as described at https://math.hws.edu/javanotes/c9/s5.html, where they say in BNF ::= | "(" ")" ::= "+" | "-" | "*" | "/" I'll expand on this a bit, defining number explicitly and using this notation : ::= production rule "x" terminal character name nontermimal symbol | alternative productions * 0 or any number of times of the preceding grammar : expression ::= "(" expression operator expression ")" | number operator ::= "+" | "-" | "*" | "/" number ::= digit digit* digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" I've also changed the order of the alternatives in expression because it seemed simpler to recognize the "(" . examples : 123 (15-32) ((1+2)*(13/77)) The generated parse tree is made of nodes that look like this : parse tree node : [name, part1, part2, part3, ...] which come from a rule like this name ::= part1 part2 part3 and which form a subtree ___name______ / | \ part1 part2 part3 For example, here are a few parse tree nodes : ["expression", "(", expr1, op1, expr2, ")"] ["operator", "+"] ["number", digit1, digit2] ["digit", "4"] And here are two full parse trees examples : "123" => [EXP, [NUM, [DIG, "1"] [DIG, "2"], [DIG, "3"]]] expression | number / | \ digit digit digit | | | 1 2 3 "(15*32)" => [EXP, "(", [EXP, [NUM, [DIG, "1"], [DIG, "5"]]], [OP, "-"], [EXP, [NUM, [DIG, "3"], [DIG, "2"]]], ")"] expression / | | | \ ( | - | ) | | num num / \ / \ dig dig dig dig | | | | 1 5 3 2 In practice we wouldn't want to be quite this pendantic; we'd instead use a regular expression in the lexer to recognize the number, and not bother to put the intermediate DIGIT steps into the parser tree. For this simple example, I'll treat each token is just one character, one of "(", ")", "+", "-", "*", "/", "0" ... "9" . Again, in practice we would use regular expressions to have the lexer do more work, pulling out a multiple-character number as a single token. To run this and send output and errors to an output file : $ python recursive_descent.py > recursive_descent_out.txt 2>&1 Jim Mahoney | Nov 18 2021 | cs.bennington.college | MIT License """ import pprint pretty_print = pprint.PrettyPrinter().pprint class LexerQueue: """ an API for moving through the tokens of the code string """ def __init__(self, code): self.code = code # the code string to analyze i.e. "(1+2)" self.offset = 0 # current character is self.code[self.offset] def peek(self): """ return the next token (character) or the empty string if there are no more.""" try: return self.code[self.offset] except: return "" def pop(self): """ return the next token and move on to the next """ result = self.peek() self.offset += 1 return result def is_empty(self): """ return True if there are no more tokens """ return self.peek() == "" # -- a LexerQueue test ----------------------------------------- lex_test = LexerQueue("(1+2)") assert lex_test.peek() == "(" # first token ; don't move forward assert lex_test.pop() == "(" # first token ; do move forward assert lex_test.pop() == "1" # next token should be this while not lex_test.is_empty(): # slurp the remaining tokens lex_test.pop() assert lex_test.is_empty() # no more tokens left # -- parser --------- digit_tokens = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') operator_tokens = ('+', '-', '*', '/') def expression(lq): if lq.peek() == "(": left = lq.pop() expr1 = expression(lq) op = operator(lq) expr2 = expression(lq) right = lq.pop() assert left == "(" and right == ")", \ "OOPS - didn't find expected ( or )" return ["expression", left, expr1, op, expr2, right] else: return ["expression", number(lq)] def operator(lq): op = lq.pop() assert op in operator_tokens, \ "OOPS - failed to find expected operator" return ['operator', op] def number(lq): result = ['number', digit(lq)] while lq.peek() in digit_tokens: result.append(digit(lq)) return result def digit(lq): value = lq.pop() assert value in digit_tokens, \ "OOPS - didn't find expected digit" return ["digit", value] # -- parser tests ------------- examples = [ "1", "(1+2)", "((12+34)*(56-78))", "(1+" # should throw an error ] for code in examples: print(f"code = '{code}'") pretty_print(expression(LexerQueue(code))) print()