""" word_ladder.py an example of a graph breadth-first search See https://en.wikipedia.org/wiki/Word_ladder https://runestone.academy/ns/books/published/pythonds/ Graphs/TheWordLadderProblem.html Make a graphviz graph from a .dot file like this ... though the graph is too big and doesn't display well. $ sfdp -Tsvg group2.dot > group2.svg Jim Mahoney | cs.bennington.college | April 2022 | MIT License """ from tqdm import tqdm from random import choice def is_fiveletter_lower(word): """ true if the word is lowercase and has five letters """ return len(word)==5 and word.lower()==word assert is_fiveletter_lower("poker"), "is five letter" assert not is_fiveletter_lower("cat"), "not is five letter" assert not is_fiveletter_lower("Spade"), "not is lowercase letter" def get_words(filename="./words.txt"): """ return a list of lowercase five letter words """ return list(filter(is_fiveletter_lower, open(filename).read().split('\n'))) def differ_by_one_letter(word1, word2): """ true if word1, word2 differ by exactly one letter """ n_different = 0 for i in range(len(word1)): if word1[i] != word2[i]: n_different += 1 return n_different == 1 assert differ_by_one_letter("salty", "silty"), "differ one letter" assert not differ_by_one_letter("rapid", "testy"), "not differ one letter" def mean(values): """ return mean (i.e. average) of a list of values """ return sum(values)/len(values) def get_ladder_graph_2(words): """ return an adjacency list {word: set_of_neighbors} """ # from https://runestone.academy/ # ns/books/published/pythonds/Graphs/BuildingtheWordLadderGraph.html # For n=len(words), m=len(word), O(n*m) ... fast; under 1 sec. buckets = {} for word in words: for i in range(len(word)): underbar_word = word[:i] + '_' + word[i+1:] #print(f"underbard word is '{underbar_word}'") if not underbar_word in buckets: buckets[underbar_word] = set() buckets[underbar_word].add(word) graph = {} for word in words: graph[word] = set() for bucket in buckets: for word1 in buckets[bucket]: for word2 in buckets[bucket]: if word1 != word2: graph[word1].add(word2) graph[word2].add(word1) return graph def print_stats(graph): neighbor_counts = [len(neighbors) for neighbors in graph.values()] print(" Number of neighbors statistics: ") print(f" largest number of neighbors is {max(neighbor_counts)},") print(f" smallest is {min(neighbor_counts)},") print(f" mean is {mean(neighbor_counts):.3f}.") print(" ... finished creating word ladder graph.") def get_ladder_graph(words): """ return an adjacency list {word: set_of_neighbors} and print stats """ # For n=len(words), O(n*n) ... with 8e3 words, 64e6 comparisons ... slow. # Also I'll print some diagnostics # including a fancy tqdm progress graph from https://github.com/tqdm/tqdm # Takes about 30 sec. graph = {} print(" Creating word ladder graph ...") for word in tqdm(words): graph[word] = set() for maybe_neighbor in words: if differ_by_one_letter(word, maybe_neighbor): graph[word].add(maybe_neighbor) return graph class Stack: """ first in, last out, with O(1) "x in stack" >>> s = Stack() >>> s.push(1); s.push(2); s.push(3) >>> 1 in s True >>> (s.pop(), s.pop(), s.pop()) (3, 2, 1) >>> len(s) 0 """ def __init__(self): self.values = [] self.has = {} # also put values in a dictionary def __len__(self): return len(self.values) def __contains__(self, value): # implement the "_ in _" python syntax. return value in self.has def push(self, value): self.values.append(value) self.has[value] = True def pop(self): value = self.values.pop() self.has.pop(value) # remove from dictionary return value def explore(graph, forest, start_word, group_id): """ Do a breadth-first search. Update forest with group_id and parent """ fringe = Stack() fringe.push(start_word) while len(fringe) > 0: node = fringe.pop() forest[node]['group'] = group_id forest[node]['visited'] = True for neighbor in graph[node]: if not forest[neighbor]['visited'] and not neighbor in fringe: fringe.push(neighbor) forest[neighbor]['parent'] = node def make_graphviz(forest, filename='forest.dot', group=None): """ output to filename a graphviz graph of forest[node]['parent'] """ dot = "graph forest {\n" for word in forest: if forest[word]['group'] == group: parent = str(forest[word]['parent']) if parent != 'None': dot += f' "{parent}" -- "{word}" \n' dot += "}\n" open(filename, 'w').write(dot) def main(): print("-- word ladder --") words = get_words() print(f" The number of five letter words is {len(words)}.") if False: print(" -- ladder graph slow --") ladder_graph_slow = get_ladder_graph(words) print_stats(ladder_graph_slow) print(" Making graph ...") graph = get_ladder_graph_2(words) print(" ... done making graph.") print_stats(graph) forest = {} for word in words: forest[word] = {'parent': None, 'visited': False, 'group':0} group_id = 0 start_words = [] print(" Exploring forest ...") for word in words: if forest[word]['group'] == 0: # seen already? start_words.append(word) group_id += 1 explore(graph, forest, word, group_id) print(" ... done exploring forest.") print(f" Number of trees is {group_id}.") tree_size = {} for id in range(1, group_id+1): tree_size[id] = 0 for word in forest: if forest[word]['group'] == id: tree_size[id] += 1 counts = tree_size.values() print(f" Smallest tree size is {min(counts)}.") print(f" Largest tree size is {max(counts)}.") print(f" Average tree size is {mean(counts):.3f}.") print() print(f" start tree_size group ") print(f" ----- --------- ------ ") for id in range(1, group_id+1): print(f" {start_words[id-1]} {tree_size[id]:4d} {id:4d}") print() group2 = [] print("words in group 2:") for word in words: if forest[word]['group'] == 2: group2.append(word) print(" " + word) (word1, word2) = (choice(group2), choice(group2)) print() print(f" Choosing two random words from group 2 : {word1} -> {word2} ") forest2 = {} for word in words: forest2[word] = {'parent': None, 'visited': False, 'group':0} explore(graph, forest2, word1, 2) path = [] word = word2 while word != None: path.append(word) word = forest2[word]['parent'] path.reverse() print(f" The length {len(path)} ladder is ") for word in path: print(f" {word}") print() print(" Are we having fun yet?") #print() #outfile = 'group2.dot' #print("f Making graphvis file '{outfile}'") #make_graphviz(forest, group=2, filename=outfile) main()