"""
 analyze_wander.py

 Find the information entropy and Huffman code for
 the first paragraph of text of "The Wandering Inn"
 https://wanderinginn.com/2016/07/27/1-00/
 (which is what I happen to be reading this week).

 Running this :

   $ python2 analyze_wander.py ; dot wander.dot -Tpng > wander.png

 (The "dot" program is part of Graphviz, a graph-generating toolkit.)

 produces this :

     " "  0.17647   111
     ","  0.00929   1001111
     "."  0.01238   011010
     ";"  0.00310   110101110
     "B"  0.00310   110101111
     "I"  0.00310   01101110
     "R"  0.00310   01101111
     "T"  0.00310   10011100
     "a"  0.04644   0000
     "c"  0.00929   1101000
     "b"  0.01548   100100
     "e"  0.08050   1100
     "d"  0.05573   0111
     "g"  0.01548   100101
     "f"  0.01548   100110
     "i"  0.04954   0100
     "h"  0.04334   11011
     "k"  0.00310   10011101
     "m"  0.00929   1101001
     "l"  0.03715   10110
     "o"  0.07430   1010
     "n"  0.05882   1000
     "p"  0.00929   1101010
     "s"  0.04954   0101
     "r"  0.04644   0001
     "u"  0.03715   10111
     "t"  0.09598   001
     "w"  0.02477   01100
     "y"  0.00619   0110110
     "x"  0.00310   11010110
    entropy =  4.16674676297
    mean code length =  4.21052631579

 Jim Mahoney | cs.marlboro.college | Dec 2019 | MIT License
"""
from huffman import *

# From wanderinginn.com, chapter 1
text = 'The inn was dark and empty. It stood, silent, on the grassy hilltop, the ruins of other structures around it. Rot and age had brought low other buildings; the weather and wildlife had reduced stone foundations to rubble and stout wooden walls to a few rotten pieces of timber mixed with the ground. But the inn still stood.'

print('--- info entropy & Huffman ---')
print('text:')
print(text)
print('probabilities & huffman codes ')

# Per-character probabilities for the sample text, and the Huffman coder
# built from them (both helpers come from the local huffman module).
probabilities = get_probabilities(text)
h = Huffman(probabilities)

# One row per character: the character, its probability, its Huffman code.
for char in probabilities:
    print(' "{}"  {:.5f}   {}'.format(
        char, probabilities[char], h.huffman_code[char]))

print('entropy = ', entropy(probabilities))
print('mean code length = ', h.mean_code_length())

# Write the graphviz description of the Huffman tree.  The with-block
# guarantees the file is flushed and closed before the script exits,
# rather than relying on garbage collection of an anonymous file object
# (the shell pipeline in the module docstring reads this file right after).
with open('wander.dot', 'w') as dot_file:
    dot_file.write(h.huffman_tree.graphviz(labels=True))