from numpy import *
from matplotlib.pyplot import plot
import matplotlib.pyplot as plt
import numpy as np


# Entropy function definitions.

def h(probability):
    """ Partial information entropy from a single probability. """
    return probability * log2(1/probability)

def H(probabilities):
    """ Information entropy of a probability distribution. """
    # The probabilities should sum to 1 within some roundoff error.
    assert(abs(sum(probabilities) - 1.0) < 1e-6)
    # For zero probabilities, 0*log2(1/0) is zero ... but
    # log2(1/0) is an error, so remove zero probabilities before the sum.
    nonzero_probabilitites = filter(lambda x: x != 0, probabilities)
    return sum(list(map(h, nonzero_probabilitites)))


# Given f and Px=[_, _, _, _], we can calculate everything else:
Pxy = lambda f, px: [(1-2*f)*px[0], f*px[0],        f*px[0],       0,
                     f*px[1],       (1-2*f)*px[1],  0,             f*px[1],
                     f*px[2],       0,              (1-2*f)*px[2], f*px[2],
                     0,             f*px[3],        f*px[3],       (1-2*f)*px[3]]
Py  = lambda f, px: list(map(lambda i: sum(Pxy(f, px)[i::4]), range(4)))
Hx  = lambda f, px: H(px)
Hy  = lambda f, px: H(Py(f, px))
Hxy = lambda f, px: H(Pxy(f, px))
I   = lambda f, px: Hx(f, px) + Hy(f, px) - Hxy(f, px)
def printSummary(f, px):
    """ Print probabilities, entropies, and channel capacity for a given f and Px """
    print("--- keyboard channel ---")
    print(" f   = ", f)
    print(" Px  = ", px)
    print(" Py  = ", Py(f, px))
    print(" Pxy = ", Pxy(f, px))
    print(" Hx  = ", Hx(f, px))
    print(" Hy  = ", Hy(f, px))
    print(" Hxy = ", Hxy(f, px))
    print(" I   = ", I(f, px))


# 10% chance of sliding to a neighboring key.
# Channel capacity turns out to be 1.078, or just over 1 bit per key pressed.
printSummary(0.1, [0.25]*4)

--- keyboard channel ---
 f   =  0.1
 Px  =  [0.25, 0.25, 0.25, 0.25]
 Py  =  [0.25, 0.25, 0.25, 0.25]
 Pxy =  [0.2, 0.025, 0.025, 0, 0.025, 0.2, 0, 0.025, 0.025, 0, 0.2, 0.025, 0, 0.025, 0.025, 0.2]
 Hx  =  2.0
 Hy  =  2.0
 Hxy =  2.921928094887362
 I   =  1.0780719051126382


# A slightly asymmetric Px gives a slightly smaller channel capacity (1.077)
# confirming the idea that a symmetric Px=[0.25]*4 gives the highest capacity.
printSummary(0.1, [0.24, 0.26, 0.25, 0.25])
printSummary(0.1, [0.25, 0.24, 0.26, 0.25])

--- keyboard channel ---
 f   =  0.1
 Px  =  [0.24, 0.26, 0.25, 0.25]
 Py  =  [0.243, 0.257, 0.249, 0.251]
 Pxy =  [0.192, 0.024, 0.024, 0, 0.026000000000000002, 0.20800000000000002, 0, 0.026000000000000002, 0.025, 0, 0.2, 0.025, 0, 0.025, 0.025, 0.2]
 Hx  =  1.9994227679976009
 Hy  =  1.9997114240164597
 Hxy =  2.921350862884963
 I   =  1.0777833291290975
--- keyboard channel ---
 f   =  0.1
 Px  =  [0.25, 0.24, 0.26, 0.25]
 Py  =  [0.25, 0.242, 0.258, 0.25]
 Pxy =  [0.2, 0.025, 0.025, 0, 0.024, 0.192, 0, 0.024, 0.026000000000000002, 0, 0.20800000000000002, 0.026000000000000002, 0, 0.025, 0.025, 0.2]
 Hx  =  1.9994227679976009
 Hy  =  1.999630607011392
 Hxy =  2.9213508628849634
 I   =  1.0777025121240293


# Nearly noiseless transmission.
# The four symbols are two bits of information, (00, 01, 10, 11),
# and so I approaches 2.
printSummary(1e-5, [0.25]*4)

--- keyboard channel ---
 f   =  1e-05
 Px  =  [0.25, 0.25, 0.25, 0.25]
 Py  =  [0.24999999999999997, 0.24999999999999997, 0.24999999999999997, 0.25]
 Pxy =  [0.249995, 2.5e-06, 2.5e-06, 0, 2.5e-06, 0.249995, 0, 2.5e-06, 2.5e-06, 0, 0.249995, 2.5e-06, 0, 2.5e-06, 2.5e-06, 0.249995]
 Hx  =  2.0
 Hy  =  2.0
 Hxy =  2.000361046421765
 I   =  1.999638953578235

keyboard problem¶