"""
 hash_table_again.py

 Another hash table implementation

 A *lot* faster than the hash_table_demo.py,
 though to be fair this one does use more memory 
 as needed, in the "chain lists" if not in the hash table.

   $ python hash_table_again.py
   -- hash table test 1
   <HashTable [[], [], [], [], [], [('cat', 'meow')], []] >
   <HashTable [[], [], [], [('dog', 'woof')], [], [('cat', 'meow')], []] >
    get('cat') :  meow
    get('dog') :  woof
    get('not in table') :  None
   -- hash table test 2 --
   <HashTable [[], [], [], [('abc', 1)], []] >
   <HashTable [[], [], [], [('abc', 1), ('abcd', 2)], []] >
   <HashTable [[], [('bbb', 3)], [], [('abc', 1), ('abcd', 2)], []] >
   Found key 'abc', value is '1'.
   Did not find key 'def'.
   Found key 'bbb', value is '3'.
   -- hash table test 3 --
   With n=  101, half each yes/no,  elapsed=    0.000429.
   With n= 1001, half each yes/no,  elapsed=    0.004252.
   With n=10001, half each yes/no,  elapsed=    0.043250.

 --------------------

 This one uses "separate chaining" to handle collisions,
 with a python list (a "chain") for each hash bucket.

             table [0]     is  []
                   [1]     is  [ ('a', 1) ]
                   ...

    key --->       [hash]  is  [ ('b', 2), ('c', 3) ]

                               chain

 One 
 
 Jim Mahoney | cs.bennington.college | MIT License | March 2021
"""

from utilities import random_words
from time import time

def hash_function(string):
    """ Given a string, return a (largish) integer hash value """
    # This is one of the classic hash functions, by Dan Bernstein,
    # known as "djb2"
    #
    # See for example https://theartincode.stanis.me/008-djb2/
    # The mod arithmetic keeps the hash value between 0 and 2**32,
    # in other words and unsigned 32 bit integer.
    # There are two "magic numbers" here, 5381 and 33,
    # which apparently give pretty good pseudo-random uniform
    # behavior for typical strings.
    hash = 5381
    for letter in string:
        hash = (hash * 33 + ord(letter)) % 4294967296
    return hash
    
class HashTable:
    """ a hash table (python dictionary) with "get" and "set" methods """

    def __init__(self, size=1001):
        """ create an initially empty hash table """
        # Each entry in self.table is either
        #   (a)   []   # the empty list
        #   (b)   [(key1,value1), (key2,value2), ...]
        # 
        self.size = size       # number of bins (both filled & empty).
        self.entries = 0       # number of bins with stuff in 'em.
        self.table = [ None ] * self.size
        # Make each entry an empty list.
        for i in range(size):
            self.table[i] = []

    def show(self):
        # return a string showing the whole table 
        return f"<HashTable {str(self.table)} >"
        
    def _index_of(self, key):
        """ Look for the location of a given key.
            If the key is already in the table, return (hash, offset),
               such that self.table[hash][offset] is (key, value)
            If not, return (hash, -1).
        """
        hash = hash_function(key) % self.size
        chain = self.table[hash]  # [(key,val), ... ] with this hash
        chain_length = len(chain)
        for i in range(chain_length):
            (maybe_key, _) = chain[i]
            if maybe_key == key:
                return (hash, i)  # found it.
        # didn't find it
        return (hash, -1)

    def set(self, key, value):
        """ add (or replace) a (key,value) in the hash table """
        (hash, offset) = self._index_of(key)
        chain = self.table[hash]
        if offset == -1:
            chain.append( (key, value) )
        self.entries += 1

    def get(self, key):
        """ return the corresponding value or None if not found """
        (hash, offset) = self._index_of(key)
        if offset == -1:
            return None
        else:
            chain = self.table[hash]
            (_, value) = chain[offset]
            return value

def test1():
    print("-- hash table test 1")
    smalltable = HashTable(7)
    smalltable.set('cat', 'meow')
    print(smalltable.show())
    smalltable.set('dog', 'woof')
    print(smalltable.show())
    print(" get('cat') : ", smalltable.get('cat'))
    print(" get('dog') : ", smalltable.get('dog'))    
    print(" get('not in table') : ", smalltable.get('not in table'))
    
def test2():
    # Some sample data to put into the table.
    # (Note that 'bbb' has the same hash as 'abc'.)
    print("-- hash table test 2 --")
    hashtable = HashTable(5)
    pairs = [('abc', 1), ('abcd', 2), ('bbb', 3)]
    for (key, value) in pairs:
        hashtable.set(key, value)
        print(hashtable.show())
    test_keys = ('abc', 'def', 'bbb')
    for key in test_keys:
        value = hashtable.get(key)
        if value:
            print(f"Found key '{key}', value is '{value}'. ")
        else:
            print(f"Did not find key '{key}'.")
    

def time_test(n, yes_words, no_words):
    """ return time it takes to put in yes_words, 
        check yes_words & no_words """
    start = time()
    hash_table = HashTable(n)
    for word in yes_words:
        hash_table.set(word, len(word))
    for word in yes_words:
        try:
            assert hash_table.get(word) == len(word)
        except AssertionError:
            print(f"Failed to hit word {word}; got {hash_table.get(word)}")
    for word in no_words:
        try:
            assert hash_table.get(word) == None
        except AssertionError:
            print(f"Failed to miss word {word}; got {hash_table.get(word)}")
    return time() - start
            
def test3():
    print("-- hash table test 3 --")
    for n in (101, 1001, 10001):
        words = random_words(n)
        yes_words = words[0:n//2]  # words to be put in hashtable
        no_words = words[n//2:]     # words not in hashtable
        elapsed = time_test(n, yes_words, no_words)
        print(f"With n={n:>5}, half each yes/no,  elapsed={elapsed:>12.6f}.")

def main():
    test1()
    test2()
    test3()

if __name__ == '__main__':
    main()