I am creating a memoization example with a function that adds up / averages the elements of an array and compares it with the cached ones to retrieve them in case they are already stored.
In addition, I want to store only if the result of the function differs considerably (passes a threshold e.g. 5000 below).
I created an example using a decorator to do so, the results using the decorator is slightly slower than without the memoization which is not OK, also is the logic of the decorator correct ?
My code is attached below:
import time
import random
from collections import OrderedDict
def memoize(f):
cache = {}
def g(*args):
sum_key_arr = sum(args[0])
print(sum_key_arr)
if sum_key_arr not in cache:
for key, value in OrderedDict(sorted(cache.items())).items():# key in dict cannot be an array so I use the sum of the array as the key
if abs(sum_key_arr - key) <= 5000:#threshold is great here so that all values are approximated!
#print('approximated')
return cache[key]
else:
#print('not approximated')
cache[sum_key_arr] = f(args[0],args[1])
return cache[sum_key_arr]
return g
@memoize
def aggregate(dict_list_arr,operation):
if operation == 'avg':
return sum(dict_list_arr) / len(list(dict_list_arr))
if operation == 'sum':
return sum(dict_list_arr)
return None
t = time.time()
for i in range(200,150000):
res = aggregate(list(range(i)),'avg')
elapsed = time.time() - t
print(res)
print(elapsed)
UPDATE: I tried to involve an id key (that captues the list content) while now using a dict as input, here's below the changes made to the code:
import time
import random
from collections import OrderedDict
def memoize(f):
cache = {}
def g(*args):
key_arr = list(args[0].keys())[0]
if key_arr not in cache:
for key, value in OrderedDict(sorted(cache.items())).items():# key in dict cannot be an array so I use the sum of the array as the key
if abs(int(key_arr) - int(key)) <= 5000:#threshold is great here so that all values are approximated!
print('approximated')
return cache[key]
else:
print('not approximated')
cache[key_arr] = f(args[0])
return cache[key_arr]
return g
#@memoize
def aggregate(dict_list_arr):
#if operation == 'avg':
return sum(list(dict_list_arr.values())[0]) / len(list(dict_list_arr.values())[0])
# if operation == 'sum':
# return sum(dict_list_arr)
# None
t = time.time()
for i in range(200,15000):
res = aggregate({str(1+i):list(range(i))})
elapsed = time.time() - t
print(res)
print(elapsed)