I'm trying to train an OCR for digits, but I keep getting a memory allocation error and can't figure out what's going wrong. It worked fine at first, but after two runs it suddenly started spitting out these errors. I tried deleting the zips and re-downloading them in case the files were corrupted, but with no result; I can't even complete the first run anymore.
Allocation error:
Traceback (most recent call last):
  File "/home/pcname/PycharmProjects/Bot/venv/lib/python3.6/site-packages/theano/compile/function_module.py", line 607, in __call__
    outputs = self.fn()
MemoryError: Unable to allocate array with shape (60000, 800) and data type float64

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/pcname/PycharmProjects/Bot/Utility/TextHandler.py", line 90, in <module>
    train_err = train_fn(training_x, training_y)
  File "/home/pcname/PycharmProjects/Bot/venv/lib/python3.6/site-packages/theano/compile/function_module.py", line 618, in __call__
    storage_map=self.fn.storage_map)
  File "/home/pcname/PycharmProjects/Bot/venv/lib/python3.6/site-packages/theano/gof/link.py", line 269, in raise_with_op
    storage_map_list.sort(key=itemgetter(3), reverse=True)
TypeError: '<' not supported between instances of 'NoneType' and 'NoneType'
Code:
import os
import urllib.request
import gzip
import numpy as np
import matplotlib
matplotlib.use("TkAgg")  # select the backend before importing pyplot
import matplotlib.pyplot as plt
import lasagne
import theano
import theano.tensor as T
def build_nn(input_var=None):
    # Input layer for 28x28 grayscale images, followed by input dropout.
    l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
    # Hidden layers.
    l_hid1 = lasagne.layers.DenseLayer(l_in_drop, num_units=800,
                                       nonlinearity=lasagne.nonlinearities.rectify,
                                       W=lasagne.init.GlorotUniform())
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)
    l_hid2 = lasagne.layers.DenseLayer(l_hid1_drop, num_units=800,
                                       nonlinearity=lasagne.nonlinearities.rectify,
                                       W=lasagne.init.GlorotUniform())
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
    # Softmax output layer over the 10 digit classes.
    l_out = lasagne.layers.DenseLayer(l_hid2_drop, num_units=10,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
def load_dataset(file_path="../Lib/Zips/"):
    first_download = True

    def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
        print("Downloading set: %s" % filename)
        urllib.request.urlretrieve(source + filename, file_path + filename)

    def load_imageset(file):
        # Download the file if it has not been found yet.
        if not os.path.exists(file_path + file):
            download(file)
        # Open the gzipped archive of image data.
        with gzip.open(file_path + file, 'rb') as reader:
            data = np.frombuffer(reader.read(), np.uint8, offset=16)
        data = data.reshape(-1, 1, 28, 28)
        return data / np.float32(256)

    def load_labelset(file):
        if not os.path.exists(file_path + file):
            download(file)
        with gzip.open(file_path + file, "rb") as reader:
            data = np.frombuffer(reader.read(), np.uint8, offset=8)
        return data

    training = ("train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz")
    test = ("t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz")
    return load_imageset(training[0]), load_labelset(training[1]), load_imageset(test[0]), load_labelset(test[1])
x_train, y_train, x_test, y_test = load_dataset()
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
network = build_nn(input_var)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
theano.config.optimizer='fast_compile'
theano.config.exception_verbosity='high'
theano.config.compute_test_value = 'warn'
train_fn = theano.function([input_var, target_var], loss, updates=updates)
num_training_steps = 10
for step in range(num_training_steps):
    train_err = train_fn(x_train, y_train)
I agree with guidot, this is most likely a plain out-of-memory error. The array Theano fails to allocate is not your raw input but the first hidden layer's activations: because you pass all 60000 training images to train_fn in a single call, it has to hold a (60000, 800) float64 matrix, i.e. 60000 × 800 × 8 bytes = 384 MB, and the second 800-unit layer needs the same again, on top of the input array itself. The TypeError below it is just Theano failing while trying to print diagnostics for the original MemoryError.
Try decreasing the first two dense layers from 800 units to something smaller and run it again. If 600 units runs longer but eventually fails with the same error, that would be further evidence that you are simply running out of RAM.
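If it helps, here is a minimal sketch of that experiment. The hidden_units keyword is something I'm adding for illustration (your build_nn currently hard-codes 800), and the loop at the bottom is only a back-of-envelope size check, not part of the training code:

import lasagne
import theano.tensor as T

def build_nn(input_var=None, hidden_units=800):
    # Same architecture as in the question, but with the width of the two
    # hidden layers exposed as a parameter so it is easy to try 600, 400, ...
    l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
    l_hid1 = lasagne.layers.DenseLayer(l_in_drop, num_units=hidden_units,
                                       nonlinearity=lasagne.nonlinearities.rectify,
                                       W=lasagne.init.GlorotUniform())
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)
    l_hid2 = lasagne.layers.DenseLayer(l_hid1_drop, num_units=hidden_units,
                                       nonlinearity=lasagne.nonlinearities.rectify,
                                       W=lasagne.init.GlorotUniform())
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
    l_out = lasagne.layers.DenseLayer(l_hid2_drop, num_units=10,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    return l_out

input_var = T.tensor4('inputs')
network = build_nn(input_var, hidden_units=600)

# Back-of-envelope check: one hidden activation matrix for the full
# 60000-image batch, in float64 as the error message reports.
for units in (800, 600):
    print(units, "units ->", 60000 * units * 8 / 1e6, "MB")  # 384.0 MB vs 288.0 MB

Each dense layer adds another matrix of roughly that size as long as you keep feeding the whole training set in one call, so even a modest reduction in width can make a noticeable difference.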