How to monkey-patch np.savez_compressed to add compression level, without editing numpy's source files?

35 views Asked by At

I need to modify the ZIP compression level used internally by np.savez_compressed. There is a feature proposal on NumPy's GitHub, but it is not implemented yet.

I see two options:

  • modify the source file /numpy/lib/npyio.py and replace zipf = zipfile_factory(file, mode="w", compression=compression) by <idem>..., compresslevel=compresslevel), but this modification has to be redone after every re-install or upgrade (pip install numpy), which makes it a suboptimal solution.

  • monkey-patching the _savez function

How to do this?

Here is my attempt at the second option. It fails with ValueError: seek of closed file, and I don't see why:

import numpy as np

def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    """Write arrays into a zip archive, one ``<name>.npy`` member per array.

    Intended as a replacement for ``np.lib.npyio._savez`` whose only change
    is that ``compresslevel=2`` is passed to the zip factory.

    Parameters
    ----------
    file : object with a ``write`` attribute, or a path
        Destination. Anything without ``write`` is treated as a path and
        gets a ``.npz`` suffix appended when it is missing.
    args : sequence
        Unnamed arrays; stored under the generated names ``arr_0``, ``arr_1``, ...
    kwds : dict
        Named arrays. This dict is mutated: the unnamed arrays are added to it.
    compress : bool
        Truthy selects ``ZIP_DEFLATED``, falsy selects ``ZIP_STORED``.
    allow_pickle, pickle_kwargs
        Forwarded unchanged to ``write_array``.

    Raises
    ------
    ValueError
        If a generated ``arr_<i>`` name is already a key of ``kwds``.
    """
    import zipfile
    if not hasattr(file, 'write'):
        # NOTE(review): `os_fspath` is neither defined nor imported in this
        # snippet, so this branch would raise NameError for a path argument.
        file = os_fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'
    # Same dict object as `kwds`, not a copy.
    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError("Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val
    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED
    zipf = np.lib.npyio.zipfile_factory(file, mode="w", compression=compression, compresslevel=2)  # !! the only modified line !!
    for key, val in namedict.items():
        fname = key + '.npy'
        val = np.asanyarray(val)
        # always force zip64, gh-10776
        with zipf.open(fname, 'w', force_zip64=True) as fid:
            # NOTE(review): nothing named `format` is imported in this snippet,
            # so the name resolves to the built-in `format` function, which has
            # no `write_array` attribute.
            format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs)
    # Not reached if the loop above raises; the archive is then left unclosed.
    zipf.close()

# Rebind the `_savez` attribute of np.lib.npyio to the function defined above.
np.lib.npyio._savez = _savez    

# Minimal reproduction: save one named array into an already-open binary file.
x = np.array([1, 2, 3, 4])
with open("test.npz", "wb") as f:
    np.savez_compressed(f, x=x)
1

There is 1 answer

0
Basj On BEST ANSWER

I found an even simpler solution:

import numpy as np
def zipfile_factory(file, *args, **kwargs):
    """Create a ``zipfile.ZipFile`` with Zip64 enabled and a compression level.

    Drop-in replacement for NumPy's ``zipfile_factory`` helper that adds a
    default ``compresslevel``.

    Parameters
    ----------
    file : file-like object or path-like
        An object with a ``read`` attribute is handed to ``ZipFile`` as is;
        anything else is converted to a filesystem path with ``os.fspath``.
    *args, **kwargs
        Forwarded to ``zipfile.ZipFile``. ``allowZip64`` is always forced to
        ``True``. ``compresslevel`` defaults to 4 but an explicit value
        supplied by the caller is respected.

    Returns
    -------
    zipfile.ZipFile
    """
    # Local imports keep the snippet self-contained when pasted on its own.
    import os
    import zipfile

    if not hasattr(file, 'read'):
        # `os_fspath` only exists as an alias inside NumPy's own module;
        # the standard-library spelling is `os.fspath`.
        file = os.fspath(file)
    kwargs['allowZip64'] = True
    # Only a default: do not clobber a level chosen by the caller.
    kwargs.setdefault('compresslevel', 4)
    return zipfile.ZipFile(file, *args, **kwargs)
# savez_compressed resolves `zipfile_factory` in the module that implements it.
# NumPy >= 2.0 keeps that implementation in the private module
# np.lib._npyio_impl (np.lib.npyio is only a thin shim there), so patch
# whichever one exists. This relies on private internals and may need
# adjusting for other NumPy versions.
_npyio = getattr(np.lib, "_npyio_impl", np.lib.npyio)
_npyio.zipfile_factory = zipfile_factory
with open("test.npz", "wb") as f:
    np.savez_compressed(f, x=np.ones(10_000_000))

Edit: old solution:

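I found the solution in the meantime: format should be replaced by np.lib.npyio.format (inside the patched function, a bare format refers to Python's built-in format(), which has no write_array attribute). Now this works: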

import numpy as np

def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    import zipfile
    if not hasattr(file, 'write'):
        file = os_fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'
    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError("Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val
    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED
    zipf = np.lib.npyio.zipfile_factory(file, mode="w", compression=compression, compresslevel=1)
    for key, val in namedict.items():
        fname = key + '.npy'
        val = np.asanyarray(val)
        # always force zip64, gh-10776
        with zipf.open(fname, 'w', force_zip64=True) as fid:
            np.lib.npyio.format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs)
    zipf.close()

# savez_compressed resolves `_savez` in the module that implements it.
# NumPy >= 2.0 keeps that implementation in the private module
# np.lib._npyio_impl (np.lib.npyio is only a thin shim there), so patch
# whichever one exists. This relies on private internals and may need
# adjusting for other NumPy versions.
_npyio = getattr(np.lib, "_npyio_impl", np.lib.npyio)
_npyio._savez = _savez

with open("test.npz", "wb") as f:
    np.savez_compressed(f, x=np.array([1, 2, 3]))