I want to operate on some images using dask/zarr, but these images are large pyramidal TIFFs that neither library can read directly; they need to be read with software such as OpenSlide. At the moment, my strategy has been to create a process that takes the image and reads it chunk by chunk into a new zarr array. However, I found a napari plugin online whose authors created a zarr store that can read from the WSI (whole-slide image) directly.
In that code they create a pyramid zarr file, but it doesn't seem to work on my images. I would like to use zarr.open to access just the highest-resolution (level 0) array through zarr/dask as a Y×X×3 (RGB) array.
Here is the code I have tried, in case it is useful:
from zarr.storage import BaseStore
import zarr
from openslide import OpenSlide
import numpy as np
class _InvalidChunkKeyError(KeyError, ValueError):
    """Raised when a key is not of the form ``'y.x'``.

    It is a ``KeyError`` so that mapping consumers treat the key as absent,
    and it is still a ``ValueError`` so that code catching the exception type
    raised previously keeps working.
    """


class OpenSlideStore(BaseStore):
    """Read-only mapping that serves level-0 RGB tiles of a slide.

    Keys are strings of the form ``'y.x'``, where ``y`` and ``x`` are the
    level-0 pixel offsets of the tile's top-left corner. Each value is a
    ``chunk_size`` x ``chunk_size`` RGB tile returned as a NumPy array.

    NOTE(review): ``self._slide`` and ``self._chunk_size`` are read by every
    method but are not assigned in this class body; presumably an
    ``__init__`` or the caller sets them -- confirm.

    NOTE(review): keys are pixel offsets, not chunk indices, and no array
    metadata key (such as ``.zarray``) is served. ``zarr.open`` looks for
    metadata keys and addresses chunks by index, so this store probably needs
    both before zarr/dask can use it directly -- verify against the zarr
    storage specification.
    """

    def __getitem__(self, key: str = None):
        """Return the RGB tile whose top-left corner is encoded in *key*.

        Args:
            key: Tile address in the form ``'y.x'`` (level-0 pixel offsets).

        Returns:
            The tile as a NumPy array built from the RGB-converted region.

        Raises:
            KeyError: If *key* is ``None`` or is not two integers separated
                by a single dot (the raised exception is also a
                ``ValueError``).
        """
        if key is None:
            raise KeyError()
        try:
            # Unpacking fails with ValueError when the key does not have
            # exactly two dot-separated parts; int() fails the same way for
            # non-numeric parts, so one handler covers every malformed key.
            y_text, x_text = key.split('.')
            y, x = int(y_text), int(x_text)
        except ValueError as err:
            raise _InvalidChunkKeyError(
                "Invalid key format. Key should be in the format 'y.x'"
            ) from err
        tile_size = (self._chunk_size, self._chunk_size)
        # read_region takes the location as (x, y), the reverse of key order.
        chunk = self._slide.read_region((x, y), 0, tile_size).convert('RGB')
        return np.array(chunk)

    def __setitem__(self, key: str = None, value: bytes = None):
        """Reject writes; the store is read-only."""
        raise NotImplementedError()

    def __delitem__(self, key: str = None):
        """Reject deletions; the store is read-only."""
        raise NotImplementedError()

    def __len__(self) -> int:
        """Return the number of tiles, counting partial tiles at the edges."""
        width, height = self._slide.dimensions
        size = self._chunk_size
        # Ceiling division, so the count matches the keys __iter__ produces
        # when the slide size is not a multiple of the chunk size.
        columns = -(-width // size)
        rows = -(-height // size)
        return columns * rows

    def __iter__(self):
        """Yield every tile key (``'y.x'``) in row-major order.

        Only keys are produced; pixel data is read when a key is looked up,
        so iterating the store does not decode the slide.
        """
        width, height = self._slide.dimensions
        for y in range(0, height, self._chunk_size):
            for x in range(0, width, self._chunk_size):
                yield f"{y}.{x}"