Parallel Programming with Dask in Python
James Fulton
Climate informatics researcher



import numpy as np
x = np.ones((4000, 6000))
print(x.sum())
24000000.0

import dask.array as da
x = da.ones((4000, 6000), chunks=(1000, 2000))
print(x.sum().compute())
24000000.0

Dask arrays have almost all the methods that NumPy arrays have.
x.max()
x.min()
x.sum()
x.mean()
# Reductions can also be applied along an axis
sum_down_columns = x.sum(axis=0)
print(sum_down_columns.compute())
array([1000., 1000., 1000., 1000.,
1000., 1000., 1000., 1000., 1000.,
1000.])
# Lazy mathematics with Dask array
y1 = x**2 + 2*x + 1
# Lazy slicing
y2 = x[:10]
# Applying NumPy functions is lazy too
y3 = np.sin(x)
print(y1)
dask.array<add, shape=(1000, 10), ...
print(y2)
dask.array<getitem, shape=(10, 10), ...
print(y3)
dask.array<sin, shape=(1000, 10), ...
import dask.array as da
import dask.array.image
image_array = da.image.imread('images/*.png')
print(image_array)
dask.array<imread, shape=(40000, 256, 256, 3), dtype=uint8,
chunksize=(1, 256, 256, 3), chunktype=numpy.ndarray>
def instagram_filter(image):
    ...
    return pretty_image

# Apply function to each image independently
pretty_image_array = image_array.map_blocks(instagram_filter)
print(pretty_image_array)
dask.array<instagram_filter, shape=(40000, 256, 256, 3), dtype=uint8,
chunksize=(1, 256, 256, 3), chunktype=numpy.ndarray>
Parallel Programming with Dask in Python