Parallel Programming with Dask in Python
James Fulton
Climate informatics researcher
import numpy as np
x = np.ones((4000, 6000))
print(x.sum())
24000000.0
import dask.array as da
x = da.ones((4000, 6000), chunks=(1000,2000))
print(x.sum().compute())
24000000.0
Dask arrays have almost all the methods that NumPy arrays have.
x.max()
x.min()
x.sum()
x.mean()
# Here x is a (1000, 10) Dask array of ones; sum down each column
sum_down_columns = x.sum(axis=0)
print(sum_down_columns.compute())
array([1000., 1000., 1000., 1000.,
1000., 1000., 1000., 1000., 1000.,
1000.])
# Lazy mathematics with Dask array
y1 = x**2 + 2*x + 1
# Lazy slicing
y2 = x[:10]
# Applying NumPy functions is lazy too
y3 = np.sin(x)
print(y1)
dask.array<add, shape=(1000, 10), ...
print(y2)
dask.array<getitem, shape=(10, 10), ...
print(y3)
dask.array<sin, shape=(1000, 10), ...
import dask.array as da
import dask.array.image
image_array = da.image.imread('images/*.png')
print(image_array)
dask.array<imread, shape=(40000, 256, 256, 3), dtype=uint8,
chunksize=(1, 256, 256, 3), chunktype=numpy.ndarray>
def instagram_filter(image):
    ...
    return pretty_image
# Apply function to each image independently
pretty_image_array = image_array.map_blocks(instagram_filter)
print(pretty_image_array)
dask.array<instagram_filter, shape=(40000, 256, 256, 3), dtype=uint8,
chunksize=(1, 256, 256, 3), chunktype=numpy.ndarray>
Parallel Programming with Dask in Python