Parallel Programming with Dask in Python
James Fulton
Climate Informatics Researcher
Multi-threading
Parallel processing
from dask import delayed


def my_square_function(x):
    """Return the square of x."""
    return x**2


# Create delayed version of above function
delayed_square_function = delayed(my_square_function)
from dask import delayed


def my_square_function(x):
    """Return the square of x."""
    return x**2


# Create delayed version of above function
delayed_square_function = delayed(my_square_function)

# Use the delayed function with input 4
delayed_result = delayed_square_function(4)

# Print the delayed answer
print(delayed_result)
Delayed('my_square_function-7f71b132-70a9-457a-aa52-604e8c34f8a7')
from dask import delayed
def my_square_function(x):
    """Return the square of x."""
    return x**2
# Delay and use function
delayed_result = delayed(my_square_function)(4)
# Printing shows a Delayed placeholder rather than a number
print(delayed_result)
Delayed('my_square_function-7f71b132-70a9-457a-aa52-604e8c34f8a7')
from dask import delayed
def my_square_function(x):
    """Return the square of x."""
    return x**2
# Only builds a Delayed object; the calculation is deferred until .compute()
delayed_result = delayed(my_square_function)(4)
real_result = delayed_result.compute() # <- This line is where the calculation happens
# Print the answer
print(real_result)
16
delayed_result1 = delayed(my_square_function)(4)
# Math operations return delayed object
delayed_result2 = (4 + delayed_result1) * 5
# compute() evaluates the whole expression: (4 + 4**2) * 5
print(delayed_result2.compute())
100
x_list = [30, 85, 14, 12, 27, 62, 89, 15, 78, 0]

# Starts as a plain int; each += with a delayed object extends the lazy sum
sum_of_squares = 0
for x in x_list:
    # Square and add numbers
    sum_of_squares += delayed(my_square_function)(x)
x_list = [30, 85, 14, 12, 27, 62, 89, 15, 78, 0]

# Starts as a plain int; each += with a delayed object extends the lazy sum
sum_of_squares = 0
for x in x_list:
    # Square and add numbers
    sum_of_squares += delayed(my_square_function)(x)

# Evaluate the accumulated delayed sum
result = sum_of_squares.compute()
# Print the answer
print(result)
27268
delayed_intermediate = delayed(my_square_function)(3)
# These two results both use delayed_intermediate
delayed_result1 = delayed_intermediate - 5
delayed_result2 = delayed_intermediate + 4
# delayed_intermediate will be computed twice, once for each .compute() call
print('delayed_result1:', delayed_result1.compute())
print('delayed_result2:', delayed_result2.compute())
delayed_result1: 4
delayed_result2: 13
import dask
# delayed_intermediate will be computed once
# dask.compute returns one computed value per argument, unpacked here
comp_result1, comp_result2 = dask.compute(delayed_result1, delayed_result2)
print('comp_result1:', comp_result1)
print('comp_result2:', comp_result2)
comp_result1: 4
comp_result2: 13
Parallel Programming with Dask in Python