Parallel Programming with Dask in Python
James Fulton
Climate Informatics Researcher
# Create 2 delayed objects
delayed_num1 = delayed(my_square_function)(3)
delayed_num2 = delayed(my_square_function)(4)

# Add them (the addition is itself delayed; nothing is computed yet)
result = delayed_num1 + delayed_num2

# Plot the task graph
result.visualize()
# Build one delayed task whose output feeds both results below
delayed_intermediate = delayed(my_square_function)(3)
# These two results both use delayed_intermediate
delayed_result1 = delayed_intermediate - 5
delayed_result2 = delayed_intermediate + 4
# Plot the task graph of each result separately
delayed_result1.visualize()
delayed_result2.visualize()
# Plot the task graph for both results in a single call
dask.visualize(delayed_result1, delayed_result2)
# Run a sum on two big arrays
sum1 = delayed(np.sum)(big_array1)
sum2 = delayed(np.sum)(big_array2)
# Compute using processes: the scheduler has to be requested explicitly,
# otherwise dask.compute() uses whatever the default scheduler is.
dask.compute(sum1, sum2, scheduler='processes')
# Run a sum on two big arrays
sum1 = delayed(np.sum)(big_array1)
sum2 = delayed(np.sum)(big_array2)
# Compute using threads
# NOTE(review): no scheduler= argument is passed, so this relies on the
# configured default scheduler being threads - confirm, or pass
# scheduler='threads' to make the choice explicit.
dask.compute(sum1, sum2)
Global interpreter lock (GIL) - only one thread can execute Python code at a time
def sum_to_n(n: int) -> int:
    """Sum the integers from 0 to n, inclusive.

    Args:
        n: Upper bound of the sum. Values below 0 produce an empty range,
            so the result is 0.

    Returns:
        The total 0 + 1 + ... + n.
    """
    total = 0
    # Explicit Python-level loop kept as in the original example.
    for i in range(n + 1):
        total += i
    return total
# Wrap two identical sum_to_n(1000) calls as delayed tasks; nothing is
# computed on these lines.
sum1 = delayed(sum_to_n)(1000)
sum2 = delayed(sum_to_n)(1000)
# The pd.read_csv() function releases the GIL
df1 = delayed(pd.read_csv)('file1.csv')
df2 = delayed(pd.read_csv)('file2.csv')
Parallel Programming with Dask in Python