Parallel Programming with Dask in Python
James Fulton
Climate Informatics Researcher
dask.delayed()
# Use default
result = x.compute()
result = dask.compute(x)
# Use threads
result = x.compute(scheduler='threads')
result = dask.compute(x, scheduler='threads')
# Use processes
result = x.compute(scheduler='processes')
result = dask.compute(x, scheduler='processes')
from dask.distributed import LocalCluster
cluster = LocalCluster(
processes=True,
n_workers=2,
threads_per_worker=2
)
print(cluster)
LocalCluster(..., workers=2, threads=4, memory=31.38 GiB)
from dask.distributed import LocalCluster
cluster = LocalCluster(
processes=False,
n_workers=2,
threads_per_worker=2
)
print(cluster)
LocalCluster(..., workers=2, threads=4, memory=31.38 GiB)
cluster = LocalCluster(processes=True)
print(cluster)
LocalCluster(..., workers=4, threads=8, memory=31.38 GiB)
cluster = LocalCluster(processes=False)
print(cluster)
LocalCluster(..., workers=1, threads=8, memory=31.38 GiB)
from dask.distributed import Client, LocalCluster
cluster = LocalCluster(
processes=True,
n_workers=4,
threads_per_worker=2
)
client = Client(cluster)
print(client)
<Client: 'tcp://127.0.0.1:61391' processes=4 threads=8, memory=31.38 GiB>
Create cluster then pass it into client
cluster = LocalCluster(
processes=True,
n_workers=4,
threads_per_worker=2
)
client = Client(cluster)
print(client)
<Client: ... processes=4 threads=8, ...>
Create client which will create its own cluster
client = Client(
processes=True,
n_workers=4,
threads_per_worker=2
)
print(client)
<Client: ... processes=4 threads=8, ...>
client = Client(processes=True)
# Default uses the client
result = x.compute()
# Can still change to other schedulers
result = x.compute(scheduler='threads')
# Can explicitly use client
result = client.compute(x)
LocalCluster()
- A cluster on your computer.
Parallel Programming with Dask in Python