Databricks with the Python SDK
Avi Steinberg
Senior Software Engineer
WorkspaceClient.current_user.me().user_name gets the username of the logged-in user
Notebook path format: /Users/${username}/${notebook_name}
# Assume there is a notebook in our workspace called My_Notebook
# Create the SDK client used for all workspace calls below.
w = WorkspaceClient()
# Build the notebook path as /Users/<user_name>/My_Notebook, where <user_name>
# is the user_name of the currently logged-in user.
notebook_path = f'/Users/{w.current_user.me().user_name}/My_Notebook'
WorkspaceClient.jobs.create(createParams)
Pass in the name and tasks parameters to describe the job being created
createParams:
name: str
tasks: List[Task]
Task:
description: str
notebook_task: NotebookTask
task_key: str
NotebookTask:
notebook_path: str
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import jobs  # provides jobs.Task / jobs.NotebookTask

# Create notebook path pointing to notebook called "My_Notebook"
w = WorkspaceClient()
notebook_path = f'/Users/{w.current_user.me().user_name}/My_Notebook'

# Create a job with a single notebook task that runs My_Notebook
new_job = w.jobs.create(
    name='sdk-dc-project-task',
    tasks=[
        jobs.Task(
            description="create_notebook_test",
            notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
            task_key="my-key",
        )
    ],
)
print(f"New Job Id={new_job.job_id}")

# Run created Job: trigger it, then call .result() on the handle run_now returns
w.jobs.run_now(job_id=new_job.job_id).result()
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Iterate the listing directly rather than binding it to a variable named
# `jobs`: that name would shadow the `jobs` module used for jobs.Task /
# jobs.NotebookTask in the job-creation snippets.
for job in w.jobs.list():
    # Print the ID of every job returned by the listing
    print(f"JobId={job.job_id}")
JobId=888763141802192
JobId=37050453972815
JobId=681550316180975
JobId=629994089852037
WorkspaceClient.jobs.delete(job_id: int)
import os

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import jobs  # provides jobs.Task / jobs.NotebookTask

# Pre-requisite to this is to create the desired
# Databricks notebook (My_Notebook) in the Databricks workspace
w = WorkspaceClient()
notebook_path = f'/Users/{w.current_user.me().user_name}/My_Notebook'

# ID of the existing cluster the task should run on.
# NOTE(review): read from the DATABRICKS_CLUSTER_ID environment variable here
# because the original snippet never defined cluster_id — confirm the source.
cluster_id = os.environ["DATABRICKS_CLUSTER_ID"]

# Create a job with a single notebook task that runs My_Notebook on that cluster
new_job = w.jobs.create(
    name='sdk-dc-project-task',
    tasks=[
        jobs.Task(
            description="create_notebook_test",
            existing_cluster_id=cluster_id,
            notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
            task_key="my-key",
        )
    ],
)

# Delete the job that was just created, identified by its job_id
w.jobs.delete(job_id=new_job.job_id)
Cron Expression to Schedule Job at:
0 30 3 * * ?  (every day at 3:30 AM)
0 45 2 * * ?  (every day at 2:45 AM)
We can schedule a job to run a notebook every day at 3 AM.
from databricks.sdk.service import jobs  # provides Task / NotebookTask / CronSchedule

# Create a job that runs the notebook at notebook_path every day at 3 AM.
# `w` (WorkspaceClient) and `notebook_path` come from the earlier snippets.
# Quartz cron fields: second minute hour day-of-month month day-of-week
cron_expression = "0 0 3 * * ?"

created_job = w.jobs.create(
    name='sdk-dc-project-task',
    tasks=[
        jobs.Task(
            description="test",
            notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
            task_key="my-key",
        )
    ],
    timeout_seconds=3600,
    # The cron expression is evaluated in the given time zone
    schedule=jobs.CronSchedule(
        quartz_cron_expression=cron_expression,
        timezone_id="America/New_York",
    ),
)
Databricks with the Python SDK