Writing Efficient Python Code
Logan Thomas
Scientific Software Technical Trainer, Enthought
# Preview the first rows of the baseball DataFrame.
print(baseball_df.head())
Team League Year RS RA W G Playoffs
0 ARI NL 2012 734 688 81 162 0
1 ATL NL 2012 700 600 94 162 1
2 BAL AL 2012 712 705 93 162 1
3 BOS AL 2012 734 806 69 162 0
4 CHC NL 2012 613 759 61 162 0
def calc_run_diff(runs_scored, runs_allowed):
    """Return the run differential: runs scored minus runs allowed."""
    return runs_scored - runs_allowed
# Build the run-differential column one row at a time with .iterrows().
run_diffs_iterrows = []
for i, row in baseball_df.iterrows():
    run_diff = calc_run_diff(row['RS'], row['RA'])
    run_diffs_iterrows.append(run_diff)

# Assign only after the loop: the list must hold one value per row.
baseball_df['RD'] = run_diffs_iterrows
print(baseball_df)
Team League Year RS RA W G Playoffs RD
0 ARI NL 2012 734 688 81 162 0 46
1 ATL NL 2012 700 600 94 162 1 100
2 BAL AL 2012 712 705 93 162 1 7
...
Specify the axis to apply along (0 for columns; 1 for rows)
Can be used with anonymous functions (lambda functions)
baseball_df.apply(
    lambda row: calc_run_diff(row['RS'], row['RA']),
    axis=1
)
# Compute the run differential for every row with .apply();
# axis=1 hands each row to the lambda.
run_diffs_apply = baseball_df.apply(
    lambda row: calc_run_diff(row['RS'], row['RA']),
    axis=1,
)
baseball_df['RD'] = run_diffs_apply
print(baseball_df)
Team League Year RS RA W G Playoffs RD
0 ARI NL 2012 734 688 81 162 0 46
1 ATL NL 2012 700 600 94 162 1 100
2 BAL AL 2012 712 705 93 162 1 7
...
%%timeit
run_diffs_iterrows = []
for i,row in baseball_df.iterrows():
run_diff = calc_run_diff(row['RS'], row['RA'])
run_diffs_iterrows.append(run_diff)
baseball_df['RD'] = run_diffs_iterrows
86.8 ms ± 3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
%%timeit
run_diffs_apply = baseball_df.apply(
lambda row: calc_run_diff(row['RS'], row['RA']),
axis=1)
baseball_df['RD'] = run_diffs_apply
30.1 ms ± 1.75 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Writing Efficient Python Code