How to use the .apply() method on a DataFrame?

Practicing Coding Interview Questions in Python

Kirill Smirnov

Data Science Consultant, Altran

Dataset

import pandas as pd

scores = pd.read_csv('exams.csv')
scores = scores[['math score', 'reading score', 'writing score']]
print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
Practicing Coding Interview Questions in Python

Default .apply()

df.apply(function)

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

# np.sqrt works element-wise, so each column maps to a column of the same length.
scores_new = scores.apply(np.sqrt)
print(scores_new)
   math score  reading score  writing score
0    8.602325       9.273618       9.055385
1    6.633250       7.000000       7.280110
2    7.348469       6.782330       6.557439
3    9.380832       9.746794       9.591663
4    9.219544       9.000000       9.000000
...
Practicing Coding Interview Questions in Python

Default .apply()

df.apply(function)

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

# np.mean collapses each column to a single value, so the result is a Series.
scores_new = scores.apply(np.mean)
print(scores_new.head())
math score       65.18
reading score    69.28
writing score    67.96
dtype: float64
type(scores_new)
pandas.core.series.Series
Practicing Coding Interview Questions in Python

Default .apply()

df.apply(function)

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81

function(pd.Series)

input size $n$
$\rightarrow$ np.sqrt(pd.Series)
$\rightarrow$ output size $n$

input size $n$
$\rightarrow$ np.mean(pd.Series)
$\rightarrow$ single value

Practicing Coding Interview Questions in Python

Default .apply(): own functions

df.apply(function)

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
def divide_scores(x):
    """Return ``x`` divided by two (element-wise when ``x`` is a Series)."""
    halved = x / 2
    return halved
scores_new = scores.apply(divide_scores)
print(scores_new)
    math score  reading score  writing score
0         37.0           43.0           41.0
1         22.0           24.5           26.5
2         27.0           23.0           21.5
3         44.0           47.5           46.0
4         42.5           40.5           40.5
...
Practicing Coding Interview Questions in Python

Default .apply(): own functions

df.apply(function)

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
def perfect_score(x):
    """Return the constant 100, ignoring the input ``x`` entirely."""
    top_mark = 100
    return top_mark
scores_new = scores.apply(perfect_score)
print(scores_new)
math score       100
reading score    100
writing score    100
dtype: int64
Practicing Coding Interview Questions in Python

Lambda expressions

def divide_scores(x):
    """Halve the input; applied to a Series this halves every element."""
    result = x / 2
    return result
scores_new = scores.apply(divide_scores)
print(scores_new)
    math score  reading score  writing score
0         37.0           43.0           41.0
1         22.0           24.5           26.5
2         27.0           23.0           21.5
3         44.0           47.5           46.0
4         42.5           40.5           40.5
...
def perfect_score(x):
    """Ignore ``x`` and always return 100."""
    constant_score = 100
    return constant_score
scores_new = scores.apply(perfect_score) 
print(scores_new)
math score       100
reading score    100
writing score    100
dtype: int64
Practicing Coding Interview Questions in Python

Lambda expressions

scores_new = scores.apply(lambda x: x / 2)
print(scores_new)
    math score  reading score  writing score
0         37.0           43.0           41.0
1         22.0           24.5           26.5
2         27.0           23.0           21.5
3         44.0           47.5           46.0
4         42.5           40.5           40.5
...
scores_new = scores.apply(lambda x: 100)
print(scores_new)
math score       100
reading score    100
writing score    100
dtype: int64
Practicing Coding Interview Questions in Python

Additional arguments: axis

df.apply(function, axis= )

Practicing Coding Interview Questions in Python

Additional arguments: axis

df.apply(function, axis=0)

Practicing Coding Interview Questions in Python

Additional arguments: axis

df.apply(function, axis=1)

Practicing Coding Interview Questions in Python

Additional arguments: axis

df.apply(function, axis= )

axis=0 - function is applied to each column (the default)

axis=1 - function is applied to each row

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

# No axis given: np.mean is applied to each column, giving one value per column.
scores_new = scores.apply(np.mean)
print(scores_new.head())
math score       65.18
reading score    69.28
writing score    67.96
dtype: float64
Practicing Coding Interview Questions in Python

Additional arguments: axis

df.apply(function, axis= )

axis=0 - function is applied to each column (the default)

axis=1 - function is applied to each row

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

# axis=0: np.mean receives each column in turn, giving one value per column.
scores_new = scores.apply(np.mean, axis=0)
print(scores_new.head())
math score       65.18
reading score    69.28
writing score    67.96
dtype: float64
Practicing Coding Interview Questions in Python

Additional arguments: axis

df.apply(function, axis= )

axis=0 - function is applied to each column (the default)

axis=1 - function is applied to each row

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

# axis=1: np.mean receives each row in turn, giving one value per row.
scores_new = scores.apply(np.mean, axis=1)
print(scores_new.head())
0     80.666667
1     48.666667
2     47.666667
3     91.666667
4     82.333333
5     84.000000
6     75.000000
7     70.666667
...
Practicing Coding Interview Questions in Python

Additional arguments: result_type

df.apply(function, result_type= )

result_type='expand'

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np


def span(x):
    """Return the smallest and largest value of ``x`` as ``[min, max]``."""
    return [np.min(x), np.max(x)]


# Without result_type, each returned list is stored as a single object per column.
scores_new = scores.apply(span)
print(scores_new)
math score       [27, 100]
reading score    [33, 100]
writing score    [30, 100]
dtype: object
Practicing Coding Interview Questions in Python

Additional arguments: result_type

df.apply(function, result_type= )

result_type='expand'

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np


def span(x):
    """Return the smallest and largest value of ``x`` as ``[min, max]``."""
    return [np.min(x), np.max(x)]


# 'expand' spreads each returned [min, max] list over separate cells
# instead of keeping it as one list object.
scores.apply(span, result_type='expand')
   math score  reading score  writing score
0          27             33             30
1         100            100            100
Practicing Coding Interview Questions in Python

Additional arguments: result_type

df.apply(function, result_type= )

result_type='expand'

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np


def span(x):
    """Return the smallest and largest value of ``x`` as ``[min, max]``."""
    return [np.min(x), np.max(x)]


# axis=1 passes each row to span; 'expand' turns the two list items into columns.
scores.apply(span, result_type='expand', axis=1)
     0    1
0   74   86
1   44   53
2   43   54
3   88   95
4   81   85
...
Practicing Coding Interview Questions in Python

Additional arguments: result_type

df.apply(function, result_type= )

result_type='broadcast'

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

# Without result_type, np.mean yields one value per column (a Series).
scores_new = scores.apply(np.mean)
print(scores_new.head())
math score       65.18
reading score    69.28
writing score    67.96
dtype: float64
Practicing Coding Interview Questions in Python

Additional arguments: result_type

df.apply(function, result_type= )

result_type='broadcast'

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

scores.apply(np.mean, result_type='broadcast')
    math score  reading score  writing score
0           65             69             67
1           65             69             67
2           65             69             67
3           65             69             67
4           65             69             67
5           65             69             67
6           65             69             67
7           65             69             67
...
Practicing Coding Interview Questions in Python

More than one argument in a function

function(pd.Series)

Practicing Coding Interview Questions in Python

More than one argument in a function

function(pd.Series, arg1, arg2, ..., kwarg1=val1, kwarg2=val2, ...)

def check_mean(x, a, b, inside=True):
    """Test whether the mean of ``x`` lies inside or outside the open interval (a, b).

    With ``inside`` truthy, the result is true when ``a < mean < b``.
    Otherwise it is true when the mean is strictly below ``a`` or strictly
    above ``b``. A mean equal to either bound fails both checks.
    """
    avg = np.mean(x)
    if not inside:
        return avg < a or avg > b
    return a < avg < b
Practicing Coding Interview Questions in Python

Applying the function

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

scores.apply(check_mean)
TypeError: check_mean() missing 2 required positional arguments: 'a' and 'b'
Practicing Coding Interview Questions in Python

Additional arguments: args

df.apply(function, args= )

args - (arg1, arg2, ...)

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

scores.apply(check_mean, args=[67, 70])
math score       False
reading score     True
writing score     True
dtype: bool
Practicing Coding Interview Questions in Python

Additional arguments: args

df.apply(function, args= )

args - (arg1, arg2, ...)

print(scores.head())
   math score  reading score  writing score
0          74             86             82
1          44             49             53
2          54             46             43
3          88             95             92
4          85             81             81
import numpy as np

scores.apply(
    check_mean, args=[67, 70], inside=False
)
math score        True
reading score    False
writing score    False
dtype: bool
Practicing Coding Interview Questions in Python

Let's practice!

Practicing Coding Interview Questions in Python

Preparing Video For Download...