Practicing Coding Interview Questions in Python
Kirill Smirnov
Data Science Consultant, Altran
import pandas as pd
# Load the exam results and keep only the three score columns.
scores = pd.read_csv('exams.csv')
scores = scores[['math score', 'reading score', 'writing score']]
# Preview the first rows of the table.
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
df.apply(function)
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
# np.sqrt returns a value per input element, so the result keeps the
# shape of the original DataFrame.
scores_new = scores.apply(np.sqrt)
# Print the variable that was just assigned (scores_new).
print(scores_new)
math score reading score writing score
0 8.602325 9.273618 9.055385
1 6.633250 7.000000 7.280110
2 7.348469 6.782330 6.557439
3 9.380832 9.746794 9.591663
4 9.219544 9.000000 9.000000
...
df.apply(function)
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
# np.mean reduces each column to a single value, so apply returns a Series.
scores_new = scores.apply(np.mean)
# Print the variable that was just assigned (scores_new).
print(scores_new.head())
math score 65.18
reading score 69.28
writing score 67.96
dtype: float64
type(scores_new)
pandas.core.series.Series
df.apply(function)
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
function(pd.Series)
input size $n$
$\rightarrow$ np.sqrt(pd.Series)
$\rightarrow$ output size $n$
input size $n$
$\rightarrow$ np.mean(pd.Series)
$\rightarrow$ single value
df.apply(function)
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
def divide_scores(x):
    """Return x divided by 2 (element-wise when x is a Series)."""
    return x / 2
scores_new = scores.apply(divide_scores)
print(scores_new)
math score reading score writing score
0 37.0 43.0 41.0
1 22.0 24.5 26.5
2 27.0 23.0 21.5
3 44.0 47.5 46.0
4 42.5 40.5 40.5
...
df.apply(function)
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
def perfect_score(x):
    """Ignore the input and return the constant 100."""
    return 100
scores_new = scores.apply(perfect_score)
print(scores_new)
math score 100
reading score 100
writing score 100
dtype: int64
def divide_scores(x):
    """Return x divided by 2 (element-wise when x is a Series)."""
    return x / 2
scores_new = scores.apply(divide_scores)
print(scores_new)
math score reading score writing score
0 37.0 43.0 41.0
1 22.0 24.5 26.5
2 27.0 23.0 21.5
3 44.0 47.5 46.0
4 42.5 40.5 40.5
...
def perfect_score(x):
    """Ignore the input and return the constant 100."""
    return 100
scores_new = scores.apply(perfect_score)
print(scores_new)
math score 100
reading score 100
writing score 100
dtype: int64
scores_new = scores.apply(lambda x: x / 2)
print(scores_new)
math score reading score writing score
0 37.0 43.0 41.0
1 22.0 24.5 26.5
2 27.0 23.0 21.5
3 44.0 47.5 46.0
4 42.5 40.5 40.5
...
scores_new = scores.apply(lambda x: 100)
print(scores_new)
math score 100
reading score 100
writing score 100
dtype: int64
df.apply(function, axis= )
df.apply(function, axis=0)
df.apply(function, axis=1)
df.apply(function, axis= )
axis=0
- function
is applied over columns
axis=1
- function
is applied over rows
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
# No axis given: np.mean is applied to each column, giving one value per column.
scores_new = scores.apply(np.mean)
# Print the variable that was just assigned (scores_new).
print(scores_new.head())
math score 65.18
reading score 69.28
writing score 67.96
dtype: float64
df.apply(function, axis= )
axis=0
- function
is applied over columns
axis=1
- function
is applied over rows
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
# axis=0: np.mean is applied over columns, giving one value per column.
scores_new = scores.apply(np.mean, axis=0)
# Print the variable that was just assigned (scores_new).
print(scores_new.head())
math score 65.18
reading score 69.28
writing score 67.96
dtype: float64
df.apply(function, axis= )
axis=0
- function
is applied over columns
axis=1
- function
is applied over rows
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
# axis=1: np.mean is applied over rows, giving one value per row.
scores_new = scores.apply(np.mean, axis=1)
# Print the variable that was just assigned (scores_new).
print(scores_new.head())
0 80.666667
1 48.666667
2 47.666667
3 91.666667
4 82.333333
5 84.000000
6 75.000000
7 70.666667
...
df.apply(function, result_type= )
result_type='expand'
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np  # alias required: span() refers to numpy as np


def span(x):
    """Return the smallest and largest value of x as a two-item list."""
    return [np.min(x), np.max(x)]
scores_new = scores.apply(span)
print(scores_new)
math score [27, 100]
reading score [33, 100]
writing score [30, 100]
dtype: object
df.apply(function, result_type= )
result_type='expand'
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np  # alias required: span() refers to numpy as np


def span(x):
    """Return the smallest and largest value of x as a two-item list."""
    return [np.min(x), np.max(x)]
scores.apply(span, result_type='expand')
math score reading score writing score
0 27 33 30
1 100 100 100
df.apply(function, result_type= )
result_type='expand'
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np  # alias required: span() refers to numpy as np


def span(x):
    """Return the smallest and largest value of x as a two-item list."""
    return [np.min(x), np.max(x)]
scores.apply(span, result_type='expand', axis=1)
0 1
0 74 86
1 44 53
2 43 54
3 88 95
4 81 85
...
df.apply(function, result_type= )
result_type='broadcast'
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
# Without result_type, np.mean collapses each column to a single value.
scores_new = scores.apply(np.mean)
# Print the variable that was just assigned (scores_new).
print(scores_new.head())
math score 65.18
reading score 69.28
writing score 67.96
dtype: float64
df.apply(function, result_type= )
result_type='broadcast'
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
scores.apply(np.mean, result_type='broadcast')
math score reading score writing score
0 65 69 67
1 65 69 67
2 65 69 67
3 65 69 67
4 65 69 67
5 65 69 67
6 65 69 67
7 65 69 67
...
function(pd.Series)
function(pd.Series, arg1, arg2, ..., kwarg1=val1, kwarg2=val2, ...)
def check_mean(x, a, b, inside=True):
    """Tell whether the mean of x lies inside or outside the open interval (a, b).

    With inside=True the result is truthy when a < mean < b.
    With inside=False the result is truthy when mean < a or mean > b.
    A mean exactly equal to a or b satisfies neither test.
    """
    mean = np.mean(x)
    if inside:
        return mean > a and mean < b
    else:
        return mean < a or mean > b
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
scores.apply(check_mean)
TypeError
df.apply(function, args= )
args
- [arg1, arg2, ...]
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
scores.apply(check_mean, args=[67, 70])
math score False
reading score True
writing score True
dtype: bool
df.apply(function, args= )
args
- (arg1, arg2, ...)
print(scores.head())
math score reading score writing score
0 74 86 82
1 44 49 53
2 54 46 43
3 88 95 92
4 85 81 81
import numpy as np
scores.apply(
check_mean, args=[67, 70], inside=False
)
math score True
reading score False
writing score False
dtype: bool
Practicing Coding Interview Questions in Python