Introductie tot testen in Python
Alexander Levin
Data Scientist
Data: salarissen in data science.
Elke rij bevat info over een data science‑werker met salaris, functie en andere kenmerken.

Pijplijn: het gemiddelde salaris berekenen:
import pandas as pd
# Fixture that loads the data
@pytest.fixture
def read_df() -> pd.DataFrame:
    """Load the salary dataset for every test that requests this fixture.

    The fixture is declared without a scope argument, so pytest uses its
    default (per-test) scope and the CSV is read again for each test.

    Returns:
        The contents of ``ds_salaries.csv`` as a DataFrame. The path is
        relative, so it is resolved against the current working directory.
    """
    return pd.read_csv('ds_salaries.csv')
# Function that filters the rows
def filter_df(df: pd.DataFrame, employment_type: str = 'FT') -> pd.DataFrame:
    """Return the rows of ``df`` whose ``employment_type`` matches.

    Args:
        df: Frame that has an ``employment_type`` column.
        employment_type: Value to keep. Defaults to ``'FT'``, the value this
            function previously had hard-coded, so existing one-argument
            calls behave exactly as before.

    Returns:
        A DataFrame holding only the matching rows, with their original
        index labels and columns.

    Raises:
        KeyError: If ``df`` has no ``employment_type`` column.
    """
    return df[df['employment_type'] == employment_type]
# Function that takes the mean
def get_mean(df: pd.DataFrame, column: str = 'salary_in_usd') -> float:
    """Return the mean of one column of ``df``.

    Args:
        df: Frame containing the column to average.
        column: Name of the column. Defaults to ``'salary_in_usd'``, the
            column this function previously had hard-coded, so existing
            one-argument calls behave exactly as before.

    Returns:
        The result of pandas' ``Series.mean()`` for that column.

    Raises:
        KeyError: If ``df`` has no column named ``column``.
    """
    return df[column].mean()
Testgevallen:
Code:
def test_read_df(read_df: pd.DataFrame) -> None:
    """Check that the ``read_df`` fixture delivers a non-empty DataFrame."""
    # Check the type of the dataframe
    assert isinstance(read_df, pd.DataFrame)
    # Check that df contains rows
    assert read_df.shape[0] > 0
Voorbeeld om te checken dat Python bestanden kan maken.
def test_write() -> None:
    """Check that Python can create a file in the current working directory.

    Writes ``temp.txt``, asserts that it exists, and always removes it again.
    """
    # Imported locally because no top-level ``import os`` is visible in this file.
    import os

    path = 'temp.txt'
    try:
        # Open the file in write mode
        with open(path, 'w') as wfile:
            # Write the text to the file
            wfile.write('Testing stuff is awesome')
        # Check that the file exists
        assert os.path.exists(path)
    finally:
        # Clean up even when the assertion (or the write) fails, so a failed
        # run does not leave temp.txt behind for the next one.
        if os.path.exists(path):
            os.remove(path)
Testgevallen:
get_mean() geeft een getal terug
Code:
def test_units(read_df: pd.DataFrame) -> None:
    """Unit-check ``filter_df`` and ``get_mean`` on the loaded data.

    Verifies that filtering leaves ``'FT'`` as the only employment type and
    that the mean of the filtered data is a float.
    """
    filtered = filter_df(read_df)
    # Compare plain lists: ``ndarray == list`` yields an element-wise array
    # whose truth value is only well-defined for exactly one element, so a
    # wrong filter would surface as a confusing error, not a clean
    # assertion failure.
    assert list(filtered['employment_type'].unique()) == ['FT']
    assert isinstance(get_mean(filtered), float)
Testgevallen:
Code:
def test_feature(read_df: pd.DataFrame) -> None:
    """Feature-check the pipeline: the filtered mean must be plausible.

    The mean of the filtered rows has to be positive and may not exceed the
    largest ``salary_in_usd`` value in the unfiltered data.
    """
    # Filter the data
    filtered = filter_df(read_df)
    # Compute the mean once; both test cases are about the same value
    mean_salary = get_mean(filtered)
    # Test case: mean is greater than zero
    assert mean_salary > 0
    # Test case: mean is not bigger than the maximum
    assert mean_salary <= read_df['salary_in_usd'].max()
Testgeval:
Code:
def test_performance(benchmark, read_df: pd.DataFrame) -> None:
    """Measure the filter-then-mean pipeline on the loaded data.

    ``benchmark`` is applied as a decorator, so ``get_result`` is handed to
    it at definition time and no explicit call is needed afterwards.
    NOTE(review): ``benchmark`` is presumably the pytest-benchmark fixture;
    confirm the plugin is installed.
    """
    # Function to measure, passed to the benchmark via the decorator
    @benchmark
    def get_result():
        filtered = filter_df(read_df)
        return get_mean(filtered)
import pytest
## Integration Tests
def test_read_df(read_df: pd.DataFrame) -> None:
    """Integration check: the ``read_df`` fixture yields a non-empty DataFrame."""
    # Check the type of the dataframe
    assert isinstance(read_df, pd.DataFrame)
    # Check that df contains rows
    assert read_df.shape[0] > 0
def test_write() -> None:
    """Integration check: a file can be created in the working directory.

    Writes ``temp.txt``, asserts that it exists, and always removes it again.
    """
    # Imported locally because no top-level ``import os`` is visible in this file.
    import os

    path = 'temp.txt'
    try:
        with open(path, 'w') as wfile:
            wfile.write('12345')
        assert os.path.exists(path)
    finally:
        # Clean up even when the assertion (or the write) fails, so a failed
        # run does not leave temp.txt behind for the next one.
        if os.path.exists(path):
            os.remove(path)
## Unit Tests
def test_units(read_df: pd.DataFrame) -> None:
    """Unit-check ``filter_df`` and ``get_mean`` on the loaded data.

    Verifies that filtering leaves ``'FT'`` as the only employment type and
    that the mean of the filtered data is a float.
    """
    filtered = filter_df(read_df)
    # Compare plain lists: ``ndarray == list`` yields an element-wise array
    # whose truth value is only well-defined for exactly one element, so a
    # wrong filter would surface as a confusing error, not a clean
    # assertion failure.
    assert list(filtered['employment_type'].unique()) == ['FT']
    assert isinstance(get_mean(filtered), float)
## Feature Tests
def test_feature(read_df: pd.DataFrame) -> None:
    """Feature-check the pipeline: the filtered mean must be plausible.

    The mean of the filtered rows has to be positive and may not exceed the
    largest ``salary_in_usd`` value in the unfiltered data.
    """
    # Filtering the data
    filtered = filter_df(read_df)
    # Compute the mean once; both test cases are about the same value
    mean_salary = get_mean(filtered)
    # Test case: mean is greater than zero
    assert mean_salary > 0
    # Test case: mean is not bigger than the maximum
    assert mean_salary <= read_df['salary_in_usd'].max()
## Performance Tests
def test_performance(benchmark, read_df: pd.DataFrame) -> None:
    """Measure the filter-then-mean pipeline on the loaded data.

    ``benchmark`` is applied as a decorator, so ``pipeline`` is handed to it
    at definition time and no explicit call is needed afterwards.
    NOTE(review): ``benchmark`` is presumably the pytest-benchmark fixture;
    confirm the plugin is installed.
    """
    # Function to measure, passed to the benchmark via the decorator
    @benchmark
    def pipeline():
        filtered = filter_df(read_df)
        return get_mean(filtered)
Introductie tot testen in Python