Introduction to Data Quality with Great Expectations
Davina Moossazadeh
Data Scientist
Expectation - A verifiable assertion about data
Expectation Suite - A group of Expectations describing the same set of data
Create an Expectation Suite named "my_suite" with the ExpectationSuite class:
suite = gx.ExpectationSuite(
name="my_suite" )
print(suite)
{
"name": "my_suite",
"id": "a8118858-32e4-4d7d-b548-9cd7d5048958",
"expectations": [],
"meta": {
"great_expectations_version": "1.2.4"
},
"notes": null
}
Add Expectations to a Suite using .add_expectation()
(one Expectation at a time):
expectation = gx.expectations.ExpectTableRowCountToEqual(
value=118000
)
suite.add_expectation(
expectation=expectation )
print(suite)
{ "name": "my_suite",
"id": "a8118858-32e4-4d7d-b548-9cd7d5048958",
"expectations": [
{"type": "expect_table_row_count_to_equal",
"kwargs": {"value": 118000},
"meta": {},
"id": "3f1f21db-2146-417a-876e-43e11b635665"}
],
"meta": {"great_expectations_version": "1.2.4"},
"notes": null }
print(suite.expectations)
print(suite["expectations"])
[
ExpectTableRowCountToEqual(
id='3f1f21db-2146-417a-876e-43e11b635665',
meta=None, notes=None, result_format=<ResultFormat.BASIC: 'BASIC'>,
description=None, catch_exceptions=False, rendered_content=None,
batch_id=None, row_condition=None, condition_parser=None,
value=118000
),
]
print(suite.name)
print(suite["name"])
"my_suite"
print(suite.id)
print(suite["id"])
"a8118858-32e4-4d7d-b548-9cd7d5048958"
print(suite.meta)
print(suite["meta"])
{"great_expectations_version": "1.2.4"}
print(suite.notes)
print(suite["notes"])
None
Validate an Expectation Suite using batch.validate(), setting the expect parameter to the Suite:
validation_results = batch.validate(
expect=suite )
print(validation_results)
{ "success": false,
"results": [{
"success": false,
"expectation_config": {
"type": "expect_table_row_count_to_equal", "kwargs": {"batch_id": "my_datasource-my_dataframe_asset", "value": 118000}, "meta": {}, "rendered_content": [{"name": "atomic.prescriptive.summary", "value": {"schema": {"type": "com.superconductive.rendered.string"}, "template": "Must have exactly $value rows.", "params": {"value": {"schema": {"type": "number"}, "value": 118000}}}, "value_type": "StringValueType"}]
},
"result": {"observed_value": 118066},
"meta": {},
"exception_info": {"raised_exception": false, "exception_traceback": null, "exception_message": null},
"rendered_content": [{"name": "atomic.diagnostic.observed_value", "value": {"schema": {"type": "com.superconductive.rendered.string"}, "template": "118066", "params": {}}, "value_type": "StringValueType"}]
}],
"suite_name": "my_suite",
"suite_parameters": {},
"statistics": {"evaluated_expectations": 1, "successful_expectations": 0, "unsuccessful_expectations": 1, "success_percent": 0.0},
"meta": {
"great_expectations_version": "1.2.4",
"batch_spec": {"batch_data": "PandasDataFrame"},
"batch_markers": {"ge_load_time": "20241118T192902.403204Z", "pandas_data_fingerprint": "7d6363a614af65df638bdb6c053b44d3"},
"active_batch_definition": {"datasource_name": "my_datasource", "data_connector_name": "fluent", "data_asset_name": "my_dataframe_asset", "batch_identifiers": {"dataframe": "<DATAFRAME>"}}
},
"id": null }
print(validation_results.success)
False
print(validation_results.describe())
{ "success": false,
"statistics": {
"evaluated_expectations": 1, "successful_expectations": 0,
"unsuccessful_expectations": 1, "success_percent": 0.0
},
"expectations": [{
"expectation_type": "expect_table_row_count_to_equal",
"success": false,
"kwargs": {"batch_id": "my_datasource-my_dataframe_asset", "value": 118000},
"result": {"observed_value": 118066}},
],
"result_url": null
}
Create Expectation Suite:
suite = gx.ExpectationSuite(name: str)
Add Expectation to Suite:
suite.add_expectation(expectation)
Access Suite's Expectations:
suite.expectations
Validate Expectation Suite:
validation_results = batch.validate(
expect=suite
)
Check Validation Results:
validation_results.success
validation_results.describe()
Introduction to Data Quality with Great Expectations