The problem I am having is that I have a validator and an expectation suite containing multiple expectations of the same type, and only the last one in the list is executed.
I’ve added 3 expectations (2 of the same type and 1 of a different type) to the suite. I have printed out the suite contents, and it shows all 3 expectations.
Of the 2 expectations with the same type (but different match rules), only the “last” one is executed and produces results; the first one is not. I have checked that it isn’t a syntax error: I took one of them out, and the remaining one ran.
Code:
# Connect to data and create a Batch
data_source = GX_CONTEXT.data_sources.add_spark("spark")
data_asset = data_source.add_dataframe_asset(name=f"{table}")
batch_request = data_asset.build_batch_request({"dataframe": df})
# Get a Validator
validator = GX_CONTEXT.get_validator(batch_request=batch_request)
for i in columns_list:
validator.expectation_suite.add_expectation(gx.expectations.ExpectColumnValuesToMatchRegexList(
validator.expectation_suite.add_expectation(gx.expectations.ExpectColumnValuesToBeInTypeList(
validator.expectation_suite.add_expectation(gx.expectations.ExpectColumnValuesToMatchRegexList(
validator.interactive_evaluation = False
validator.save_expectation_suite()
expectation_suite = validator.expectation_suite
print(expectation_suite)
-----------> SEE PRINT OUTPUT 1
results = validator.validate()
print(results)
-----------> SEE PRINT OUTPUT 2
===============================================================
------ PRINT OUTPUT 1: Confirmed that the expectation is there
{
"type": "expect_column_values_to_match_regex_list",
"kwargs": {
"column": "PERSON_SK",
"mostly": 0.01,
"regex_list": [
"^[\\w-\\.]+@([\\w-]+\\.)+[\\w-]{2,4}$"
]
},
"meta": {
"notes": "Email Check"
}
},
{
"type": "expect_column_values_to_be_in_type_list",
"kwargs": {
"column": "PERSON_SK",
"type_list": [
"TimestampType"
]
},
"meta": {
"notes": "Date Check"
}
},
{
"type": "expect_column_values_to_match_regex_list",
"kwargs": {
"column": "PERSON_SK",
"mostly": 0.01,
"regex_list": [
"^\\d{3}[A-Za-z]{5}$"
]
},
"meta": {
"notes": "ID Check"
}
}
----- PRINT OUTPUT 2: Results don't show for the Email check
{
"success": false,
"expectation_config": {
"type": "expect_column_values_to_match_regex_list",
"kwargs": {
"batch_id": "spark-table",
"column": "PERSON_SK",
"mostly": 0.01,
"regex_list": [
"^\\d{3}[A-Za-z]{5}$"
]
},
"meta": {
"notes": "ID Check"
}
},
"result": {
"element_count": 1000,
"unexpected_count": 1000,
"unexpected_percent": 100.0,
"partial_unexpected_list": [],
"missing_count": 0,
"missing_percent": 0.0,
"unexpected_percent_total": 100.0,
"unexpected_percent_nonmissing": 100.0
},
"meta": {},
"exception_info": {
"raised_exception": false,
"exception_traceback": null,
"exception_message": null
}
},
{
"success": false,
"expectation_config": {
"type": "expect_column_values_to_be_in_type_list",
"kwargs": {
"batch_id": "spark-table",
"column": "PERSON_SK",
"type_list": [
"TimestampType"
]
},
"meta": {
"notes": "Date Check"
}
},
"result": {
"observed_value": "IntegerType"
},
"meta": {},
"exception_info": {
"raised_exception": false,
"exception_traceback": null,
"exception_message": null
}
}