Hi everyone!
I am very new to GE, so please forgive me if this is a dumb question.
I am using “expect_column_pair_values_to_be_equal” to compare two columns, and so far almost everything works fine: GE validates the table and provides me with statistics.
My issue is that I need GE to show me which keys are failing. In other words, I need a report of the records where column A and column B differ, but I cannot find how to produce it.
Can anyone please share any insights?
This is my script:
# Setup imports
import great_expectations as gx
from great_expectations.checkpoint import Checkpoint
# Names used throughout the script: the table under test and the suite
# that will hold the expect_column_pair_values_to_be_equal expectation.
data_asset = "stg_QualityCheck"
expectation_suite_name = "test_expectation"

# Obtain the Great Expectations data context.
context = gx.get_context()

# Connection string for the SQL Server database (intentionally blank here).
MSSQL_CONNECTION_STRING = ""

# Register the SQL datasource with the context.
mssql_datasource = context.sources.add_sql(
    name="mssql_datasource",
    connection_string=MSSQL_CONNECTION_STRING,
)

# Expose the table as a data asset named after the table itself.
mssql_datasource.add_table_asset(name=data_asset, table_name=data_asset)

# Build a batch request for that asset.
table_asset = mssql_datasource.get_asset(data_asset)
batch_request = table_asset.build_batch_request()

# Create (or refresh) the suite and bind a validator to the batch.
context.add_or_update_expectation_suite(
    expectation_suite_name=expectation_suite_name,
)
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)
print(validator.head())

# Declare the expectation: the two count columns must match row by row.
validator.expect_column_pair_values_to_be_equal(
    column_A="count_r",
    column_B="count_l",
)

# Persist the suite, keeping the expectation even if it failed on this batch.
validator.save_expectation_suite(discard_failed_expectations=False)
# Setup checkpoint
#
# By default a checkpoint only reports summary statistics. To find out WHICH
# records fail, request the "COMPLETE" result format: every failing
# expectation then carries the full list of unexpected values and, for SQL
# assets, an `unexpected_index_query` that can be run to fetch the failing rows.
my_checkpoint_name = "my_sql_checkpoint"

# Optional: primary-key column(s) of the table, e.g. ["record_id"]. When set,
# each failing row is reported together with its key values in
# `unexpected_index_list`.
# NOTE(review): confirm the installed GX version supports index columns for
# column-pair expectations on SQL backends.
key_columns = []

result_format = {
    "result_format": "COMPLETE",
    "return_unexpected_index_query": True,
}
if key_columns:
    result_format["unexpected_index_column_names"] = key_columns

checkpoint = Checkpoint(
    name=my_checkpoint_name,
    run_name_template="%Y%m%d-%H%M%S-test-validation-checkpoint",
    data_context=context,
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
    action_list=[
        {
            "name": "store_validation_result",
            "action": {"class_name": "StoreValidationResultAction"},
        },
        {
            "name": "update_data_docs",
            "action": {"class_name": "UpdateDataDocsAction"},
        },
    ],
    # Applied to every expectation validated by this checkpoint.
    runtime_configuration={"result_format": result_format},
)
context.add_or_update_checkpoint(checkpoint=checkpoint)
checkpoint_result = checkpoint.run()

# Report the failing records on the console as well as in Data Docs.
for run_result in checkpoint_result.run_results.values():
    for expectation_result in run_result["validation_result"].results:
        if expectation_result.success:
            continue
        details = expectation_result.result
        print("FAILED:", expectation_result.expectation_config.expectation_type)
        print("  unexpected count:", details.get("unexpected_count"))
        # Keys of the failing rows (only present when key_columns is set).
        print("  failing keys:", details.get("unexpected_index_list"))
        # Query that returns the failing rows from the database.
        print("  failing-rows query:", details.get("unexpected_index_query"))

context.open_data_docs()
Thank you!