I am using Databricks with great expectations 1.0.1 version.
I want with a single checkpoint to validate two different data assets with their respective expectations suites. I have the following:
batch_parameters = {"dataframe": df}
batch_parameters2 = {"dataframe": df2}
data_source = context.data_sources.add_spark(name="source1")
data_source2 = context.data_sources.add_spark(name="source2")
data_asset = data_source.add_dataframe_asset(name="asset1")
data_asset2 = data_source2.add_dataframe_asset(name="asset2")
batch_definition_name = "my_batch_definition"
batch_definition_name2 = "my_batch_definition2"
batch_definition = data_asset.add_batch_definition_whole_dataframe(
name=batch_definition_name
)
batch_definition2 = data_asset2.add_batch_definition_whole_dataframe(
name=batch_definition_name2
)
batch_definition.build_batch_request(batch_parameters)
batch_definition2.build_batch_request(batch_parameters2)
definition_name = "my_validation_definition"
validation_definition = ge.ValidationDefinition(
data=batch_definition, suite=suite, name=definition_name
)
definition_name2 = "my_validation_definition2"
validation_definition2 = ge.ValidationDefinition(
data=batch_definition2, suite=suite2, name=definition_name2
)
context.validation_definitions.add(validation_definition)
context.validation_definitions.add(validation_definition2)
validation_definitions = [
validation_definition,
validation_definition2
]
action_list = [
]
checkpoint_name = "my_checkpoint"
checkpoint = ge.Checkpoint(
name=checkpoint_name,
validation_definitions=validation_definitions,
actions=action_list,
result_format={"result_format": "BASIC", "unexpected_index_column_names": ["hash_col"]},
)
validation_results = checkpoint.run()
Durin checkpoint.run() I am getting the following error:
BuildBatchRequestError: Bad input to build_batch_request: options must contain exactly 1 key, ‘dataframe’.