Hello @rachel.house , thank you for your hint.
The good news is that it works: files are written to the ct10 subdirectory,
and the .html files have content type text/html!
Unfortunately, I still get an error later on, when running the checkpoint.
The following part works so far:
# Profiler
# Run the default onboarding data assistant (profiler) on the batch request
onboarding_data_assistant_result = (context
.assistants
.onboarding
.run(
batch_request=batch_request,
exclude_column_names=[],
estimation="exact", # default: "exact"; alternative: "flag_outliers"
)
)
# Build the suite name as "<data source name>_<data asset name>" and get the
# expectation suite from the data assistant result under that name
onboarding_suite_name = data_source_name+"_"+data_asset_name
onboarding_suite = (onboarding_data_assistant_result
.get_expectation_suite(
expectation_suite_name=onboarding_suite_name
)
)
# Persist the expectation suite under the suite name specified above
context.add_or_update_expectation_suite(expectation_suite=onboarding_suite)
# The checkpoint uses the same "<data source name>_<data asset name>" naming as the suite
onboarding_checkpoint_name=data_source_name+"_"+data_asset_name
# Create and persist checkpoint to reuse for multiple batches
# (the only validation configured here references the onboarding suite by name;
# the batch request is supplied later, when the checkpoint is run)
context.add_or_update_checkpoint(
name = onboarding_checkpoint_name,
config_version = 1,
class_name = "SimpleCheckpoint",
validations = [
{"expectation_suite_name": onboarding_suite_name}
]
)
This generates a JSON file in context_root_dir
DataQuality/GX/expectations/co...ck.json
Which can then be read and used in the execution of a checkpoint
# Run Onboarding checkpoint
co..._datasources_block_checkpoint_result = context.get_checkpoint("co..._datasources_block").run(batch_request=batch_request)
This is where the error shown below occurs.
But the .html file is generated anyways in
ct10/expectations/co..._datasources_block.html
and it looks (pretty fine) like this:
Strange. But no index.html is generated in
$web/ct10
Maybe this is where the error below has its root cause?
I hope you also have a good hint here to get this working.
Error:
---------------------------------------------------------------------------
HttpResponseError Traceback (most recent call last)
File <command-2347044268606593>, line 16
14 except Exception as exception:
15 handle_exception(exception, dbutils.notebook.entry_point.getDbutils().notebook().getContext())
---> 16 raise exception
File <command-2347044268606593>, line 3
1 try:
2 # Run Onboarding checkpoint
----> 3 co..._datasources_block_checkpoint_result = context.get_checkpoint("co..._datasources_block").run(batch_request=batch_request)
(...)
14 except Exception as exception:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/checkpoint/checkpoint.py:1263, in SimpleCheckpoint.run(self, template_name, run_name_template, expectation_suite_name, batch_request, validator, action_list, evaluation_parameters, runtime_configuration, validations, profilers, run_id, run_name, run_time, result_format, site_names, slack_webhook, notify_on, notify_with, expectation_suite_ge_cloud_id)
1252 if any((site_names, slack_webhook, notify_on, notify_with)):
1253 new_baseline_config = self._configurator_class(
1254 name=self.name,
1255 data_context=self.data_context,
(...)
1260 notify_with=notify_with,
1261 ).build()
-> 1263 return super().run(
1264 template_name=template_name,
1265 run_name_template=run_name_template,
1266 expectation_suite_name=expectation_suite_name,
1267 batch_request=batch_request,
1268 validator=validator,
1269 action_list=new_baseline_config.action_list
1270 if new_baseline_config
1271 else action_list,
1272 evaluation_parameters=evaluation_parameters,
1273 runtime_configuration=runtime_configuration,
1274 validations=validations,
1275 profilers=profilers,
1276 run_id=run_id,
1277 run_name=run_name,
1278 run_time=run_time,
1279 result_format=result_format,
1280 expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
1281 )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/core/usage_statistics/usage_statistics.py:266, in usage_statistics_enabled_method.<locals>.usage_statistics_wrapped_method(*args, **kwargs)
263 args_payload = args_payload_fn(*args, **kwargs) or {}
264 nested_update(event_payload, args_payload)
--> 266 result = func(*args, **kwargs)
267 message["success"] = True
268 except Exception:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/checkpoint/checkpoint.py:305, in BaseCheckpoint.run(self, template_name, run_name_template, expectation_suite_name, batch_request, validator, action_list, evaluation_parameters, runtime_configuration, validations, profilers, run_id, run_name, run_time, result_format, expectation_suite_ge_cloud_id)
303 if len(validations) > 0:
304 for idx, validation_dict in enumerate(validations):
--> 305 self._run_validation(
306 substituted_runtime_config=substituted_runtime_config,
307 async_validation_operator_results=async_validation_operator_results,
308 async_executor=async_executor,
309 result_format=result_format,
310 run_id=run_id,
311 idx=idx,
312 validation_dict=validation_dict,
313 )
314 else:
315 self._run_validation(
316 substituted_runtime_config=substituted_runtime_config,
317 async_validation_operator_results=async_validation_operator_results,
(...)
320 run_id=run_id,
321 )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/checkpoint/checkpoint.py:530, in BaseCheckpoint._run_validation(self, substituted_runtime_config, async_validation_operator_results, async_executor, result_format, run_id, idx, validation_dict)
526 operator_run_kwargs["catch_exceptions"] = catch_exceptions_validation
528 validation_id: str | None = substituted_validation_dict.get("id")
--> 530 async_validation_operator_result = async_executor.submit(
531 action_list_validation_operator.run,
532 assets_to_validate=[validator],
533 run_id=run_id,
534 evaluation_parameters=substituted_validation_dict.get(
535 "evaluation_parameters"
536 ),
537 result_format=result_format,
538 checkpoint_identifier=checkpoint_identifier,
539 checkpoint_name=self.name,
540 validation_id=validation_id,
541 **operator_run_kwargs,
542 )
543 async_validation_operator_results.append(async_validation_operator_result)
544 except (
545 gx_exceptions.CheckpointError,
546 gx_exceptions.ExecutionEngineError,
547 gx_exceptions.MetricError,
548 ) as e:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/core/async_executor.py:106, in AsyncExecutor.submit(self, fn, *args, **kwargs)
102 return AsyncResult(
103 future=self._thread_pool_executor.submit(fn, *args, **kwargs) # type: ignore[union-attr]
104 )
105 else:
--> 106 return AsyncResult(value=fn(*args, **kwargs))
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/validation_operators/validation_operators.py:415, in ActionListValidationOperator.run(self, assets_to_validate, run_id, evaluation_parameters, run_name, run_time, catch_exceptions, result_format, checkpoint_identifier, checkpoint_name, validation_id)
410 validation_result.meta["validation_id"] = validation_id
411 validation_result.meta["checkpoint_id"] = (
412 checkpoint_identifier.id if checkpoint_identifier else None
413 )
--> 415 batch_actions_results = self._run_actions(
416 batch=batch,
417 expectation_suite_identifier=expectation_suite_identifier,
418 expectation_suite=batch._expectation_suite,
419 batch_validation_result=validation_result,
420 run_id=run_id,
421 validation_result_id=validation_result_id,
422 checkpoint_identifier=checkpoint_identifier,
423 )
425 run_result_obj = {
426 "validation_result": validation_result,
427 "actions_results": batch_actions_results,
428 }
429 run_results[validation_result_id] = run_result_obj
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/validation_operators/validation_operators.py:508, in ActionListValidationOperator._run_actions(self, batch, expectation_suite_identifier, expectation_suite, batch_validation_result, run_id, validation_result_id, checkpoint_identifier)
506 except Exception as e:
507 logger.exception(f"Error running action with name {action['name']}")
--> 508 raise e
510 return batch_actions_results
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/validation_operators/validation_operators.py:478, in ActionListValidationOperator._run_actions(self, batch, expectation_suite_identifier, expectation_suite, batch_validation_result, run_id, validation_result_id, checkpoint_identifier)
472 validation_result_id = ValidationResultIdentifier(
473 expectation_suite_identifier=expectation_suite_identifier,
474 run_id=run_id,
475 batch_identifier=batch_identifier,
476 )
477 try:
--> 478 action_result = self.actions[name].run(
479 validation_result_suite_identifier=validation_result_id,
480 validation_result_suite=batch_validation_result,
481 data_asset=batch,
482 payload=batch_actions_results,
483 expectation_suite_identifier=expectation_suite_identifier,
484 checkpoint_identifier=checkpoint_identifier,
485 )
487 # Transform action_result if it not a dictionary.
488 if isinstance(action_result, GXCloudResourceRef):
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/checkpoint/actions.py:100, in ValidationAction.run(self, validation_result_suite, validation_result_suite_identifier, data_asset, expectation_suite_identifier, checkpoint_identifier, **kwargs)
72 @public_api
73 def run( # noqa: PLR0913
74 self,
(...)
82 **kwargs,
83 ):
84 """Public entrypoint GX uses to trigger a ValidationAction.
85
86 When a ValidationAction is configured in a Checkpoint, this method gets called
(...)
98 A Dict describing the result of the Action.
99 """
--> 100 return self._run(
101 validation_result_suite=validation_result_suite,
102 validation_result_suite_identifier=validation_result_suite_identifier,
103 data_asset=data_asset,
104 expectation_suite_identifier=expectation_suite_identifier,
105 checkpoint_identifier=checkpoint_identifier,
106 **kwargs,
107 )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/checkpoint/actions.py:1182, in UpdateDataDocsAction._run(self, validation_result_suite, validation_result_suite_identifier, data_asset, payload, expectation_suite_identifier, checkpoint_identifier)
1174 raise TypeError(
1175 "validation_result_id must be of type ValidationResultIdentifier or GeCloudIdentifier, not {}".format(
1176 type(validation_result_suite_identifier)
1177 )
1178 )
1180 # TODO Update for RenderedDataDocs
1181 # build_data_docs will return the index page for the validation results, but we want to return the url for the validation result using the code below
-> 1182 self.data_context.build_data_docs(
1183 site_names=self._site_names,
1184 resource_identifiers=[
1185 validation_result_suite_identifier,
1186 expectation_suite_identifier,
1187 ],
1188 )
1189 # <snippet name="great_expectations/checkpoint/actions.py empty dict">
1190 data_docs_validation_results: dict = {}
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/core/usage_statistics/usage_statistics.py:266, in usage_statistics_enabled_method.<locals>.usage_statistics_wrapped_method(*args, **kwargs)
263 args_payload = args_payload_fn(*args, **kwargs) or {}
264 nested_update(event_payload, args_payload)
--> 266 result = func(*args, **kwargs)
267 message["success"] = True
268 except Exception:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/data_context/data_context/abstract_data_context.py:5306, in AbstractDataContext.build_data_docs(self, site_names, resource_identifiers, dry_run, build_index)
5269 @usage_statistics_enabled_method(
5270 event_name=UsageStatsEvents.DATA_CONTEXT_BUILD_DATA_DOCS,
5271 )
(...)
5278 build_index: bool = True,
5279 ):
5280 """Build Data Docs for your project.
5281
5282 --Documentation--
(...)
5304 ClassInstantiationError: Site config in your Data Context config is not valid.
5305 """
-> 5306 return self._build_data_docs(
5307 site_names=site_names,
5308 resource_identifiers=resource_identifiers,
5309 dry_run=dry_run,
5310 build_index=build_index,
5311 )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/data_context/data_context/abstract_data_context.py:5353, in AbstractDataContext._build_data_docs(self, site_names, resource_identifiers, dry_run, build_index)
5349 index_page_locator_infos[
5350 site_name
5351 ] = site_builder.get_resource_url(only_if_exists=False)
5352 else:
-> 5353 index_page_resource_identifier_tuple = site_builder.build(
5354 resource_identifiers,
5355 build_index=build_index,
5356 )
5357 if index_page_resource_identifier_tuple:
5358 index_page_locator_infos[
5359 site_name
5360 ] = index_page_resource_identifier_tuple[0]
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/render/renderer/site_builder.py:323, in SiteBuilder.build(self, resource_identifiers, build_index)
319 return
321 self.target_store.copy_static_assets()
--> 323 _, index_links_dict = self.site_index_builder.build(build_index=build_index)
324 return (
325 self.get_resource_url(only_if_exists=False),
326 index_links_dict,
327 )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/render/renderer/site_builder.py:756, in DefaultSiteIndexBuilder.build(self, skip_and_clean_missing, build_index)
753 if self.show_how_to_buttons:
754 index_links_dict["cta_object"] = self.get_calls_to_action()
--> 756 self._add_expectations_to_index_links(index_links_dict, skip_and_clean_missing)
757 validation_and_profiling_result_site_keys = (
758 self._build_validation_and_profiling_result_site_keys(
759 skip_and_clean_missing
760 )
761 )
762 self._add_profiling_to_index_links(
763 index_links_dict, validation_and_profiling_result_site_keys
764 )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/render/renderer/site_builder.py:805, in DefaultSiteIndexBuilder._add_expectations_to_index_links(self, index_links_dict, skip_and_clean_missing)
795 if expectations and expectations not in FALSEY_YAML_STRINGS:
796 expectation_suite_source_keys = self.data_context.stores[
797 self.site_section_builders_config["expectations"].get(
798 "source_store_name"
799 )
800 ].list_keys()
801 expectation_suite_site_keys = [
802 ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
803 for expectation_suite_tuple in self.target_store.store_backends[
804 ExpectationSuiteIdentifier
--> 805 ].list_keys()
806 ]
807 if skip_and_clean_missing:
808 cleaned_keys = []
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/great_expectations/data_context/store/tuple_store_backend.py:1135, in TupleAzureBlobStoreBackend.list_keys(self, prefix)
1130 @override
1131 def list_keys(self, prefix: Tuple = ()) -> List[Tuple]:
1132 # Note that the prefix arg is only included to maintain consistency with the parent class signature
1133 key_list = []
-> 1135 for obj in self._container_client.list_blobs(name_starts_with=self.prefix):
1136 az_blob_key = os.path.relpath(obj.name)
1137 if az_blob_key.startswith(f"{self.prefix}{os.path.sep}"):
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/azure/core/paging.py:123, in ItemPaged.__next__(self)
121 if self._page_iterator is None:
122 self._page_iterator = itertools.chain.from_iterable(self.by_page())
--> 123 return next(self._page_iterator)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/azure/core/paging.py:75, in PageIterator.__next__(self)
73 raise StopIteration("End of paging")
74 try:
---> 75 self._response = self._get_next(self.continuation_token)
76 except AzureError as error:
77 if not error.continuation_token:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/azure/storage/blob/_list_blobs_helper.py:100, in BlobPropertiesPaged._get_next_cb(self, continuation_token)
93 return self._command(
94 prefix=self.prefix,
95 marker=continuation_token or None,
96 maxresults=self.results_per_page,
97 cls=return_context_and_deserialized,
98 use_location=self.location_mode)
99 except HttpResponseError as error:
--> 100 process_storage_error(error)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/azure/storage/blob/_shared/response_handlers.py:184, in process_storage_error(storage_error)
181 error.args = (error.message,)
182 try:
183 # `from None` prevents us from double printing the exception (suppresses generated layer error context)
--> 184 exec("raise error from None") # pylint: disable=exec-used # nosec
185 except SyntaxError as exc:
186 raise error from exc
File <string>:1
HttpResponseError: The requested URI does not represent any resource on the server.
RequestId:e02116b8-601e-0035-66ef-6a7eae000000
Time:2024-02-29T09:11:25.6007194Z
ErrorCode:InvalidUri
Content: <?xml version="1.0" encoding="utf-8"?>
<Error><Code>InvalidUri</Code><Message>The requested URI does not represent any resource on the server.
RequestId:e02116b8-601e-0035-66ef-6a7eae000000
Time:2024-02-29T09:11:25.6007194Z</Message></Error>