Dear GX Community,
I am using GX version 1.3.10 with Spark/Databricks. When I evaluate an Expectation on a column whose name includes a dot, e.g. “Te.st”, I get an error saying that GX cannot find the column. Is there a workaround that allows evaluating columns whose names include dots?
Here follows an example:
import great_expectations as gx
from great_expectations.expectations import ExpectColumnValuesToNotBeNull
from great_expectations.data_context.types.base import InMemoryStoreBackendDefaults, DataContextConfig
# Reproduction: run a not-null Expectation against a Spark DataFrame whose
# second column name contains a dot ("Te.st").
# NOTE: `spark` and `display` are not defined in this snippet; they are
# expected to be provided by the surrounding Spark/Databricks session.
rows = [(1, "foo"), (2, "bar")]
column_names = ["ID", "Te.st"]
df = spark.createDataFrame(rows, column_names)
display(df)

# Set up Great Expectations: one suite holding a single not-null check on the
# dotted column.
not_null_check = ExpectColumnValuesToNotBeNull(column="Te.st")
suite = gx.ExpectationSuite(name="my-suite", expectations=[not_null_check])

# Data context configured with InMemoryStoreBackendDefaults.
context = gx.get_context(
    project_config=DataContextConfig(
        store_backend_defaults=InMemoryStoreBackendDefaults()
    )
)

# Spark data source -> dataframe asset -> whole-dataframe batch definition,
# then fetch the batch for `df`.
batch = (
    context.data_sources.add_spark(name="my-spark-data-source")
    .add_dataframe_asset(name="my-asset")
    .add_batch_definition_whole_dataframe("my-batch")
    .get_batch(batch_parameters={"dataframe": df})
)

# Run validation and show only the exception details of the first result
# (printing the whole result object is left disabled, as in the original).
validation_result = batch.validate(suite, result_format='COMPLETE')
# print(validation_result)
first_result = validation_result['results'][0]
print(first_result['exception_info'])
This returns the following error:
{
"MetricConfigurationID(metric_name='column_values.nonnull.condition', metric_domain_kwargs_id='eca3dbed4899f51912bdecd9cdb8d0b5', metric_value_kwargs_id=())": {
'exception_traceback': 'Traceback (most recent call last):\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py", line 534, in _process_direct_and_bundled_metric_computation_configurations\n metric_computation_configuration.metric_fn( # type: ignore[misc] # F not callable\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/metric_provider.py", line 99, in inner_func\n return metric_fn(*args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py", line 246, in inner_func\n metric_domain_kwargs = get_dbms_compatible_metric_domain_kwargs(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/util.py", line 685, in get_dbms_compatible_metric_domain_kwargs\n column_name: str | sqlalchemy.quoted_name = get_dbms_compatible_column_names(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/util.py", line 755, in get_dbms_compatible_column_names\n _verify_column_names_exist_and_get_normalized_typed_column_names_map(\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/util.py", line 840, in _verify_column_names_exist_and_get_normalized_typed_column_names_map\n raise gx_exceptions.InvalidMetricAccessorDomainKwargsKeyError(\ngreat_expectations.exceptions.exceptions.InvalidMetricAccessorDomainKwargsKeyError: Error: The column "Te.st" in BatchData does not exist.\n\nThe above exception was the direct cause of the following exception:\n\nTraceback 
(most recent call last):\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/validator/validation_graph.py", line 296, in _resolve\n self._execution_engine.resolve_metrics(\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py", line 280, in resolve_metrics\n return self._process_direct_and_bundled_metric_computation_configurations(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py", line 539, in _process_direct_and_bundled_metric_computation_configurations\n raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: Error: The column "Te.st" in BatchData does not exist.\n',
'exception_message': 'Error: The column "Te.st" in BatchData does not exist.',
'raised_exception': True
}
}
Formatted, the error looks like this:
Traceback (most recent call last):
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py", line 534, in _process_direct_and_bundled_metric_computation_configurations
\n metric_computation_configuration.metric_fn( # type: ignore[misc] # F not callable
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/metric_provider.py", line 99, in inner_func
\n return metric_fn(*args, **kwargs)
\n ^^^^^^^^^^^^^^^^^^^^^^^^^^
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py", line 246, in inner_func
\n metric_domain_kwargs = get_dbms_compatible_metric_domain_kwargs(
\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/util.py", line 685, in get_dbms_compatible_metric_domain_kwargs
\n column_name: str | sqlalchemy.quoted_name = get_dbms_compatible_column_names(
\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/util.py", line 755, in get_dbms_compatible_column_names
\n _verify_column_names_exist_and_get_normalized_typed_column_names_map(
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/expectations/metrics/util.py", line 840, in _verify_column_names_exist_and_get_normalized_typed_column_names_map
\n raise gx_exceptions.InvalidMetricAccessorDomainKwargsKeyError(
\ngreat_expectations.exceptions.exceptions.InvalidMetricAccessorDomainKwargsKeyError: Error: The column "Te.st" in BatchData does not exist.
\n
\nThe above exception was the direct cause of the following exception:
\n
\nTraceback (most recent call last):
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/validator/validation_graph.py", line 296, in _resolve
\n self._execution_engine.resolve_metrics(
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py", line 280, in resolve_metrics
\n return self._process_direct_and_bundled_metric_computation_configurations(
\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
\n File "/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/great_expectations/execution_engine/execution_engine.py", line 539, in _process_direct_and_bundled_metric_computation_configurations
\n raise gx_exceptions.MetricResolutionError(
\ngreat_expectations.exceptions.exceptions.MetricResolutionError: Error: The column "Te.st" in BatchData does not exist.
\n