How to create custom Expectations for SQLAlchemy

This article is for comments to: https://docs.greatexpectations.io/en/latest/how_to_guides/creating_and_editing_expectations/how_to_create_custom_expectations_for_sqlalchemy.html

Please comment +1 if this How to is important to you.

2 Likes

Hello, I am having trouble creating a custom expectation for SQLAlchemy. I created a custom expectation which involves two columns (I used the older way of creating custom expectations).
Here is the code of the expectation

from typing import List, Union, Any

import sqlalchemy as sa
from great_expectations.core import ExpectationValidationResult, ExpectationConfiguration
from great_expectations.data_asset import DataAsset
from great_expectations.dataset import SqlAlchemyDataset
from great_expectations.render.renderer.renderer import renderer
from great_expectations.render.types import RenderedGraphContent, RenderedStringTemplateContent, RenderedTableContent, \
    RenderedBulletListContent, CollapseContent
from great_expectations.render.util import substitute_none_for_missing, num_to_str
from jinja2 import Template


class RamboSqlAlchemyDataset(SqlAlchemyDataset):
    """Custom ``SqlAlchemyDataset`` intended to carry project-specific expectations.

    NOTE(review): in this paste the expectation and renderer definitions below
    appear at module level (column 0) rather than indented inside this class —
    presumably an indentation loss when the code was pasted; confirm against
    the original file.
    """

    # Identifies this custom dataset type in validation results / data docs.
    _data_asset_type = "CustomSqlAlchemyDataset"

@DataAsset.expectation(["column_A", "column_B", "regex_value", "min_value"])
def expect_table_column_b_to_be_bigger_than_a_value_when_column_a_matches_a_regex(
        self,
        column_A,
        column_B,
        regex_value,
        min_value
):
    """Expect ``column_B`` to exceed ``min_value`` on every row where
    ``column_A`` matches ``regex_value`` (a SQL ``LIKE`` pattern).

    A row is unexpected when ``column_A`` matches the pattern AND
    ``column_B <= min_value``; the expectation succeeds only when no such
    row exists.

    Args:
        column_A: name of the column tested against the LIKE pattern.
        column_B: name of the column compared against ``min_value``.
        regex_value: SQL ``LIKE`` pattern (e.g. ``"%Daily%"``) — despite the
            parameter name, this is not a true regular expression.
        min_value: bound that ``column_B`` must strictly exceed.

    Returns:
        dict with ``"success"`` and a ``"result"`` payload that always
        contains ``"element_count"``, ``"unexpected_count"`` and
        ``"unexpected_percent"``, so the diagnostic renderer can rely on all
        three keys. (The original returned ``"element_count"`` only on
        success and the unexpected stats only on failure, which made the
        diagnostic renderer raise ``KeyError: 'element_count'`` for failing
        results.)
    """
    # Count the violating rows: pattern matches but the bound fails.
    unexpected_query = sa.select([sa.func.count()]).select_from(self._table).where(sa.and_(
        sa.column(column_A).like(regex_value),
        sa.column(column_B) <= min_value
    ))
    unexpected_row = self.engine.execute(unexpected_query).fetchone()
    unexpected_count = 0 if unexpected_row is None else unexpected_row[0]

    # Total rows in the table, used as element_count and as the
    # denominator of the unexpected percentage.
    total_count_query = sa.select([sa.func.count()]).select_from(self._table)
    total_count = self.engine.execute(total_count_query).fetchone()[0]

    # Guard the percentage against an empty table.
    unexpected_percent = (
        100.0 * unexpected_count / total_count if total_count else 0.0
    )

    return {
        "success": unexpected_count == 0,
        "result": {
            "element_count": total_count,
            "unexpected_count": unexpected_count,
            "unexpected_percent": unexpected_percent,
        },
    }

@classmethod
@renderer(renderer_type="renderer.prescriptive")
def _prescriptive_renderer(
        cls,
        configuration: ExpectationConfiguration = None,
        result: ExpectationValidationResult = None,
        language: str = None,
        runtime_configuration: dict = None,
        **kwargs,
) -> List[Union[dict, str, RenderedStringTemplateContent, RenderedTableContent, RenderedBulletListContent,
                RenderedGraphContent, Any]]:
    """Render this expectation as a human-readable sentence for Data Docs.

    Returns a single-element list holding a ``RenderedStringTemplateContent``
    whose ``$``-placeholders are resolved from the expectation's kwargs by
    the Data Docs string-templating machinery.

    Fixes three bugs in the original:
    - it read ``kwargs['column_A']`` etc., but the expectation kwargs live in
      ``configuration.kwargs`` (mirrored into ``params``), so it raised
      ``KeyError``;
    - it asked ``substitute_none_for_missing`` for a ``"regex"`` key, while
      the expectation registers the kwarg as ``"regex_value"``;
    - it called ``.substitute`` on a ``jinja2.Template``, which has no such
      method (jinja2 uses ``.render``).
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")

    # Keys must match the names the expectation was registered with.
    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_A",
            "column_B",
            "regex_value",
            "min_value",
        ],
    )

    template_str = (
        "Values from $column_A must be LIKE $regex_value and "
        "$column_B must be less than or equal to $min_value"
    )

    return [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": template_str,
                    "params": params,
                    "styling": styling,
                },
            }
        )
    ]

@classmethod
@renderer(renderer_type="renderer.diagnostic.unexpected_statement")
def _diagnostic_unexpected_statement_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
):
    """Summarize a validation result for Data Docs.

    Renders the raised exception (message plus collapsible traceback) when
    validation blew up, nothing when the expectation passed or flagged no
    rows, and otherwise a one-line unexpected-count summary.
    """
    assert result, "Must provide a result object."

    # An exception during validation takes precedence over any counts.
    if result.exception_info["raised_exception"]:
        message_block = RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template={
                "template": (
                    "\n\n$expectation_type raised an exception:\n$exception_message"
                ),
                "params": {
                    "expectation_type": result.expectation_config.expectation_type,
                    "exception_message": result.exception_info["exception_message"],
                },
                "tag": "strong",
                "styling": {
                    "classes": ["text-danger"],
                    "params": {
                        "exception_message": {"tag": "code"},
                        "expectation_type": {
                            "classes": ["badge", "badge-danger", "mb-2"]
                        },
                    },
                },
            },
        )

        traceback_block = CollapseContent(
            collapse_toggle_link="Show exception traceback...",
            collapse=[
                RenderedStringTemplateContent(
                    content_block_type="string_template",
                    string_template={
                        "template": result.exception_info["exception_traceback"],
                        "tag": "code",
                    },
                )
            ],
        )

        return [message_block, traceback_block]

    stats = result.result
    # Nothing to render when the expectation passed or no rows were flagged.
    if result.success or not stats.get("unexpected_count"):
        return []

    count_str = num_to_str(stats["unexpected_count"], use_locale=True, precision=20)
    percent_str = num_to_str(stats["unexpected_percent"], precision=4) + "%"
    total_str = num_to_str(stats["element_count"], use_locale=True, precision=20)

    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template={
                "template": (
                    "\n\n$unexpected_count unexpected values found. "
                    "$unexpected_percent of $element_count total rows."
                ),
                "params": {
                    "unexpected_count": count_str,
                    "unexpected_percent": percent_str,
                    "element_count": total_str,
                },
                "tag": "strong",
                "styling": {"classes": ["text-danger"]},
            },
        )
    ]

But I am having two problems:
When I specify the expectations to run, it runs all of them, but the data docs produced only show the last one executed, and they don’t show the proper message being rendered. Here is how I am calling them inside the notebook:

batch.expect_table_column_b_to_be_bigger_than_a_value_when_column_a_matches_a_regex(
column_A="rate_type", column_B="revenue", min_value=1, regex_value="%Daily%"

)

2 Likes

+1 — I would like to see documentation on how to build a custom SQLAlchemy expectation.

2 Likes

Me too! I’d like to see documentation for this section as well.

+1 would like to see how to customize SQL expectations