How to create custom Expectations for SQLAlchemy

This article is for comments to: https://docs.greatexpectations.io/en/latest/how_to_guides/creating_and_editing_expectations/how_to_create_custom_expectations_for_sqlalchemy.html

Please comment +1 if this How to is important to you.

2 Likes

Hello, I am having trouble creating a custom expectation for SQLAlchemy. I created a custom expectation which involves two columns (I used the older way of creating custom expectations).
Here is the code of the expectation

from typing import List, Union, Any

import sqlalchemy as sa
from great_expectations.core import ExpectationValidationResult, ExpectationConfiguration
from great_expectations.data_asset import DataAsset
from great_expectations.dataset import SqlAlchemyDataset
from great_expectations.render.renderer.renderer import renderer
from great_expectations.render.types import RenderedGraphContent, RenderedStringTemplateContent, RenderedTableContent, \
    RenderedBulletListContent, CollapseContent
from great_expectations.render.util import substitute_none_for_missing, num_to_str
from jinja2 import Template


class RamboSqlAlchemyDataset(SqlAlchemyDataset):
    """Custom ``SqlAlchemyDataset`` intended to carry project-specific expectations.

    NOTE(review): in this paste the expectation and renderer definitions below
    appear at module level (column 0) rather than indented inside this class —
    presumably an indentation loss when the code was pasted; confirm against
    the original file.
    """

    # Identifies this custom dataset type in validation results / data docs.
    _data_asset_type = "CustomSqlAlchemyDataset"

@DataAsset.expectation(["column_A", "column_B", "regex_value", "min_value"])
def expect_table_column_b_to_be_bigger_than_a_value_when_column_a_matches_a_regex(
        self,
        column_A,
        column_B,
        regex_value,
        min_value
):
    """Expect ``column_B`` to exceed ``min_value`` on every row where
    ``column_A`` matches ``regex_value`` (a SQL ``LIKE`` pattern).

    A row is unexpected when ``column_A`` matches the pattern AND
    ``column_B <= min_value``; the expectation succeeds only when no such
    row exists.

    Args:
        column_A: name of the column tested against the LIKE pattern.
        column_B: name of the column compared against ``min_value``.
        regex_value: SQL ``LIKE`` pattern (e.g. ``"%Daily%"``) — despite the
            parameter name, this is not a true regular expression.
        min_value: bound that ``column_B`` must strictly exceed.

    Returns:
        dict with ``"success"`` and a ``"result"`` payload that always
        contains ``"element_count"``, ``"unexpected_count"`` and
        ``"unexpected_percent"``, so the diagnostic renderer can rely on all
        three keys. (The original returned ``"element_count"`` only on
        success and the unexpected stats only on failure, which made the
        diagnostic renderer raise ``KeyError: 'element_count'`` for failing
        results.)
    """
    # Count the violating rows: pattern matches but the bound fails.
    unexpected_query = sa.select([sa.func.count()]).select_from(self._table).where(sa.and_(
        sa.column(column_A).like(regex_value),
        sa.column(column_B) <= min_value
    ))
    unexpected_row = self.engine.execute(unexpected_query).fetchone()
    unexpected_count = 0 if unexpected_row is None else unexpected_row[0]

    # Total rows in the table, used as element_count and as the
    # denominator of the unexpected percentage.
    total_count_query = sa.select([sa.func.count()]).select_from(self._table)
    total_count = self.engine.execute(total_count_query).fetchone()[0]

    # Guard the percentage against an empty table.
    unexpected_percent = (
        100.0 * unexpected_count / total_count if total_count else 0.0
    )

    return {
        "success": unexpected_count == 0,
        "result": {
            "element_count": total_count,
            "unexpected_count": unexpected_count,
            "unexpected_percent": unexpected_percent,
        },
    }

@classmethod
@renderer(renderer_type="renderer.prescriptive")
def _prescriptive_renderer(
        cls,
        configuration: ExpectationConfiguration = None,
        result: ExpectationValidationResult = None,
        language: str = None,
        runtime_configuration: dict = None,
        **kwargs,
) -> List[Union[dict, str, RenderedStringTemplateContent, RenderedTableContent, RenderedBulletListContent,
                RenderedGraphContent, Any]]:
    """Render this expectation as a human-readable sentence for Data Docs.

    Returns a single-element list holding a ``RenderedStringTemplateContent``
    whose ``$``-placeholders are resolved from the expectation's kwargs by
    the Data Docs string-templating machinery.

    Fixes three bugs in the original:
    - it read ``kwargs['column_A']`` etc., but the expectation kwargs live in
      ``configuration.kwargs`` (mirrored into ``params``), so it raised
      ``KeyError``;
    - it asked ``substitute_none_for_missing`` for a ``"regex"`` key, while
      the expectation registers the kwarg as ``"regex_value"``;
    - it called ``.substitute`` on a ``jinja2.Template``, which has no such
      method (jinja2 uses ``.render``).
    """
    runtime_configuration = runtime_configuration or {}
    styling = runtime_configuration.get("styling")

    # Keys must match the names the expectation was registered with.
    params = substitute_none_for_missing(
        configuration.kwargs,
        [
            "column_A",
            "column_B",
            "regex_value",
            "min_value",
        ],
    )

    template_str = (
        "Values from $column_A must be LIKE $regex_value and "
        "$column_B must be less than or equal to $min_value"
    )

    return [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": template_str,
                    "params": params,
                    "styling": styling,
                },
            }
        )
    ]

@classmethod
@renderer(renderer_type="renderer.diagnostic.unexpected_statement")
def _diagnostic_unexpected_statement_renderer(
        cls,
        configuration=None,
        result=None,
        language=None,
        runtime_configuration=None,
        **kwargs,
):
    """Summarize a validation result for Data Docs.

    Renders the raised exception (message plus collapsible traceback) when
    validation blew up, nothing when the expectation passed or flagged no
    rows, and otherwise a one-line unexpected-count summary.
    """
    assert result, "Must provide a result object."

    # An exception during validation takes precedence over any counts.
    if result.exception_info["raised_exception"]:
        message_block = RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template={
                "template": (
                    "\n\n$expectation_type raised an exception:\n$exception_message"
                ),
                "params": {
                    "expectation_type": result.expectation_config.expectation_type,
                    "exception_message": result.exception_info["exception_message"],
                },
                "tag": "strong",
                "styling": {
                    "classes": ["text-danger"],
                    "params": {
                        "exception_message": {"tag": "code"},
                        "expectation_type": {
                            "classes": ["badge", "badge-danger", "mb-2"]
                        },
                    },
                },
            },
        )

        traceback_block = CollapseContent(
            collapse_toggle_link="Show exception traceback...",
            collapse=[
                RenderedStringTemplateContent(
                    content_block_type="string_template",
                    string_template={
                        "template": result.exception_info["exception_traceback"],
                        "tag": "code",
                    },
                )
            ],
        )

        return [message_block, traceback_block]

    stats = result.result
    # Nothing to render when the expectation passed or no rows were flagged.
    if result.success or not stats.get("unexpected_count"):
        return []

    count_str = num_to_str(stats["unexpected_count"], use_locale=True, precision=20)
    percent_str = num_to_str(stats["unexpected_percent"], precision=4) + "%"
    total_str = num_to_str(stats["element_count"], use_locale=True, precision=20)

    return [
        RenderedStringTemplateContent(
            content_block_type="string_template",
            string_template={
                "template": (
                    "\n\n$unexpected_count unexpected values found. "
                    "$unexpected_percent of $element_count total rows."
                ),
                "params": {
                    "unexpected_count": count_str,
                    "unexpected_percent": percent_str,
                    "element_count": total_str,
                },
                "tag": "strong",
                "styling": {"classes": ["text-danger"]},
            },
        )
    ]

But I am having two problems:
When I specify the expectations to run, it runs all of them, but the data docs produced only show the last one executed, and they don’t show the proper message being rendered. Here is how I am calling them inside the notebook:

batch.expect_table_column_b_to_be_bigger_than_a_value_when_column_a_matches_a_regex(
column_A="rate_type", column_B="revenue", min_value=1, regex_value="%Daily%"

)

2 Likes

+1 — I would like to see documentation on how to build a custom SQLAlchemy expectation.

2 Likes

Me too! I’d like to see documentation for this section as well.

+1 would like to see how to customize SQL expectations