This article is for comments to: https://docs.greatexpectations.io/en/latest/how_to_guides/creating_and_editing_expectations/how_to_create_custom_expectations_for_sqlalchemy.html
Please comment +1 if this How to is important to you.
This article is for comments to: https://docs.greatexpectations.io/en/latest/how_to_guides/creating_and_editing_expectations/how_to_create_custom_expectations_for_sqlalchemy.html
Please comment +1 if this How to is important to you.
Hello, I am having trouble creating a custom expectation for SQLAlchemy. I created a custom expectation which involves two columns (I used the older way of creating custom expectations).
Here is the code of the expectation
from typing import List, Union, Any
import sqlalchemy as sa
from great_expectations.core import ExpectationValidationResult, ExpectationConfiguration
from great_expectations.data_asset import DataAsset
from great_expectations.dataset import SqlAlchemyDataset
from great_expectations.render.renderer.renderer import renderer
from great_expectations.render.types import RenderedGraphContent, RenderedStringTemplateContent, RenderedTableContent, \
RenderedBulletListContent, CollapseContent
from great_expectations.render.util import substitute_none_for_missing, num_to_str
from jinja2 import Template
class RamboSqlAlchemyDataset(SqlAlchemyDataset):
_data_asset_type = "CustomSqlAlchemyDataset"
@DataAsset.expectation(["column_A", "column_B", "regex_value", "min_value"])
def expect_table_column_b_to_be_bigger_than_a_value_when_column_a_matches_a_regex(
self,
column_A,
column_B,
regex_value,
min_value
):
query = sa.select([sa.func.count()]).select_from(self._table).where(sa.and_(
sa.column(column_A).like(regex_value),
sa.column(column_B) <= min_value
))
row_count = self.engine.execute(query).fetchone()
if row_count is None:
row_count = 0
else:
row_count = row_count[0]
total_count_query = sa.select([sa.func.count()]).select_from(self._table)
total_count = self.engine.execute(total_count_query).fetchone()[0]
if row_count == 0:
return {
"success": row_count == 0,
"result": {
"element_count": row_count,
}
}
else:
return {
"success": row_count == 0,
"result": {
"unexpected_percent": 100.0 * row_count / total_count,
"unexpected_count": row_count,
}
}
@classmethod
@renderer(renderer_type="renderer.prescriptive")
def _prescriptive_renderer(
cls,
configuration: ExpectationConfiguration = None,
result: ExpectationValidationResult = None,
language: str = None,
runtime_configuration: dict = None,
**kwargs,
) -> List[Union[dict, str, RenderedStringTemplateContent, RenderedTableContent, RenderedBulletListContent,
RenderedGraphContent, Any]]:
runtime_configuration = runtime_configuration or {}
include_column_name = runtime_configuration.get("include_column_name", True)
include_column_name = (
include_column_name if include_column_name is not None else True
)
styling = runtime_configuration.get("styling")
# get params dict with all expected kwargs
params = substitute_none_for_missing(
configuration.kwargs,
[
"column_A",
"column_B",
"regex",
"min_value",
],
)
template_str = f"Values from {kwargs['column_A']} much be LIKE {kwargs['regex']} and " \
f"{kwargs['column_B']} must be less or equal to {kwargs['min_value']}"
return [
Template(template_str).substitute(params)
]
@classmethod
@renderer(renderer_type="renderer.diagnostic.unexpected_statement")
def _diagnostic_unexpected_statement_renderer(
cls,
configuration=None,
result=None,
language=None,
runtime_configuration=None,
**kwargs,
):
assert result, "Must provide a result object."
success = result.success
result_dict = result.result
if result.exception_info["raised_exception"]:
exception_message_template_str = (
"\n\n$expectation_type raised an exception:\n$exception_message"
)
exception_message = RenderedStringTemplateContent(
**{
"content_block_type": "string_template",
"string_template": {
"template": exception_message_template_str,
"params": {
"expectation_type": result.expectation_config.expectation_type,
"exception_message": result.exception_info[
"exception_message"
],
},
"tag": "strong",
"styling": {
"classes": ["text-danger"],
"params": {
"exception_message": {"tag": "code"},
"expectation_type": {
"classes": ["badge", "badge-danger", "mb-2"]
},
},
},
},
}
)
exception_traceback_collapse = CollapseContent(
**{
"collapse_toggle_link": "Show exception traceback...",
"collapse": [
RenderedStringTemplateContent(
**{
"content_block_type": "string_template",
"string_template": {
"template": result.exception_info[
"exception_traceback"
],
"tag": "code",
},
}
)
],
}
)
return [exception_message, exception_traceback_collapse]
if success or not result_dict.get("unexpected_count"):
return []
else:
unexpected_count = num_to_str(
result_dict["unexpected_count"], use_locale=True, precision=20
)
unexpected_percent = (
num_to_str(result_dict["unexpected_percent"], precision=4) + "%"
)
element_count = num_to_str(
result_dict["element_count"], use_locale=True, precision=20
)
template_str = (
"\n\n$unexpected_count unexpected values found. "
"$unexpected_percent of $element_count total rows."
)
return [
RenderedStringTemplateContent(
**{
"content_block_type": "string_template",
"string_template": {
"template": template_str,
"params": {
"unexpected_count": unexpected_count,
"unexpected_percent": unexpected_percent,
"element_count": element_count,
},
"tag": "strong",
"styling": {"classes": ["text-danger"]},
},
}
)
]
But I am having two problems:
When I specify the expectations to run, it runs all of them but the data docs produced only shows the last one executed. And it doesn’t show the proper message being rendered. Here is how I am calling them inside the notebook:
batch.expect_table_column_b_to_be_bigger_than_a_value_when_column_a_matches_a_regex(
column_A="rate_type", column_B="revenue", min_value=1, regex_value="%Daily%"
)
+1 would like to see documentations on how to build custom sqlalchemy expectation
Me too! Wanna see doc for this section.
+1 would like to see how to customize SQL expectations