I am currently working with Great Expectations Core to validate data from two different sources: a CSV file and a MongoDB data source. While I am able to create Expectations and generate local Data Docs, I am encountering the same issue in both cases: the validation results and statistics do not appear in the Data Docs.

code ------

"""Reproduction script: validate a CSV-backed DataFrame with the pre-v1
Great Expectations API and publish the results to local Data Docs.

NOTE(review): the original paste lost its indentation and had curly
("smart") quotes substituted for straight quotes; both are restored here.
The bold step headings from the forum post are kept as comments.
"""
import pandas as pd
import great_expectations as ge
from great_expectations.core.batch import RuntimeBatchRequest
import json

# Load sample data into a Pandas DataFrame.
try:
    df = ge.read_csv("sample_data.csv")
    print("Data loaded successfully.")
except Exception as e:
    print(f"Error loading data: {e}")
    raise

# Initialize Great Expectations Data Context.
try:
    context = ge.data_context.DataContext()
    print("Data context initialized successfully.")

    # Build data docs after validation
    context.build_data_docs()

    # Open the data docs in a browser
    context.open_data_docs()

except Exception as e:
    print(f"Error initializing Data Context: {e}")
    raise

# Step 1: Add the pandas Datasource (in case it's not already added).
datasource_config = {
    "name": "pandas_datasource",
    "class_name": "Datasource",
    "execution_engine": {
        "class_name": "PandasExecutionEngine",
    },
    "data_connectors": {
        "default_runtime_data_connector_name": {
            "class_name": "RuntimeDataConnector",
            "batch_identifiers": ["default_identifier_name"],
        },
    },
}

context.add_datasource(**datasource_config)

# Create or update an expectation suite (a collection of validation expectations).
expectation_suite_name = "simple_expectation_suite5"
try:
    context.add_or_update_expectation_suite(
        expectation_suite_name=expectation_suite_name
    )
    print(f"Added/updated expectation suite: {expectation_suite_name}")
except Exception as e:
    print(f"Error creating/updating expectation suite: {e}")
    raise

# Step 2: Create a RuntimeBatchRequest for the DataFrame.
batch_request = RuntimeBatchRequest(
    datasource_name="pandas_datasource",  # Name the datasource
    data_connector_name="default_runtime_data_connector_name",  # Use runtime for in-memory data
    data_asset_name="mongo_dataframe_asset",  # Name the asset
    runtime_parameters={"batch_data": df},  # The DataFrame as batch data
    batch_identifiers={"default_identifier_name": "default_identifier"},
)

# Step 3: Create a Validator to validate the DataFrame against the suite.
try:
    validator = context.get_validator(
        batch_request=batch_request,
        expectation_suite_name=expectation_suite_name,
    )
    print("Validator created successfully.")
except Exception as e:
    print(f"Error creating validator: {e}")
    raise

# Step 4: Add Expectations for quantiles, median, and values.
try:
    # Add expectations for basic validations
    validator.expect_column_to_exist("Designation")
    validator.expect_column_values_to_not_be_null("name")
    validator.expect_column_values_to_be_between("age", 40, 100)

    # Add expectation for column quantiles (0.05, 0.25, 0.5, 0.75, 0.95)
    validator.expect_column_quantile_values_to_be_between(
        column="age",
        quantile_ranges={
            "quantiles": [0.05, 0.25, 0.5, 0.75, 0.95],
            "value_ranges": [
                [100, 500],  # Range for 5th percentile
                [1000, 1500],  # Range for 25th percentile (Q1)
                [2000, 2500],  # Range for median (50th percentile)
                [3000, 3500],  # Range for 75th percentile (Q3)
                [4000, 4500],  # Range for 95th percentile
            ],
        },
    )

    # Expect the column median to fall between a specific range
    # (optional, as the median is the 50th quantile).
    validator.expect_column_median_to_be_between(
        column="age", min_value=2000, max_value=2500
    )
    print("Expectations added to the validator successfully.")

    # Save the expectation suite after adding expectations
    validator.save_expectation_suite(discard_failed_expectations=False)
    print("Expectation suite saved successfully.")

except Exception as e:
    print(f"Error adding expectations to validator or saving suite: {e}")
    raise

# Step 5: Validate the data and capture detailed results.
try:
    validation_results = validator.validate()
    # Rebuild Data Docs to visualize the validation results
    context.build_data_docs()
    context.open_data_docs()

except Exception as e:
    print(f"Error validating data: {e}")
    raise

csv data ------

id name age
1 John
2 Alice 32
3 Bob 45
4 Eve 26
5 Frank 23

output -----

Request:

Could you please help me understand why the validation statistics/validation data are not showing up, and guide me on how to fix this? If any supporting documents or configuration details are required, please let me know.

Hi there — what version of GX are you on? The issue here appears to be that no Checkpoint has been set up; however, any version pre-v1 is no longer supported.

I’d recommend following our migration guide and upgrading to v1.

A simple v1 script could look something like this:


# Minimal GX v1 example: validate an in-memory DataFrame through a Checkpoint
# so that validation results (and their statistics) are written to Data Docs.
# NOTE(review): the original snippet used `gx` and `pd` without importing
# them, so it raised NameError as pasted — imports added below.
import great_expectations as gx
import pandas as pd

# A file-backed context persists configuration and Data Docs on disk.
context = gx.get_context(mode="file")

data = {
    "ID": [1, 2, 3, 4, None],
    "name": ["Alice", "Bob", "Charlie", "David", None],
    "age_when_joined": [25, 30, 35, 40, 28],
    "age_when_left": [26, 38, 38, 49, 30],
}

df = pd.DataFrame(data)

# Runtime parameters supplied to the Checkpoint at run time.
batch_parameters = {"dataframe": df}

# Register a pandas data source and a dataframe asset on it.
data_source_name = "my_data_source"
data_source = context.data_sources.add_pandas(name=data_source_name)

data_asset_name = "my_dataframe_data_asset"
data_asset = data_source.add_dataframe_asset(name=data_asset_name)

# A whole-dataframe batch definition: each run validates the full frame.
batch_definition_name = "my_batch_definition"
batch_definition = data_asset.add_batch_definition_whole_dataframe(
    batch_definition_name
)

# The suite collects the expectations to evaluate.
suite = context.suites.add(
    gx.core.expectation_suite.ExpectationSuite(name="my_expectations")
)

suite.add_expectation(
    gx.expectations.ExpectColumnPairValuesAToBeGreaterThanB(
        column_A="age_when_left", 
        column_B="age_when_joined", 
        or_equal=True
    )
)

# A validation definition binds the batch to the suite.
validation_definition = context.validation_definitions.add(
    gx.core.validation_definition.ValidationDefinition(
        name="my_validation_definition",
        data=batch_definition,
        suite=suite,
    )
)

# The Checkpoint is what actually runs validations and, via the
# UpdateDataDocsAction, publishes the results into Data Docs — the missing
# piece in the original pre-v1 script.
checkpoint = context.checkpoints.add(
    gx.Checkpoint(
        name="checkpoint",
        validation_definitions=[validation_definition],
        actions=[gx.checkpoint.actions.UpdateDataDocsAction(name="dda")],
        result_format={"result_format": "BASIC", "unexpected_index_column_names": ["ID", "name", "age_when_left", "age_when_joined"]},
    )
)

validation_results = checkpoint.run(batch_parameters=batch_parameters)
print(validation_results)

context.open_data_docs()