code ------
import pandas as pd
import great_expectations as ge
from great_expectations.core.batch import RuntimeBatchRequest
import json
Load sample data into a Pandas DataFrame
try:
df = ge.read_csv(“sample_data.csv”)
print(“Data loaded successfully.”)
except Exception as e:
print(f"Error loading data: {e}")
raise
Initialize Great Expectations Data Context
try:
context = ge.data_context.DataContext()
print(“Data context initialized successfully.”)
# Build data docs after validation
context.build_data_docs()
# Open the data docs in a browser
context.open_data_docs()
except Exception as e:
print(f"Error initializing Data Context: {e}")
raise
Step 1: Add the pandas Datasource (in case it’s not already added)
datasource_config = {
“name”: “pandas_datasource”,
“class_name”: “Datasource”,
“execution_engine”: {
“class_name”: “PandasExecutionEngine”,
},
“data_connectors”: {
“default_runtime_data_connector_name”: {
“class_name”: “RuntimeDataConnector”,
“batch_identifiers”: [“default_identifier_name”],
},
},
}
context.add_datasource(**datasource_config)
Create or update an expectation suite (a collection of validation expectations)
expectation_suite_name = “simple_expectation_suite5”
try:
context.add_or_update_expectation_suite(expectation_suite_name=expectation_suite_name)
print(f"Added/updated expectation suite: {expectation_suite_name}“)
except Exception as e:
print(f"Error creating/updating expectation suite: {e}”)
raise
Step 2: Create a RuntimeBatchRequest for the DataFrame
batch_request = RuntimeBatchRequest(
datasource_name=“pandas_datasource”, # Name the datasource
data_connector_name=“default_runtime_data_connector_name”, # Use runtime for in-memory data
data_asset_name=“mongo_dataframe_asset”, # Name the asset
runtime_parameters={“batch_data”: df}, # The DataFrame as batch data
batch_identifiers={“default_identifier_name”: “default_identifier”}
)
Step 3: Create a Validator to Validate the DataFrame Against the Expectation Suite
try:
validator = context.get_validator(
batch_request=batch_request,
expectation_suite_name=expectation_suite_name
)
print(“Validator created successfully.”)
except Exception as e:
print(f"Error creating validator: {e}")
raise
Step 4: Add Expectations to the Validator
Step 4: Add Expectations for Quantiles, Median, and Values
try:
# Add expectations for basic validations
validator.expect_column_to_exist(“Designation”)
validator.expect_column_values_to_not_be_null(“name”)
validator.expect_column_values_to_be_between(“age”, 40, 100)
# Add expectation for column quantiles (specify quantiles like 0.05, 0.25, 0.5, 0.75, and 0.95)
validator.expect_column_quantile_values_to_be_between(
column="age",
quantile_ranges={
"quantiles": [0.05, 0.25, 0.5, 0.75, 0.95],
"value_ranges": [
[100, 500], # Range for 5th percentile
[1000, 1500], # Range for 25th percentile (Q1)
[2000, 2500], # Range for median (50th percentile)
[3000, 3500], # Range for 75th percentile (Q3)
[4000, 4500] # Range for 95th percentile
]
}
)
# Expect the column median to fall between a specific range (optional, as median is the 50th quantile)
validator.expect_column_median_to_be_between(
column="age", min_value=2000, max_value=2500
)
print("Expectations added to the validator successfully.")
# Save the expectation suite after adding expectations
validator.save_expectation_suite(discard_failed_expectations=False)
print("Expectation suite saved successfully.")
except Exception as e:
print(f"Error adding expectations to validator or saving suite: {e}")
raise
Step 5: Validate the Data and capture detailed results
try:
validation_results = validator.validate()
# Rebuild Data Docs to visualize the validation results
context.build_data_docs()
context.open_data_docs()
except Exception as e:
print(f"Error validating data: {e}")
raise
csv data ------
id | name | age |
---|---|---|
1 | John | |
2 | Alice | 32 |
3 | Bob | 45 |
4 | Eve | 26 |
5 | Frank | 23 |
output -----
Request:
Could you please help me understand why the validation statistics/validation tat are not showing up and guide me on how to fix this? If any supporting documents or configuration details are required, please let me know.