Very slow validations

I have 80 DataFrames which I am validating with 4 expectation suites, each suite validating 20 DataFrames. I have set up the following code:

    import great_expectations as gx
    from great_expectations.data_context import BaseDataContext
    from great_expectations.data_context.types.base import DataContextConfig, S3StoreBackendDefaults

    config_data_docs_sites = {
        "s3_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleS3StoreBackend",
                "bucket": "great-expectations",
                "prefix": "data_docs",
                "boto3_options": BOTO3_OPTIONS
            },
        },
    }
    data_context_config = DataContextConfig(
        store_backend_defaults=S3StoreBackendDefaults(default_bucket_name=GX_BUCKET_NAME),
        data_docs_sites=config_data_docs_sites
    )
    context = BaseDataContext(project_config=data_context_config)
    asset_names, data_source = gx_preparation(s3_client, context, latest_version, latest_file_name, normalised_file_list)
    failed = False

    validations_df1 = []
    validations_df2 = []
    validations_df3 = []
    validations_df4 = []
    # Build one batch request per asset and group it under the expectation suite
    # that should validate it.
    for asset_name in asset_names:
        data_asset = data_source.get_asset(asset_name)
        my_batch_request = data_asset.build_batch_request()

        if asset_name.endswith('df1'):
            expectation = 'Exp_Abteilung'
            validations_df1.append(
                {"batch_request": my_batch_request, "expectation_suite_name": expectation}
            )
        elif asset_name.endswith('df2'):
            expectation = 'Exp_Person'
            validations_df2.append(
                {"batch_request": my_batch_request, "expectation_suite_name": expectation}
            )
        elif asset_name.endswith('df3'):
            expectation = 'Exp_Ausruestung'
            validations_df3.append(
                {"batch_request": my_batch_request, "expectation_suite_name": expectation}
            )
        else:
            expectation = 'Exp_Combined'
            validations_df4.append(
                {"batch_request": my_batch_request, "expectation_suite_name": expectation}
            )

    # One SimpleCheckpoint per expectation suite, each running that suite's validations.
    checkpoint1 = gx.checkpoint.SimpleCheckpoint(
        name=f"{latest_file_name}_{latest_version}_df1",
        data_context=context,
        validations=validations_df1,
        run_name_template=f"{latest_file_name}_{latest_version}",
    )
    checkpoint2 = gx.checkpoint.SimpleCheckpoint(
        name=f"{latest_file_name}_{latest_version}_df2",
        data_context=context,
        validations=validations_df2,
        run_name_template=f"{latest_file_name}_{latest_version}",
    )
    checkpoint3 = gx.checkpoint.SimpleCheckpoint(
        name=f"{latest_file_name}_{latest_version}_df3",
        data_context=context,
        validations=validations_df3,
        run_name_template=f"{latest_file_name}_{latest_version}",
    )
    checkpoint4 = gx.checkpoint.SimpleCheckpoint(
        name=f"{latest_file_name}_{latest_version}_df4",
        data_context=context,
        validations=validations_df4,
        run_name_template=f"{latest_file_name}_{latest_version}",
    )
    # Run the checkpoints one after another and track overall success.
    results = checkpoint1.run()
    if not results["success"]:
        failed = True
    results = checkpoint2.run()
    if not results["success"]:
        failed = True
    results = checkpoint3.run()
    if not results["success"]:
        failed = True
    results = checkpoint4.run()
    if not results["success"]:
        failed = True

In total it takes about 4 minutes for this code to complete. I read online that it is possible to tell GX to rebuild the index.html file for the Data Docs once after all validations are complete, rather than after each validation, in order to speed up the process. How would I do that? Is there something else I am missing that would bring the run time down to under 30 seconds? I tried threading, but with no success.

Hey @erman! Thanks for reaching out.

SimpleCheckpoint has some actions baked in, including an UpdateDataDocsAction that rebuilds your Data Docs with the validation results of each run. If you convert these over to plain Checkpoints instead, you control the action list yourself, so you can drop that step and call context.build_data_docs() a single time at the end of the process.
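
Here's a rough sketch of what that could look like, reusing context, the validations_df* lists and the name variables from your snippet (the exact Checkpoint arguments can vary a bit between GX releases, so treat this as a starting point rather than a drop-in replacement):

    # Only store results per run; leave out the UpdateDataDocsAction that
    # SimpleCheckpoint adds by default.
    action_list = [
        {"name": "store_validation_result", "action": {"class_name": "StoreValidationResultAction"}},
        {"name": "store_evaluation_params", "action": {"class_name": "StoreEvaluationParametersAction"}},
    ]

    failed = False
    suite_validations = {
        "df1": validations_df1,
        "df2": validations_df2,
        "df3": validations_df3,
        "df4": validations_df4,
    }
    for suffix, validations in suite_validations.items():
        checkpoint = gx.checkpoint.Checkpoint(
            name=f"{latest_file_name}_{latest_version}_{suffix}",
            data_context=context,
            validations=validations,
            run_name_template=f"{latest_file_name}_{latest_version}",
            action_list=action_list,
        )
        results = checkpoint.run()
        if not results["success"]:
            failed = True

    # Rebuild the Data Docs site (including index.html) once, after all
    # checkpoints have run, instead of after every validation.
    context.build_data_docs()

If you only want to rebuild your S3 site, build_data_docs() also accepts a site_names argument, e.g. context.build_data_docs(site_names=["s3_site"]).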