actableai.data_validation package

Submodules

actableai.data_validation.base module

class actableai.data_validation.base.CheckLevels

Bases: object

Class holding the check levels for data validation

CRITICAL = 'CRITICAL'
WARNING = 'WARNING'
class actableai.data_validation.base.CheckResult(name: str, message: str, level: str)

Bases: object

class actableai.data_validation.base.IChecker(name: str)

Bases: object

Abstract class for Checker

abstract check(df: pandas.core.frame.DataFrame) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.base.ValidationStatus

Bases: object

CRITICAL = 'CRITICAL'
HEALTHY = 'HEALTHY'
UNHEALTHY = 'UNHEALTHY'

actableai.data_validation.checkers module

class actableai.data_validation.checkers.CategoricalSameValuesChecker(level, name='CategoricalSameValuesChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, current_intervention_column: str, new_intervention_column: str) Optional[actableai.data_validation.base.CheckResult]

Check if the categorical features have the same unique values.

Parameters
  • df – Dataframe to check.
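  • current_intervention_column – Name of the current intervention column.
  • new_intervention_column – Name of the new intervention column.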
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.CategoryChecker(level, name='CategoryChecker')

Bases: actableai.data_validation.base.IChecker

check(df, columns) Optional[actableai.data_validation.base.CheckResult]

Check if the columns are categorical.

Parameters
  • df – Dataframe to check.
  • columns – Columns to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.CausalDiscoveryAlgoChecker(level, name='CausalDiscoveryAlgoChecker')

Bases: actableai.data_validation.base.IChecker

check(algo: str) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.checkers.CheckColumnInflateLimit(level: str, name: str = 'CheckColumnInflateLimit')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, features: List[str], polynomial_degree: int, n_columns: int) Optional[actableai.data_validation.base.CheckResult]

Check if the number of columns is not too large for the polynomial degree.

Parameters
  • df – Dataframe to check.
  • features – Features to check.
  • polynomial_degree – Polynomial degree for expansion.
  • n_columns – Limit number of columns.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.CheckNUnique(level: str, name='CheckNUnique')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, n_unique_level: int, analytics: str = 'Explanation') Optional[actableai.data_validation.base.CheckResult]

Check if the number of unique values is less than the threshold.

Parameters
  • df – Dataframe to check.
  • n_unique_level – Threshold to check.
  • analytics – Type of analytics to use. Either ‘Explanation’ or ‘Bayesian Regression’.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.ColumnsExistChecker(level, name='ColumnsExistChecker')

Bases: actableai.data_validation.base.IChecker

check(df, columns) Optional[actableai.data_validation.base.CheckResult]

Check if the columns exist.

Parameters
  • df – Dataframe to check.
  • columns – Columns to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.ColumnsInList(level, name='ColumnsInList')

Bases: actableai.data_validation.base.IChecker

check(columns_list, columns) Optional[actableai.data_validation.base.CheckResult]

Check if the columns are in the list.

Parameters
  • columns_list – List of columns to check.
  • columns – Columns to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.ColumnsNotInList(level, name='ColumnsNotInList')

Bases: actableai.data_validation.base.IChecker

check(columns_list, columns) Optional[actableai.data_validation.base.CheckResult]

Check if the columns are not in the list.

Parameters
  • columns_list – List of columns to check.
  • columns – Columns to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.CorrectAnalyticChecker(level, name='CorrectAnalyticChecker')

Bases: actableai.data_validation.base.IChecker

check(df, problem_type, unique_threshold) Optional[actableai.data_validation.base.CheckResult]

Check if you are using the correct analytic. (Classification or Regression)

Parameters
  • df – Dataframe to check.
  • problem_type – Type of problem to check.
  • unique_threshold – Threshold to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.DoNotContainDatetimeChecker(level, name='DoNotContainDatetimeChecker')

Bases: actableai.data_validation.base.IChecker

check(df) Optional[actableai.data_validation.base.CheckResult]

Check if the dataframe contains datetime column.

Parameters
df – Dataframe to check.
Returns
Check result.
Return type
Optional[CheckResult]
class actableai.data_validation.checkers.DoNotContainEmptyColumnsChecker(level, name='DoNotContainEmptyColumnsChecker')

Bases: actableai.data_validation.base.IChecker

check(df, columns) Optional[actableai.data_validation.base.CheckResult]

Check if the columns are full of NaN.

Parameters
  • df – Dataframe to check.
  • columns – Columns to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.DoNotContainMixedChecker(level, name='DoNotContainMixedChecker')

Bases: actableai.data_validation.base.IChecker

check(df, columns) Optional[actableai.data_validation.base.CheckResult]

Check if the dataframe contains mixed data types.

Parameters
  • df – Dataframe to check.
  • columns – Columns to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.DoNotContainTextChecker(level, name='DoNotContainTextChecker')

Bases: actableai.data_validation.base.IChecker

check(df, columns) Optional[actableai.data_validation.base.CheckResult]

Check if the dataframe contains text.

Parameters
  • df – Dataframe to check.
  • columns – Columns to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.DoNotUseAdditionalFeatureAsGroupbyChecker(level, name='DoNotUseAdditionalFeatureAsGroupbyChecker')

Bases: actableai.data_validation.base.IChecker

check(feature_columns: List[str], group_by: List[str]) Optional[actableai.data_validation.base.CheckResult]

Check if there are any additional features that are also used as group by features.

class actableai.data_validation.checkers.FieldsToExtractChecker(level, name='FieldsToExtractChecker')

Bases: actableai.data_validation.base.IChecker

check(fields_to_extract: List[str]) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.checkers.InsufficientCategoricalRows(level, name='InsufficientCategoricalRows')

Bases: actableai.data_validation.base.IChecker

check(df, treatment, n_rows) Optional[actableai.data_validation.base.CheckResult]

Check if the number of rows is enough for categorical treatment.

Parameters
  • df – Dataframe to check.
  • treatment – Treatment to check.
  • n_rows – Number of rows to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.IsCategoricalChecker(level, name='IsCategoricalChecker')

Bases: actableai.data_validation.base.IChecker

check(df) Optional[actableai.data_validation.base.CheckResult]

Check if the dataframe is categorical.

Parameters
df – Dataframe to check.
Returns
Check result.
Return type
Optional[CheckResult]
class actableai.data_validation.checkers.IsCategoricalOrNumericalChecker(level, name='IsCategoricalNumericalChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, features: List[str]) Optional[actableai.data_validation.base.CheckResult]

Check if the features are categorical or numerical.

Parameters
  • df – Dataframe to check.
  • features – Features to check.
class actableai.data_validation.checkers.IsClusteringModelCompatible(level, name='IsClusteringAlgorithmCompatible')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, clustering_model_class: Type[actableai.clustering.models.base.BaseClusteringModel]) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.checkers.IsDatetimeChecker(level, name='IsDatetimeChecker')

Bases: actableai.data_validation.base.IChecker

check(df) Optional[actableai.data_validation.base.CheckResult]

Check if the dataframe contains datetime.

Parameters
df – Dataframe to check.
Returns
Check result.
Return type
Optional[CheckResult]
class actableai.data_validation.checkers.IsNumericalChecker(level, name='IsNumericalChecker')

Bases: actableai.data_validation.base.IChecker

check(series: pandas.core.series.Series) Optional[actableai.data_validation.base.CheckResult]

Check if the series is numerical.

Parameters
series – Series to check.
Returns
Check result.
Return type
Optional[CheckResult]
class actableai.data_validation.checkers.IsSufficientClassSampleChecker(level, name='IsSufficientClassSampleChecker')

Bases: actableai.data_validation.base.IChecker

check(df, target, validation_ratio, problem_type='classification') Optional[actableai.data_validation.base.CheckResult]

Check if each category has enough data.

Parameters
  • df – Dataframe to check.
  • target – Target column to check.
  • validation_ratio – Ratio of the data used for validation.
  • problem_type (str, optional) – Type of problem. Defaults to ‘classification’.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.IsSufficientClassSampleForCrossValidationChecker(level, name='IsSufficientClassSampleForCrossValidationChecker')

Bases: actableai.data_validation.base.IChecker

check(df, target, kfolds) Optional[actableai.data_validation.base.CheckResult]

Check if the number of validation samples is enough for cross validation.

Parameters
  • df – Dataframe to check.
  • target – Target column to check.
  • kfolds – Number of folds to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.IsSufficientDataChecker(level, name='IsSufficientDataChecker')

Bases: actableai.data_validation.base.IChecker

check(df, n_sample) Optional[actableai.data_validation.base.CheckResult]

Check if the dataframe contains enough data.

Parameters
  • df – Dataframe to check.
  • n_sample – Number of samples to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.IsSufficientDataClassificationStratification(level, name='IsSufficientDataClassificationStratification')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, target: str, validation_ratio: float, drop_duplicates: bool, features: List[str]) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.checkers.IsSufficientDataTreatmentStratification(level, name='IsSufficientDataTreatmentStratification')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, current_intervention_column: str) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.checkers.IsSufficientNumberOfClassChecker(level, name='IsSufficientNumberOfClassChecker')

Bases: actableai.data_validation.base.IChecker

check(target_df) Optional[actableai.data_validation.base.CheckResult]

Check if the number of classes is sufficient.

Parameters
target_df – Target column to check.
Returns
Check result.
Return type
Optional[CheckResult]
class actableai.data_validation.checkers.IsSufficientSampleCrossValidationChecker(level, name: str = 'IsSufficientSampleCrossValidationChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, kfolds: int) Optional[actableai.data_validation.base.CheckResult]

Check that there are more values than the number of folds.

Parameters
  • df – Input dataframe
  • kfolds – Number of cross validated folds
Returns

Check result

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.IsSufficientValidationSampleChecker(level, name='IsSufficientValidationSampleChecker')

Bases: actableai.data_validation.base.IChecker

check(df, validation_ratio) Optional[actableai.data_validation.base.CheckResult]

Check if the number of validation samples is greater than the threshold.

Parameters
  • df – Dataframe to check.
  • validation_ratio – Threshold to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.IsValidFrequencyChecker(level, name='IsValidFrequencyChecker')

Bases: actableai.data_validation.base.IChecker

check(df) Optional[actableai.data_validation.base.CheckResult]

Check if the frequency is valid.

Parameters
df – Dataframe to check.
Returns
Check result.
Return type
Optional[CheckResult]
class actableai.data_validation.checkers.IsValidNumberOfClusterChecker(level, name='IsValidNumberOfClusterChecker')

Bases: actableai.data_validation.base.IChecker

check(df, n_cluster) Optional[actableai.data_validation.base.CheckResult]

Check if the number of clusters is valid against the number of rows.

Parameters
  • df – Dataframe to check.
  • n_cluster – Number of clusters to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.IsValidPredictionLengthChecker(level, name='IsValidPredictionLengthChecker')

Bases: actableai.data_validation.base.IChecker

check(df, prediction_length) Optional[actableai.data_validation.base.CheckResult]

Check if the prediction length is valid.

Parameters
  • df – Dataframe to check.
  • prediction_length – Prediction length to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.IsValidTypeNumberOfClusterChecker(level, name='IsValidTypeNumberOfClusterChecker')

Bases: actableai.data_validation.base.IChecker

check(n_cluster) Optional[actableai.data_validation.base.CheckResult]

Check if the number of clusters is valid.

Parameters
n_cluster – Number of clusters to check.
Returns
Check result.
Return type
Optional[CheckResult]
class actableai.data_validation.checkers.MaxTrainSamplesChecker(level, name='MaxTrainSamplesChecker')

Bases: actableai.data_validation.base.IChecker

check(n_cluster: Union[str, int], max_samples: Optional[int]) Optional[actableai.data_validation.base.CheckResult]

Check if the number of samples is not too large for the model.

Parameters
  • n_cluster – Number of clusters (integer or string).
  • max_samples – Maximum number of samples.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.NoFrequentItemSet(level: str, name: str = 'NoFrequentItemSet')

Bases: actableai.data_validation.base.IChecker

check(frequent_itemset: pandas.core.frame.DataFrame) Optional[actableai.data_validation.base.CheckResult]

Check if the frequent item set is empty.

Parameters
  • frequent_itemset – Dataframe of frequent item sets to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.OnlyOneValueChecker(level, name='OnlyOneValueChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, features: List[str]) Optional[actableai.data_validation.base.CheckResult]

Check that all features have only one value.

Parameters
  • df – Dataframe to check.
  • features – Features to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.PositiveOutcomeForBinaryChecker(level, name: str = 'PositiveOutcomeForBinaryChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, outcomes: List[str], positive_outcome_value: Optional[Any]) Optional[actableai.data_validation.base.CheckResult]

Check that if the target is binary, the positive outcome value is not None.

Parameters
  • df – Dataframe to check.
  • outcomes – Outcome columns to check.
  • positive_outcome_value – Positive outcome value to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.PositiveOutcomeValueThreshold(level, name='PositiveOutcomeValueThreshold')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, outcomes: List[str], positive_outcome_value: Optional[str]) Optional[actableai.data_validation.base.CheckResult]

Check if the number of samples is enough for the model.

Parameters
  • df – Dataframe to check.
  • outcomes – Outcome columns to check.
  • positive_outcome_value – Positive outcome value to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.ROCAUCChecker(level: str, name: str = 'ROCAUCChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, target: str, eval_metric: str = 'roc_auc') Optional[actableai.data_validation.base.CheckResult]

Check if the ROC AUC is usable.

Parameters
  • df – Dataframe to check.
  • target – Target column to check.
  • eval_metric – Evaluation metric to check. Defaults to ‘roc_auc’.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.RegressionEvalMetricChecker(level, name='RegressionEvalMetricChecker')

Bases: actableai.data_validation.base.IChecker

check(eval_metric: str, use_quantiles: bool) Optional[actableai.data_validation.base.CheckResult]

Check if the eval metric is valid for regression.

Parameters
  • eval_metric – Eval metric to check.
  • use_quantiles – True if quantiles will be used (different metric).
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.RuleDoNotContainDatetimeChecker(level, name='RuleDoNotContainDatetimeChecker')

Bases: actableai.data_validation.base.IChecker

check(df, rules) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.checkers.SameTypeChecker(level, name='SameTypeChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, features: List[str]) Optional[actableai.data_validation.base.CheckResult]

Check if the types of the features are the same.

Parameters
  • df – Dataframe to check.
  • features – Features to check.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.SplitByDatetimeValidationChecker(level, name='SplitByDatetimeValidationChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, target: str, datetime_column: str, validation_ratio: float) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.checkers.StratifiedKFoldChecker(level, name='StratifiedKFoldChecker')

Bases: actableai.data_validation.base.IChecker

check(df: pandas.core.frame.DataFrame, intervention: str, causal_cv: int) Optional[actableai.data_validation.base.CheckResult]

Check if the features can be split into stratified folds.

Parameters
  • df – Dataframe to check.
  • intervention – Intervention column to check.
  • causal_cv – Number of cross-validation folds.
Returns

Check result.

Return type

Optional[CheckResult]

class actableai.data_validation.checkers.TimeSeriesTuningMetricChecker(level, name='TimeSeriesTuningMetricChecker')

Bases: actableai.data_validation.base.IChecker

check(tuning_metric: str) Optional[actableai.data_validation.base.CheckResult]
class actableai.data_validation.checkers.UniqueDateTimeChecker(level, name='IsValidFrequencyChecker')

Bases: actableai.data_validation.base.IChecker

check(dt_series) Optional[actableai.data_validation.base.CheckResult]

Check if there are duplicate datetime values.

Parameters
dt_series – Series to check.
Returns
Check result.
Return type
Optional[CheckResult]

actableai.data_validation.params module

class actableai.data_validation.params.AssociationRulesDataValidator

Bases: object

validate(df: pandas.core.frame.DataFrame, group_by: List[str], items: str) List[Optional[actableai.data_validation.base.CheckResult]]

Method to validate data before running association rules algorithm

Parameters
  • df – DataFrame to validate
  • group_by – Columns of df that group the buyers for the association
  • items – Column in df representing the associated items

Returns: List of check results.

class actableai.data_validation.params.BayesianRegressionDataValidator

Bases: object

validate(target: str, features: List[str], df: pandas.core.frame.DataFrame, polynomial_degree: int) List
class actableai.data_validation.params.CausalDataValidator

Bases: object

validate(treatments: List[str], outcomes: List[str], df: pandas.core.frame.DataFrame, effect_modifiers: List[str], common_causes: List[str], positive_outcome_value: Optional[Any], drop_unique: bool, cv: Union[int, str]) List[Optional[actableai.data_validation.base.CheckResult]]
validate_cv(treatments: List[str], outcomes: List[str], df: pandas.core.frame.DataFrame, cv: Union[int, str]) List[Optional[actableai.data_validation.base.CheckResult]]
validate_except_cv(treatments: List[str], outcomes: List[str], df: pandas.core.frame.DataFrame, effect_modifiers: List[str], common_causes: List[str], positive_outcome_value: Optional[Any], drop_unique: bool) List[Optional[actableai.data_validation.base.CheckResult]]
class actableai.data_validation.params.CausalDiscoveryDataValidator

Bases: object

validate(algo: str)
class actableai.data_validation.params.CausalFeatureSelectionDataValidator

Bases: object

validate(target: str, features: List[str], df: pandas.core.frame.DataFrame)
class actableai.data_validation.params.ClassificationDataValidator

Bases: object

validate(target, features, debiasing_features, debiased_features, df, presets, validation_ratio=None, kfolds: int = 1, drop_duplicates=True, explain_samples=False, eval_metric: str = 'accuracy', drop_unique=True, split_by_datetime=False, datetime_column: Optional[str] = None) List[Optional[actableai.data_validation.base.CheckResult]]
class actableai.data_validation.params.ClusteringDataValidator

Bases: object

validate(target, df, n_cluster, explain_samples=False, max_train_samples: Optional[int] = None, clustering_model_class: Optional[Type[actableai.clustering.models.base.BaseClusteringModel]] = None)
class actableai.data_validation.params.CorrelationDataValidator

Bases: object

validate(df, target)
class actableai.data_validation.params.DataImputationDataValidator

Bases: object

validate(df)
class actableai.data_validation.params.InterventionDataValidator

Bases: object

validate(df, target: str, current_intervention_column: str, new_intervention_column: Optional[str], common_causes: List[str], causal_cv, drop_unique: bool)
class actableai.data_validation.params.RegressionDataValidator

Bases: object

validate(target, features, df, debiasing_features, debiased_features, eval_metric='r2', prediction_quantiles=None, presets='medium_quality_faster_train', explain_samples=False, drop_duplicates=True, drop_unique=True, split_by_datetime=False, datetime_column=None, kfolds=1)
class actableai.data_validation.params.TextExtractionDataValidator

Bases: object

validate(df: pandas.core.frame.DataFrame, document_name_column: str, text_column: str, fields_to_extract: List[str])
class actableai.data_validation.params.TimeSeriesDataValidator

Bases: object

validate(df, date_column, predicted_columns, feature_columns, group_by, tuning_metric)
class actableai.data_validation.params.TimeSeriesPredictionDataValidator

Bases: object

validate(train_dataset, valid_dataset, predict_dataset, feature_columns, predicted_columns, prediction_length)

Module contents