actableai.data_imputation package

Subpackages

Submodules

actableai.data_imputation.correlation_calculator module

class actableai.data_imputation.correlation_calculator.CorrelationCalculator

Bases: object

calculate_correlations_for_all_column_pairs(df: pandas.core.frame.DataFrame) → pandas.core.frame.DataFrame
clear()
most_correlate_columns(df: pandas.core.frame.DataFrame, col: str, top: int) → List[str]

actableai.data_imputation.data_imputation module

actableai.data_imputation.data_imputation.data_imputation(df, rules='', impute_nulls=True)

actableai.data_imputation.main module

Module contents

class actableai.data_imputation.ColumnType(value)

Bases: enum.Enum

An enumeration.

Category = 'category'
Complex = 'complex'
Float = 'float'
Id = 'id'
Integer = 'integer'
NULL = 'null'
NumWithTag = 'num_with_tag'
Percentage = 'percentage'
String = 'string'
Temperature = 'temperature'
Text = 'text'
Timestamp = 'timestamp'
Unknown = 'unknown'

class actableai.data_imputation.DataFrame(d: Union[str, pandas.core.frame.DataFrame])

Bases: pandas.core.frame.DataFrame

auto_fix(errors: Optional[actableai.data_imputation.error_detector.cell_erros.CellErrors] = None, *detectors: actableai.data_imputation.error_detector.base_error_detector.BaseErrorDetector) → actableai.data_imputation.data.data_frame.DataFrame
property column_types: actableai.data_imputation.type_recon.type_detector.DfTypes
detect_error(*detectors: actableai.data_imputation.error_detector.base_error_detector.BaseErrorDetector) → actableai.data_imputation.error_detector.cell_erros.CellErrors
enable_debug(enable: bool = True)
property fix_info
property fix_strategy
classmethod from_dict(data, orient='columns', dtype=None, columns=None) → actableai.data_imputation.data.data_frame.DataFrame

Construct DataFrame from dict of array-like or dicts.

Creates DataFrame object from dictionary by columns or by index allowing dtype specification.

Parameters
  • data (dict) – Of the form {field : array-like} or {field : dict}.
  • orient ({'columns', 'index'}, default 'columns') – The “orientation” of the data. If the keys of the passed dict should be the columns of the resulting DataFrame, pass ‘columns’ (default). Otherwise if the keys should be rows, pass ‘index’.
  • dtype (dtype, default None) – Data type to force, otherwise infer.
  • columns (list, default None) – Column labels to use when orient='index'. Raises a ValueError if used with orient='columns'.
Return type

DataFrame

See also

DataFrame.from_records
DataFrame from structured ndarray, sequence of tuples or dicts, or DataFrame.
DataFrame
DataFrame object creation using constructor.

Examples

By default the keys of the dict become the DataFrame columns:

>>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']}
>>> pd.DataFrame.from_dict(data)
   col_1 col_2
0      3     a
1      2     b
2      1     c
3      0     d

Specify orient='index' to create the DataFrame using dictionary keys as rows:

>>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']}
>>> pd.DataFrame.from_dict(data, orient='index')
       0  1  2  3
row_1  3  2  1  0
row_2  a  b  c  d

When using the ‘index’ orientation, the column names can be specified manually:

>>> pd.DataFrame.from_dict(data, orient='index',
...                        columns=['A', 'B', 'C', 'D'])
       A  B  C  D
row_1  3  2  1  0
row_2  a  b  c  d

override_column_type(column: str, column_type: actableai.data_imputation.meta.types.ColumnType)
property possible_column_types: Dict[str, Set[actableai.data_imputation.meta.types.ColumnType]]

class actableai.data_imputation.MisplacedDetector(*, preset_rules: typing.List[actableai.data_imputation.error_detector.column_format.PresetRuleName] = (), customize_rules: actableai.data_imputation.error_detector.column_format.MatchRules = <actableai.data_imputation.error_detector.column_format.MatchRules object>)

Bases: actableai.data_imputation.error_detector.base_error_detector.BaseErrorDetector

detect_cells() → actableai.data_imputation.error_detector.cell_erros.CellErrors
property mentioned_columns: Set[str]
setup(df: pandas.core.frame.DataFrame, dftypes: actableai.data_imputation.type_recon.type_detector.DfTypes)
update_df(df: pandas.core.frame.DataFrame)

class actableai.data_imputation.NullDetector

Bases: actableai.data_imputation.error_detector.base_error_detector.BaseErrorDetector

detect_cells() → actableai.data_imputation.error_detector.cell_erros.CellErrors

class actableai.data_imputation.ValidationDetector(constraints: Optional[actableai.data_imputation.error_detector.constraint.Constraints] = NotImplemented)

Bases: actableai.data_imputation.error_detector.base_error_detector.BaseErrorDetector

property constraints
detect_cells() → actableai.data_imputation.error_detector.cell_erros.CellErrors
classmethod from_constraints(constraints_string: str)
setup_constraints(constraints: actableai.data_imputation.error_detector.constraint.Constraints)