# Source code for hooqu.constraints.constraints

from typing import Callable, Optional, Pattern, Sequence, Union

from hooqu.analyzers import (
    Completeness,
    Compliance,
    FrequenciesAndNumRows,
    Maximum,
    MaxState,
    Mean,
    MeanState,
    Minimum,
    MinState,
    NumMatches,
    NumMatchesAndCount,
    PatternMatch,
    Quantile,
    QuantileState,
    Size,
    StandardDeviation,
    StandardDeviationState,
    Sum,
    SumState,
    Uniqueness,
)
from hooqu.constraints.analysis_based_constraint import AnalysisBasedConstraint
from hooqu.constraints.constraint import Constraint, NamedConstraint

# A lot of mypy ignores are needed below because mypy is not able to see that
# the analyzers are specializations of Analyzer[K, S, V]


def size_constraint(
    assertion: Callable[[int], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Create a constraint that pairs a ``Size`` analyzer with ``assertion``.

    Parameters
    ----------
    assertion:
        Callable that receives an int input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``SizeConstraint(<analyzer>)``.

    Raises
    ------
    ValueError
        If ``assertion`` is not callable.
    """
    # Fail fast, before any analyzer is built, when the assertion is unusable.
    if not callable(assertion):
        raise ValueError("assertion is not a callable")

    analyzer = Size(where)
    return NamedConstraint(
        AnalysisBasedConstraint[NumMatches, int, int](
            analyzer, assertion, hint=hint  # type: ignore[arg-type]
        ),
        f"SizeConstraint({analyzer})",
    )


def min_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Create a constraint that pairs a ``Minimum`` analyzer with ``assertion``.

    Parameters
    ----------
    column:
        Column handed to the ``Minimum`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``MinimumConstraint(<analyzer>)``.
    """
    analyzer = Minimum(column, where)
    return NamedConstraint(
        AnalysisBasedConstraint[MinState, float, float](
            analyzer, assertion, hint=hint  # type: ignore[arg-type]
        ),
        f"MinimumConstraint({analyzer})",
    )


def max_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Create a constraint that pairs a ``Maximum`` analyzer with ``assertion``.

    Parameters
    ----------
    column:
        Column handed to the ``Maximum`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``MaximumConstraint(<analyzer>)``.
    """
    analyzer = Maximum(column, where)
    return NamedConstraint(
        AnalysisBasedConstraint[MaxState, float, float](
            analyzer, assertion, hint=hint  # type: ignore[arg-type]
        ),
        f"MaximumConstraint({analyzer})",
    )


def completeness_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Create a constraint that pairs a ``Completeness`` analyzer with
    ``assertion``.

    Parameters
    ----------
    column:
        Column handed to the ``Completeness`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``CompletenessConstraint(<analyzer>)``.
    """
    analyzer = Completeness(column, where)
    return NamedConstraint(
        AnalysisBasedConstraint[NumMatchesAndCount, float, float](
            analyzer, assertion, hint=hint  # type: ignore[arg-type]
        ),
        f"CompletenessConstraint({analyzer})",
    )


def mean_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Create a constraint that pairs a ``Mean`` analyzer with ``assertion``.

    Parameters
    ----------
    column:
        Column handed to the ``Mean`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``MeanConstraint(<analyzer>)``.
    """
    analyzer = Mean(column, where)
    return NamedConstraint(
        AnalysisBasedConstraint[MeanState, float, float](
            analyzer, assertion, hint=hint  # type: ignore[arg-type]
        ),
        f"MeanConstraint({analyzer})",
    )


def sum_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Create a constraint that pairs a ``Sum`` analyzer with ``assertion``.

    Parameters
    ----------
    column:
        Column handed to the ``Sum`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``SumConstraint(<analyzer>)``.
    """
    # Trailing underscore keeps the local from shadowing the builtin ``sum``.
    sum_ = Sum(column, where)
    constraint = AnalysisBasedConstraint[SumState, float, float](
        sum_, assertion, hint=hint  # type: ignore[arg-type]
    )

    # Interpolate the analyzer (``sum_``), not the builtin ``sum`` function,
    # so the label describes the analyzer like every other constraint here.
    return NamedConstraint(constraint, f"SumConstraint({sum_})")


def standard_deviation_constraint(
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Create a constraint that pairs a ``StandardDeviation`` analyzer with
    ``assertion``.

    Parameters
    ----------
    column:
        Column handed to the ``StandardDeviation`` analyzer.
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed

    Returns
    -------
    The analysis-based constraint wrapped in a ``NamedConstraint`` labelled
    ``StandardDeviationConstraint(<analyzer>)``.
    """
    analyzer = StandardDeviation(column, where)
    return NamedConstraint(
        AnalysisBasedConstraint[StandardDeviationState, float, float](
            analyzer, assertion, hint=hint  # type: ignore[arg-type]
        ),
        f"StandardDeviationConstraint({analyzer})",
    )


def quantile_constraint(
    column: str,
    quantile: float,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Runs quantile analysis on the given column and executes the assertion.

    Parameters
    ----------
    column:
        Column to run the assertion on
    quantile:
        Which quantile to assert on
    assertion:
        Callable that receives a float input parameter (the computed
        quantile) and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed
    """
    quant = Quantile(column, quantile, where)
    constraint = AnalysisBasedConstraint[QuantileState, float, float](
        quant, assertion, hint=hint  # type: ignore[arg-type]
    )

    return NamedConstraint(constraint, f"QuantileConstraint({quant})")
def compliance_constraint(
    name: str,
    column: str,
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Runs the given expression on the given column(s) and executes the
    assertion.

    Parameters
    ----------
    name:
        A name that summarizes the check being made. This name is being used
        to name the metrics for the analysis being done.
    column:
        The column expression to be evaluated.
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed
    """
    compliance = Compliance(name, column, where)
    constraint = AnalysisBasedConstraint[NumMatchesAndCount, float, float](
        compliance, assertion, hint=hint  # type: ignore[arg-type]
    )

    return NamedConstraint(constraint, f"ComplianceConstraint({compliance})")
def uniqueness_constraint(
    columns: Sequence[str],
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Runs Uniqueness analysis on the given columns and executes the assertion.

    Parameters
    ----------
    columns:
        Columns to run the assertion on.
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    hint:
        A hint to provide additional context why a constraint could have failed
    """
    uniqueness = Uniqueness(columns, where)
    constraint = AnalysisBasedConstraint[FrequenciesAndNumRows, float, float](
        uniqueness, assertion, hint=hint  # type: ignore[arg-type]
    )

    return NamedConstraint(constraint, f"UniquenessConstraint({uniqueness})")
def pattern_match_constraint(
    column: str,
    pattern: Union[str, Pattern],
    assertion: Callable[[float], bool],
    where: Optional[str] = None,
    name: Optional[str] = None,
    hint: Optional[str] = None,
) -> Constraint:
    """
    Runs given regex compliance analysis on the given column(s) and executes
    the assertion.

    Parameters
    ----------
    column:
        The column to run the assertion on
    pattern:
        The regex pattern to check compliance for (either string or pattern
        instance)
    assertion:
        Callable that receives a float input parameter and returns a boolean
    where:
        Additional filter to apply before the analyzer is run.
    name:
        A name that summarizes the check being made. When given, it is used
        to label the returned constraint; otherwise the label is built from
        ``column`` and ``pattern``.
    hint:
        A hint to provide additional context why a constraint could have failed
    """
    pattern_match = PatternMatch(column, pattern, where)
    constraint = AnalysisBasedConstraint[NumMatchesAndCount, float, float](
        pattern_match, assertion, hint=hint  # type: ignore[arg-type]
    )

    # Use a separate local for the label instead of rebinding the ``name``
    # parameter; an empty or missing name falls back to column and pattern.
    constraint_name = (
        f"PatternMatchConstraint({name})"
        if name
        else f"PatternMatchConstraint({column}, {pattern})"
    )

    return NamedConstraint(constraint, constraint_name)