validatetools is a utility package for managing validation rule sets that are defined with validate. In production systems, validation rule sets tend to grow organically and accumulate redundant or (partially) contradictory rules. validatetools helps to identify problems with large rule sets and includes simplification methods for resolving issues.

Installation

validatetools is available from CRAN and can be installed with

install.packages("validatetools")

The adventurous can install an (unstable) development version of validatetools from GitHub with:

# install.packages("devtools")
devtools::install_github("data-cleaning/validatetools")

or use

install.packages('validatetools', repos = c('https://data-cleaning.r-universe.dev', 'https://cloud.r-project.org'))

Example

Check for feasibility

rules <- validator( x > 0)
is_infeasible(rules)
#> [1] FALSE

rules <- validator(
  rule1 = x > 0,
  rule2 = x < 0
)
is_infeasible(rules)
#> [1] TRUE

detect_infeasible_rules(rules, verbose=TRUE)
#> Found: 
#>   rule1: x > 0
#> [1] "rule1"
# find out which rule(s) conflict with this rule
is_contradicted_by(rules, "rule1", verbose=TRUE)
#> Rule(s): 
#>  rule1: x > 0
#> contradicted by:
#>  rule2: x < 0
#> [1] "rule2"

# we prefer to keep rule1, so we can give rule1 Inf weight
detect_infeasible_rules(
  rules, 
  weight=c(rule1 = Inf), 
  verbose=TRUE
)
#> Found: 
#>   rule2: x < 0
#> [1] "rule2"

make_feasible(rules, weight=c(rule1=Inf), verbose=TRUE)
#> Found: 
#>   rule2: x < 0
#> Dropping rule(s): "rule2"
#> Object of class 'validator' with 1 elements:
#>  rule1: x > 0
#> Rules are evaluated using locally defined options

Finding contradicting if rules

rules <- validator(
  rule1 = if (income > 0) job == "yes",
  rule2 = if (job == "yes") income == 0
)
    
is_infeasible(rules, verbose=TRUE)
#> The rule set is feasible,
#>   but may contain contradictions in conditional if-rules.
#>   use `detect_contradicting_if_rules()` to find out whether there are 
#>   contradictions in the if-clauses.
#> [1] FALSE
conflicts <- detect_contradicting_if_rules(rules, verbose=TRUE)
#> 1 contradiction(s) with if clauses found:
#> When income > 0:
#>   rule2: if (job == "yes") income == 0
#>   rule1: if (income > 0) job == "yes"
print(conflicts)
#> $`income > 0`
#> [1] "rule2" "rule1"

Simplifying

The function simplify_rules combines most simplification methods of validatetools to simplify a rule set. For example, it reduces the following rule set to a simpler form:

rules <- validator(
  rule1 = if (age < 16) income == 0,
  rule2 = job %in% c("yes", "no"),
  rule3 = if (job == "yes") income > 0
)

simplify_rules(rules, age = 13)
#> Object of class 'validator' with 3 elements:
#>  .const_income: income == 0
#>  .const_age   : age == 13
#>  .const_job   : job == "no"
# or
simplify_rules(rules, job = "yes")
#> Object of class 'validator' with 3 elements:
#>  rule1     : age >= 16
#>  rule3     : income > 0
#>  .const_job: job == "yes"

simplify_rules combines the following simplification and substitution methods:

Value substitution

rules <- validator( 
  rule1 = height > 4,
  rule2 = height <= max_height,
  rule3 = if (gender == "male") weight > 100,
  rule4 = gender %in% c("male", "female")
)
substitute_values(rules, max_height = 6, gender = "male")
#> Object of class 'validator' with 5 elements:
#>  rule1            : height > 4
#>  rule2            : height <= 6
#>  rule3            : weight > 100
#>  .const_max_height: max_height == 6
#>  .const_gender    : gender == "male"

Finding fixed values

rules <- validator( 
  rule1 = x >= 0, 
  rule2 = x <=0
)
detect_fixed_variables(rules)
#> $x
#> [1] 0
simplify_fixed_variables(rules)
#> Object of class 'validator' with 1 elements:
#>  .const_x: x == 0

rules <- validator(
  rule1 = x1 + x2 + x3 == 0,
  rule2 = x1 + x2 >= 0,
  rule3 = x3 >=0
)
simplify_fixed_variables(rules)
#> Object of class 'validator' with 3 elements:
#>  rule1    : x1 + x2 + 0 == 0
#>  rule2    : x1 + x2 >= 0
#>  .const_x3: x3 == 0

Simplifying conditional statements

# superfluous conditions
rules <- validator(
  r1 = if (age > 18) age <= 67,
  r2 = if (income > 0 && income > 1000) job == TRUE 
)
# r1 implies that age is always <= 67
simplify_conditional(rules)
#> Object of class 'validator' with 2 elements:
#>  r1: age <= 67
#>  r2: income <= 1000 | (job == TRUE)



# non-relaxing clause
rules <- validator( 
  r1 = if (income > 0) age >= 16,
  r2 = age < 12
)
# age >= 16 is always FALSE (because of r2), so r1 can be simplified
simplify_conditional(rules)
#> Object of class 'validator' with 2 elements:
#>  r1: income <= 0
#>  r2: age < 12


# non-constraining clause
rules <- validator( 
  rule1 = if (age  < 16) income == 0,
  rule2 = if (age >=16) income >= 0
)
simplify_conditional(rules)
#> Object of class 'validator' with 2 elements:
#>  rule1: age >= 16 | (income == 0)
#>  rule2: income >= 0

Removing redundant rules

rules <- validator(
  rule1 = age > 12,
  rule2 = age > 18
)

# rule1 is superfluous
remove_redundancy(rules, verbose=TRUE)
#> Removed redundant rule(s):
#>   rule1: age > 12
#> Object of class 'validator' with 1 elements:
#>  rule2: age > 18

rules <- validator(
  rule1 = age > 12,
  rule2 = age > 12
)

# stand-off: rule1 and rule2 are identical, so the first rule wins
remove_redundancy(rules, verbose=TRUE)
#> Removed redundant rule(s):
#>   rule2: age > 12
#> Object of class 'validator' with 1 elements:
#>  rule1: age > 12

# Note that detection flags both rules as redundant!
detect_redundancy(rules, verbose=TRUE)
#> Redundant rule(s):
#>   rule1: age > 12
#>   rule2: age > 12
#> rule1 rule2 
#>  TRUE  TRUE