Skip to contents

See the number of valid and invalid checks either by rule or by record.

Usage

# S3 method for tbl_validation
aggregate(x, by = c("rule", "record", "key"), ...)

Arguments

x

tbl_validation() object

by

either by "rule" or by "record"

...

not used

Value

A dbplyr::tbl_dbi() object that represents the aggregation query (to be executed) on the database.

Details

The result of a confront() on a db tbl results in a lazy squery. That is it builds a query without executing it. To store the result in the database use compute() or values().

Examples

income <- data.frame(id = 1:2, age=c(12,35), salary = c(1000,NA))
con <- dbplyr::src_memdb()
tbl_income <- dplyr::copy_to(con, income, overwrite=TRUE)
print(tbl_income)
#> # Source:   table<income> [?? x 3]
#> # Database: sqlite 3.38.5 [:memory:]
#>      id   age salary
#>   <int> <dbl>  <dbl>
#> 1     1    12   1000
#> 2     2    35     NA

# Let's define a rule set and confront the table with it:
rules <- validator( is_adult   = age >= 18
                    , has_income = salary > 0
)

# and confront!
# in general with a db table it is handy to use a key
cf <- confront(tbl_income, rules, key="id")
aggregate(cf, by = "rule")
#> # Source:   lazy query [?? x 7]
#> # Database: sqlite 3.38.5 [:memory:]
#>   rule       npass nfail   nNA rel.pass rel.fail rel.NA
#>   <chr>      <int> <int> <int> <lgl>       <dbl>  <dbl>
#> 1 is_adult       1     1     0 NA            0.5    0  
#> 2 has_income     1     0     1 NA            0      0.5
aggregate(cf, by = "record")
#> # Source:   lazy query [?? x 3]
#> # Database: sqlite 3.38.5 [:memory:]
#>      id nfails   nNA
#>   <int>  <int> <int>
#> 1     1      1     0
#> 2     2      0     1

# to tweak performance of the db query the following options are available
# 1) store validation result in db
cf <- confront(tbl_income, rules, key="id", compute = TRUE)
# or identical
cf <- confront(tbl_income, rules, key="id")
cf <- compute(cf)

# 2) Store the validation sparsely
cf_sparse <- confront(tbl_income, rules, key="id", sparse=TRUE )

show_query(cf_sparse)
#> <SQL>
#> SELECT `id`, 'is_adult' AS `rule`, 1 AS `fail`
#> FROM (SELECT `id`, `age`
#> FROM `income`)
#> WHERE (`age` - 18.0 < -1e-08)
#> UNION ALL
#> SELECT `id`, 'is_adult' AS `rule`, NULL AS `fail`
#> FROM (SELECT `id`, `age`
#> FROM `income`)
#> WHERE (((`age`) IS NULL))
#> UNION ALL
#> SELECT `id`, 'has_income' AS `rule`, 1 AS `fail`
#> FROM (SELECT `id`, `salary`
#> FROM `income`)
#> WHERE (`salary` <= 0.0)
#> UNION ALL
#> SELECT `id`, 'has_income' AS `rule`, NULL AS `fail`
#> FROM (SELECT `id`, `salary`
#> FROM `income`)
#> WHERE (((`salary`) IS NULL))
values(cf_sparse, type="tbl")
#> # Source:   lazy query [?? x 3]
#> # Database: sqlite 3.38.5 [:memory:]
#>      id rule        fail
#>   <int> <chr>      <int>
#> 1     1 is_adult       1
#> 2     2 has_income    NA