## ----include = FALSE----------------------------------------------------------
# Chunk options for the rendered vignette: merge source and output into one
# block and prefix printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(syntheticdata)

## ----real-data----------------------------------------------------------------
# Simulate a 500-row example data set. The seed is fixed so the draws are
# reproducible; the columns are generated in this order on purpose, because
# reordering them would change the random stream each column receives.
set.seed(42)
real <- data.frame(
  age = rnorm(500, mean = 65, sd = 12),
  sbp = rnorm(500, mean = 135, sd = 22),
  sex = sample(c("Male", "Female"), 500, replace = TRUE),
  smoking = sample(
    c("Never", "Former", "Current"),
    500,
    replace = TRUE,
    prob = c(0.4, 0.35, 0.25)
  ),
  outcome = rbinom(500, 1, 0.28)
)
head(real)

## ----synthesize---------------------------------------------------------------
# Generate 500 synthetic rows from `real` with the "parametric" method.
syn <- synthesize(real, method = "parametric", n = 500, seed = 1)
syn

## ----validate-----------------------------------------------------------------
# Validation summary for the synthetic data. The tradeoff chunk below reads
# `metric` and `value` from this function's result.
val <- validate_synthetic(syn)
val

## ----compare------------------------------------------------------------------
# Compare synthesis methods on the same real data.
comp <- compare_methods(real, seed = 1)
comp

## ----privacy------------------------------------------------------------------
# Privacy assessment with `age` passed as the sensitive column.
pr <- privacy_risk(syn, sensitive_cols = "age")
pr

## ----fidelity-----------------------------------------------------------------
# Model fidelity with `outcome` as the outcome column.
mf <- model_fidelity(syn, outcome = "outcome")
mf

## ----tradeoff-----------------------------------------------------------------
# For each noise level, synthesize with the "noise" method, validate, and keep
# two metrics as a one-row data frame. lapply() returns the rows as a list in
# the order of `noise_levels`, so nothing is grown inside a loop.
noise_levels <- c(0.05, 0.1, 0.2, 0.5)
results <- lapply(noise_levels, function(nl) {
  s <- synthesize(real, method = "noise", noise_level = nl, seed = 1)
  v <- validate_synthetic(s)
  data.frame(
    noise_level = nl,
    ks = v$value[v$metric == "ks_statistic_mean"],
    privacy = v$value[v$metric == "nn_distance_ratio"]
  )
})
# Stack the per-level rows into a single table for display.
do.call(rbind, results)