library(here)
library(zeallot)
source(file.path(here(), "src/utils/utils.R"))
Generate two categorical variables based on conditional distribution of the two categorical variables
tab <- matrix(c(5, 4, 2, 5, 6, 8), ncol = 2) / 30
dimnames(tab) <- list(c("A", "B", "C"), c("X", "Y"))
print(tab)
X Y
A 0.16666667 0.1666667
B 0.13333333 0.2000000
C 0.06666667 0.2666667
c(varA, varB) %<-% gen_2catBy2cat(tab, 100)
prop.table(table(varA, varB))
varB
varA X Y
A 0.19 0.20
B 0.08 0.18
C 0.07 0.28
Genrate a set of variables based on correlation structure
# 1. generate correlated uniform vars
df <- gen_gauss_cop(c(0.0, 0.2, 0.5), 1000)
cor(df)
[,1] [,2] [,3] [,4]
[1,] 1.00000000 0.02917351 0.18951054 0.48828247
[2,] 0.02917351 1.00000000 0.00436373 0.21103953
[3,] 0.18951054 0.00436373 1.00000000 -0.03408774
[4,] 0.48828247 0.21103953 -0.03408774 1.00000000
par(mfrow = c(2, 2))
invisible(apply(df, 2, function(x) hist(x)))
# 2. generate genotype from the correlated mtx with MAF
afs <- c(0.05, 0.1, 0.2, 0.5)
G <- sapply(seq_len(ncol(df)), function(i) {
unif2genotype(df[, i], afs[i])
}) %>%
data.frame() %>%
setNames(paste0("G", 1:ncol(df)))
summary(G)
G1 G2 G3 G4
Min. :1.000 Min. :0.00 Min. :0.000 Min. :0.000
1st Qu.:2.000 1st Qu.:2.00 1st Qu.:1.000 1st Qu.:1.000
Median :2.000 Median :2.00 Median :2.000 Median :1.000
Mean :1.908 Mean :1.79 Mean :1.613 Mean :1.027
3rd Qu.:2.000 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:2.000
Max. :2.000 Max. :2.00 Max. :2.000 Max. :2.000
cat_cor(G)
G1 G2 G3 G4
G1 1.0000000 0.16997444 0.11024363 0.43385115
G2 0.1699744 1.00000000 0.04361235 0.22121256
G3 0.1102436 0.04361235 1.00000000 0.03434121
G4 0.4338512 0.22121256 0.03434121 1.00000000
Session Info
sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.2 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 LC_COLLATE=C.UTF-8
[5] LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8 LC_PAPER=C.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] zeallot_0.1.0 here_1.0.1 lmerTest_3.1-3 lme4_1.1-27.1 Matrix_1.5-3 jtools_2.2.0
[7] ggpubr_0.5.0 ggplot2_3.4.0 dplyr_1.0.10 knitr_1.39
loaded via a namespace (and not attached):
[1] Rcpp_1.0.7 lattice_0.20-44 tidyr_1.2.1 assertthat_0.2.1
[5] rprojroot_2.0.2 digest_0.6.27 utf8_1.2.2 R6_2.5.1
[9] cellranger_1.1.0 backports_1.2.1 evaluate_0.15 highr_0.9
[13] pillar_1.6.2 rlang_1.0.6 curl_4.3.2 readxl_1.3.1
[17] rstudioapi_0.13 minqa_1.2.4 data.table_1.14.0 jquerylib_0.1.4
[21] car_3.0-11 nloptr_1.2.2.2 rmarkdown_2.18 splines_4.1.0
[25] stringr_1.5.0 foreign_0.8-81 pander_0.6.4 munsell_0.5.0
[29] broom_1.0.0 compiler_4.1.0 numDeriv_2016.8-1.1 xfun_0.31
[33] pkgconfig_2.0.3 htmltools_0.5.3 tidyselect_1.1.1 tibble_3.1.6
[37] docthis_0.1.1 rio_0.5.27 fansi_0.5.0 crayon_1.4.1
[41] withr_2.5.0 MASS_7.3-54 grid_4.1.0 nlme_3.1-152
[45] jsonlite_1.8.4 gtable_0.3.0 lifecycle_1.0.3 DBI_1.1.1
[49] magrittr_2.0.3 scales_1.2.1 zip_2.2.0 cli_3.4.1
[53] stringi_1.7.8 carData_3.0-4 ggsignif_0.6.2 bslib_0.2.5.1
[57] ellipsis_0.3.2 generics_0.1.0 vctrs_0.5.1 boot_1.3-28
[61] openxlsx_4.2.4 tools_4.1.0 forcats_0.5.1 glue_1.6.2
[65] purrr_0.3.4 hms_1.1.0 yaml_2.2.1 fastmap_1.1.0
[69] abind_1.4-5 colorspace_2.0-2 rstatix_0.7.1.999 haven_2.4.1
[73] sass_0.4.0
# Markdown --------------------------------------------------------
# rmarkdown::render('docs/demo.R', output_dir = 'docs')