library(here)
library(zeallot)
source(file.path(here(), "src/utils/utils.R"))

Generte categorical variables

Generate categorical variables based on conditional probabilities

Generate two categorical variables based on conditional distribution of the two categorical variables

tab <- matrix(c(5, 4, 2, 5, 6, 8), ncol = 2) / 30
dimnames(tab) <- list(c("A", "B", "C"), c("X", "Y"))
print(tab)
           X         Y
A 0.16666667 0.1666667
B 0.13333333 0.2000000
C 0.06666667 0.2666667
c(varA, varB) %<-% gen_2catBy2cat(tab, 100)
prop.table(table(varA, varB))
    varB
varA    X    Y
   A 0.19 0.20
   B 0.08 0.18
   C 0.07 0.28

Genrate a set of variables based on correlation structure

# 1. generate correlated uniform vars
df <- gen_gauss_cop(c(0.0, 0.2, 0.5), 1000)
cor(df)
           [,1]       [,2]        [,3]        [,4]
[1,] 1.00000000 0.02917351  0.18951054  0.48828247
[2,] 0.02917351 1.00000000  0.00436373  0.21103953
[3,] 0.18951054 0.00436373  1.00000000 -0.03408774
[4,] 0.48828247 0.21103953 -0.03408774  1.00000000
par(mfrow = c(2, 2))
invisible(apply(df, 2, function(x) hist(x)))

# 2. generate genotype from the correlated mtx with MAF
afs <- c(0.05, 0.1, 0.2, 0.5)
G <- sapply(seq_len(ncol(df)), function(i) {
  unif2genotype(df[, i], afs[i])
}) %>%
  data.frame() %>%
  setNames(paste0("G", 1:ncol(df)))
summary(G)
       G1              G2             G3              G4       
 Min.   :1.000   Min.   :0.00   Min.   :0.000   Min.   :0.000  
 1st Qu.:2.000   1st Qu.:2.00   1st Qu.:1.000   1st Qu.:1.000  
 Median :2.000   Median :2.00   Median :2.000   Median :1.000  
 Mean   :1.908   Mean   :1.79   Mean   :1.613   Mean   :1.027  
 3rd Qu.:2.000   3rd Qu.:2.00   3rd Qu.:2.000   3rd Qu.:2.000  
 Max.   :2.000   Max.   :2.00   Max.   :2.000   Max.   :2.000  
cat_cor(G)
          G1         G2         G3         G4
G1 1.0000000 0.16997444 0.11024363 0.43385115
G2 0.1699744 1.00000000 0.04361235 0.22121256
G3 0.1102436 0.04361235 1.00000000 0.03434121
G4 0.4338512 0.22121256 0.03434121 1.00000000

Generatenumeric data

Session Info

sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.2 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8        LC_COLLATE=C.UTF-8    
 [5] LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8    LC_PAPER=C.UTF-8       LC_NAME=C             
 [9] LC_ADDRESS=C           LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] zeallot_0.1.0  here_1.0.1     lmerTest_3.1-3 lme4_1.1-27.1  Matrix_1.5-3   jtools_2.2.0  
 [7] ggpubr_0.5.0   ggplot2_3.4.0  dplyr_1.0.10   knitr_1.39    

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.7          lattice_0.20-44     tidyr_1.2.1         assertthat_0.2.1   
 [5] rprojroot_2.0.2     digest_0.6.27       utf8_1.2.2          R6_2.5.1           
 [9] cellranger_1.1.0    backports_1.2.1     evaluate_0.15       highr_0.9          
[13] pillar_1.6.2        rlang_1.0.6         curl_4.3.2          readxl_1.3.1       
[17] rstudioapi_0.13     minqa_1.2.4         data.table_1.14.0   jquerylib_0.1.4    
[21] car_3.0-11          nloptr_1.2.2.2      rmarkdown_2.18      splines_4.1.0      
[25] stringr_1.5.0       foreign_0.8-81      pander_0.6.4        munsell_0.5.0      
[29] broom_1.0.0         compiler_4.1.0      numDeriv_2016.8-1.1 xfun_0.31          
[33] pkgconfig_2.0.3     htmltools_0.5.3     tidyselect_1.1.1    tibble_3.1.6       
[37] docthis_0.1.1       rio_0.5.27          fansi_0.5.0         crayon_1.4.1       
[41] withr_2.5.0         MASS_7.3-54         grid_4.1.0          nlme_3.1-152       
[45] jsonlite_1.8.4      gtable_0.3.0        lifecycle_1.0.3     DBI_1.1.1          
[49] magrittr_2.0.3      scales_1.2.1        zip_2.2.0           cli_3.4.1          
[53] stringi_1.7.8       carData_3.0-4       ggsignif_0.6.2      bslib_0.2.5.1      
[57] ellipsis_0.3.2      generics_0.1.0      vctrs_0.5.1         boot_1.3-28        
[61] openxlsx_4.2.4      tools_4.1.0         forcats_0.5.1       glue_1.6.2         
[65] purrr_0.3.4         hms_1.1.0           yaml_2.2.1          fastmap_1.1.0      
[69] abind_1.4-5         colorspace_2.0-2    rstatix_0.7.1.999   haven_2.4.1        
[73] sass_0.4.0         
# Markdown --------------------------------------------------------
# rmarkdown::render('docs/demo.R', output_dir = 'docs')