## ---- echo = FALSE, message = FALSE--------------------------------------
library(simstudy)
library(ggplot2)
library(scales)
library(grid)
library(gridExtra)
library(survival)
library(knitr)
library(gee)

set.seed(33333)

opts_chunk$set(tidy.opts=list(width.cutoff=75), tidy=TRUE)

plotcolors <- c("#B84226", "#1B8445", "#1C5974")

cbbPalette <- c("#B84226","#B88F26", "#A5B435", "#1B8446",
                "#B87326","#B8A526", "#6CA723", "#1C5974") 

ggtheme <- function(panelback = "white") {
  
  ggplot2::theme(
    panel.background = element_rect(fill = panelback),
    panel.grid = element_blank(),
    axis.ticks =  element_line(colour = "black"),
    panel.spacing =unit(0.25, "lines"),  # requires package grid
    panel.border = element_rect(fill = NA, colour="gray90"), 
    plot.title = element_text(size = 8,vjust=.5,hjust=0),
    axis.text = element_text(size=8),
    axis.title = element_text(size = 8)
  )  
  
}

splotfunc <- function(dt, ptitle) {

  dtplot <- dt[,.N,keyby=.(male, over65, rxGrp)][, .(rxGrp, grp = male * 2 + over65 * 1, N)]
  ggplot(dtplot, aes(factor(grp), N)) +
    geom_bar(aes(fill = factor(rxGrp)), alpha=.8, position = "dodge", stat="identity") +
    scale_fill_manual(values = plotcolors) +
    ggtitle(ptitle) +
    theme(legend.position = "none") +
    ggtheme() +
    xlab("Strata") +
    ylim(0,80)
}

aplotfunc <- function(dt, ptitle) {

  dtplot <- dt[,.N,keyby=.(rxGrp)]
  ggplot(dtplot, aes(factor(rxGrp), N)) +
    geom_bar(aes(fill = factor(rxGrp)), alpha=.8, position="dodge", stat="identity", width=.5) +
    scale_fill_manual(values = plotcolors) +
    ggtitle(ptitle) +
    theme(legend.position = "none") +
    ggtheme() +
    xlab("Treatment group") +
    ylim(0,150)
}

ggmissing <- function(dtPlot,varSelect=NULL,varLevel=NULL, idvar = "id",
                      periodvar = "period", missvar,
                      pcolor="#738e75", title = NULL) {

  dtP <- copy(dtPlot)

  if (! is.null(varSelect)) dtP <- dtP[eval(parse(text=varSelect)) == varLevel]

  xp <- ggplot(data=dtP, aes(y = factor(eval(parse(text=idvar))),
                             x = eval(parse(text=periodvar)))) +
    geom_tile(aes(fill=factor(eval(parse(text=missvar)))),
                  color="white") +
    ggtheme()+
    theme(axis.text=element_blank(),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          legend.position="none",
          plot.title=element_text(size=8)
    ) +
    scale_fill_manual(values=c("grey80",pcolor))

  if (is.null(title)) {
    return(xp)
  } else {
    return(xp + ggtitle(title))
  }
}

ggsurv_m <- function(
  s,
  CI         = 'def',
  plot.cens  = TRUE,
  surv.col   = 'gg.def',
  cens.col   = 'gg.def',
  lty.est    = 1,
  lty.ci     = 2,
  cens.shape = 3,
  back.white = FALSE,
  xlab       = 'Time',
  ylab       = 'Survival',
  main       = '',
  strata     = length(s$strata),
  labels     = NULL
) {
  
  s <- fit
  
  n <- s$strata
  
  strataEqualNames <- unlist(strsplit(names(s$strata), '='))
  groups <- factor(
    strataEqualNames[seq(2, 2 * strata, by = 2)],
    levels = strataEqualNames[seq(2, 2 * strata, by = 2)]
  )
  
  gr.name <-  strataEqualNames[1]
  gr.df   <- vector('list', strata)
  n.ind   <- cumsum(c(0,n))
  
  for (i in 1:strata) {
    indI <- (n.ind[i]+1):n.ind[i+1]
    gr.df[[i]] <- data.frame(
      time  = c(0, s$time[ indI ]),
      surv  = c(1, s$surv[ indI ]),
      up    = c(1, s$upper[ indI ]),
      low   = c(1, s$lower[ indI ]),
      cens  = c(0, s$n.censor[ indI ]),
      group = rep(groups[i], n[i] + 1)
    )
  }
  
  dat      <- do.call(rbind, gr.df)
  dat.cens <- subset(dat, cens != 0)
  
  pl <- ggplot(dat, aes(x = time, y = surv, group = group)) +
    geom_step(aes(col = group, lty = group)) +
    xlab(xlab) +
    ylab(ylab) +
    ggtitle(main)
  
  pl <- if(surv.col[1] != 'gg.def'){
    scaleValues <- if (length(surv.col) == 1) {
      rep(surv.col, strata)
    } else{
      surv.col
    }
    pl + scale_colour_manual(name = gr.name, values = scaleValues, labels=labels)
    
  } else {
    pl + scale_colour_discrete(name = gr.name, labels=labels)
  }
  
  lineScaleValues <- if (length(lty.est) == 1) {
    rep(lty.est, strata)
  } else {
    lty.est
  }
  pl <- pl + scale_linetype_manual(name = gr.name, values = lineScaleValues)

  if(identical(CI,TRUE)) {
    if(length(surv.col) > 1 || length(lty.est) > 1){
      stop('Either surv.col or lty.est should be of length 1 in order to plot 95% CI with multiple strata')
    }

    stepLty <- if ((length(surv.col) > 1 | surv.col == 'gg.def')[1]) {
      lty.ci
    } else {
      surv.col
    }
    pl <- pl +
      geom_step(aes(y = up, lty = group), lty = stepLty) +
      geom_step(aes(y = low,lty = group), lty = stepLty)
  }

  if (identical(plot.cens, TRUE) ){
    if (nrow(dat.cens) == 0) {
      stop('There are no censored observations')
    }
    if (length(cens.col) == 1) {
      col <- ifelse(cens.col == 'gg.def', 'red', cens.col)
      pl <- pl + geom_point(
        data    = dat.cens,
        mapping = aes(y = surv),
        shape   = cens.shape,
        col     = col
      )

    } else if (length(cens.col) > 0) {
    # if(!(identical(cens.col,surv.col) || is.null(cens.col))) {
      #   warning ("Color scales for survival curves and censored points don't match.\nOnly one color scale can be used. Defaulting to surv.col")
      # }


      if (! identical(cens.col, "gg.def")) {
        if (length(cens.col) != strata) {
          warning("Color scales for censored points don't match the number of groups. Defaulting to ggplot2 default color scale")
          cens.col <- "gg.def"
        }
      }

      if (identical(cens.col, "gg.def")) {
        pl <- pl + geom_point(
          data = dat.cens,
          mapping = aes(y=surv, col = group),
          shape = cens.shape,
          show.legend = FALSE
        )
      } else {

        uniqueGroupVals = unique(dat.cens$group)
        if (length(cens.shape) == 1) {
          cens.shape = rep(cens.shape, strata)
        }

        if (length(cens.shape) != strata) {
          warning("The length of the censored shapes does not match the number of groups (or 1). Defaulting shape = 3 (+)")
          cens.shape = rep(3, strata)
        }
        for (i in seq_along(uniqueGroupVals)) {
          groupVal = uniqueGroupVals[i]
          dtGroup <- subset(dat.cens, group == groupVal)

          pl <- pl + geom_point(
            data = dtGroup,
            mapping = aes(y=surv),
            color = I(cens.col[i]),
            shape = cens.shape[i],
            show.legend = FALSE
          )

        }
      }

    }
  }

  if(identical(back.white, TRUE)) {
    pl <- pl + theme_bw()
  }
  
  pl
}


## ----  echo=FALSE--------------------------------------------------------
def <- defData(varname = "nr", dist = "nonrandom", formula=7, id = "idnum")
def <- defData(def,varname="x1", dist="uniform", formula="10;20")
def <- defData(def,varname="y1", formula="nr + x1 * 2", variance=8)
def <- defData(def,varname="y2", dist="poisson", formula="nr - 0.2 * x1",link="log")
def <- defData(def,varname="xCat",formula = "0.3;0.2;0.5", dist="categorical")
def <- defData(def,varname="g1", dist="gamma", formula = "5+xCat", variance = 1, link = "log")
def <- defData(def, varname = "a1", dist = "binary" , formula="-3 + xCat", link="logit")

knitr::kable(def)

## ---- tidy = TRUE--------------------------------------------------------
def <- defData(varname = "nr", dist = "nonrandom", formula=7, id = "idnum")
def <- defData(def,varname="x1",dist="uniform",formula="10;20")
def <- defData(def,varname="y1",formula="nr + x1 * 2",variance=8)
def <- defData(def,varname="y2",dist="poisson",formula="nr - 0.2 * x1",link="log")
def <- defData(def,varname="xCat",formula = "0.3;0.2;0.5",dist="categorical")
def <- defData(def,varname="g1", dist="gamma", formula = "5+xCat", variance = 1, link = "log")
def <- defData(def, varname = "a1", dist = "binary" , formula="-3 + xCat", link="logit")

## ---- tidy = TRUE--------------------------------------------------------
dt <- genData(1000, def)
dt

## ---- tidy = TRUE--------------------------------------------------------
addef <- defDataAdd(varname = "zExtra", dist = "normal", formula = '3 + y1', 
                 variance = 2)

dt <- addColumns(addef, dt)
dt

## ---- tidy = TRUE--------------------------------------------------------
def <- defData(varname = "male", dist = "binary", formula = .5 , id="cid")
def <- defData(def, varname = "over65", dist = "binary", formula = "-1.7 + .8*male", link="logit")
def <- defData(def, varname = "baseDBP", dist = "normal", formula = 70, variance = 40)

dtstudy <- genData(330, def)

## ---- tidy = TRUE--------------------------------------------------------
study1 <- trtAssign(dtstudy , n=3, balanced = TRUE, strata = c("male","over65"), grpName = "rxGrp")

study1

## ---- tidy = TRUE--------------------------------------------------------
study2 <- trtAssign(dtstudy , n=3, balanced = TRUE, grpName = "rxGrp")

## ---- tidy = TRUE--------------------------------------------------------
study3 <- trtAssign(dtstudy , n=3, balanced = FALSE, grpName = "rxGrp")

## ---- tidy = TRUE, echo = FALSE, fig.width = 4, fig.height = 6-----------
p1 <- splotfunc(study1, "Balanced within strata")
p1a <- aplotfunc(study1, "")

p2 <- splotfunc(study2, "Balanced without strata")
p2a <- aplotfunc(study2, "")

p3 <- splotfunc(study3, "Random allocation")
p3a <- aplotfunc(study3, "")

grid.arrange(p1, p1a, p2, p2a, p3, p3a, ncol=2)

## ---- tidy = TRUE--------------------------------------------------------
formula1 <- c("-2 + 2*male - .5*over65", "-1 + 2*male + .5*over65")
dtExp <- trtObserve(dtstudy, formulas = formula1, logit.link = TRUE, grpName = "exposure")

## ---- tidy = TRUE, echo = FALSE, fig.width = 6.5, fig.height = 2.5-------
dtplot1 <- dtExp[,.N,keyby=.(male,exposure)]
p1 <- ggplot(data = dtplot1, aes(x=factor(male), y=N)) +
  geom_bar(aes(fill=factor(exposure)), alpha = .8, stat="identity", position = "dodge") +
  ggtheme() +
  theme(axis.title.x = element_blank()) +
  theme(legend.title = element_text(size = 8)) +
  ylim(0, 150) +
  scale_fill_manual(values = plotcolors, name = "Exposure") +
  scale_x_discrete(breaks = c(0, 1), labels = c("Female", "Male")) +
  ylab("Number exposed")+
  ggtitle("Gender")

dtplot2 <- dtExp[,.N,keyby=.(over65,exposure)]
p2 <- ggplot(data = dtplot2, aes(x=factor(over65), y=N)) +
  geom_bar(aes(fill=factor(exposure)), alpha = .8, stat="identity", position = "dodge") +
  ggtheme() +
  theme(axis.title.x = element_blank()) +
  theme(legend.title = element_text(size = 8)) +
  ylim(0, 150) +
  scale_fill_manual(values = plotcolors, name = "Exposure") +
  scale_x_discrete(breaks = c(0, 1), labels = c("65 or younger", "Over 65")) +
  ylab("Number exposed") +
  ggtitle("Age")

grid.arrange(p1,p2,nrow=1)


## ---- tidy = TRUE--------------------------------------------------------
formula2 <- c(.35, .45)

dtExp2 <- trtObserve(dtstudy, formulas = formula2, logit.link = FALSE, grpName = "exposure")

## ---- tidy = TRUE, echo = FALSE, fig.width = 6.5, fig.height = 2.5-------
dtplot1a <- dtExp2[,.N,keyby=.(male,exposure)]
p1a <- ggplot(data = dtplot1a, aes(x=factor(male), y=N)) +
  geom_bar(aes(fill=factor(exposure)), alpha = .8, stat="identity", position = "dodge") +
  ggtheme() +
  theme(axis.title.x = element_blank()) +
  theme(legend.title = element_text(size = 8)) +
  ylim(0, 150) +
  scale_fill_manual(values = plotcolors, name = "Exposure") +
  scale_x_discrete(breaks = c(0, 1), labels = c("Female", "Male")) +
  ylab("Number exposed")+
  ggtitle("Gender")

dtplot2a <- dtExp2[,.N,keyby=.(over65,exposure)]
p2a <- ggplot(data = dtplot2a, aes(x=factor(over65), y=N)) +
  geom_bar(aes(fill=factor(exposure)), alpha = .8, stat="identity", position = "dodge") +
  ggtheme() +
  theme(axis.title.x = element_blank()) +
  theme(legend.title = element_text(size = 8)) +
  ylim(0, 150) +
  scale_fill_manual(values = plotcolors, name = "Exposure") +
  scale_x_discrete(breaks = c(0, 1), labels = c("65 or younger", "Over 65")) +
  ylab("Number exposed") +
  ggtitle("Age")

grid.arrange(p1a,p2a,nrow=1)

## ---- tidy = TRUE--------------------------------------------------------

# Baseline data definitions

def <- defData(varname = "x1", formula = .5, dist = "binary")
def <- defData(def,varname = "x2", formula = .5, dist = "binary")
def <- defData(def,varname = "grp", formula = .5, dist = "binary")

# Survival data definitions

sdef <- defSurv(varname = "survTime", formula = "1.5*x1", scale = "grp*50 + (1-grp)*25", shape = "grp*1 + (1-grp)*1.5")
sdef <- defSurv(sdef, varname = "censorTime", scale = 80, shape = 1)

sdef


## ---- tidy = TRUE--------------------------------------------------------

# Baseline data definitions

dtSurv <- genData(300, def)
dtSurv <- genSurv(dtSurv, sdef)

head(dtSurv)

# A comparison of survival by group and x1

dtSurv[,round(mean(survTime),1), keyby = .(grp,x1)]


## ---- tidy = TRUE--------------------------------------------------------

cdef <- defDataAdd(varname = "obsTime", formula = "pmin(survTime, censorTime)", dist="nonrandom")
cdef <- defDataAdd(cdef, varname = "status", formula = "I(survTime <= censorTime)",dist="nonrandom")

dtSurv <- addColumns(cdef, dtSurv)

head(dtSurv)

# estimate proportion of censoring by x1 and group

dtSurv[,round(1-mean(status),2), keyby = .(grp,x1)]

## ---- tidy = TRUE, echo = FALSE, fig.width = 6.5, fig.height = 3.5-------
fit <- survfit(Surv(obsTime, status) ~ x1+grp, data=dtSurv)
# ggsurvplot(fit, palette = cbbPalette, font.tickslab = c(8), font.x = 10, font.y = 10,
#            legend = c(0.8, 0.8))

ggsurv_m(fit, cens.col = "grey50", surv.col = cbbPalette, 
         labels = c("grp=0 & x1=0","grp=1 & x1=0","grp=0 & x1=1","grp=1 & x1=1")) +
  ggplot2::guides(linetype = FALSE) +
  ggtheme("grey95") +
  theme(legend.position=c(.8,.8), 
        legend.title = element_blank(),
        legend.key = element_rect(fill="grey95" , color = "grey95"),
        legend.background = element_rect(fill="grey95"),
        legend.key.width = unit(1, "cm")) +
  guides(colour = guide_legend(override.aes = list(size=1)))


## ---- tidy = TRUE--------------------------------------------------------
tdef <- defData(varname = "T", dist="binary", formula = 0.5)
tdef <- defData(tdef, varname = "Y0", dist = "normal", formula = 10, variance = 1)
tdef <- defData(tdef, varname = "Y1", dist = "normal", formula = "Y0 + 5 + 5 * T", variance = 1)
tdef <- defData(tdef, varname = "Y2", dist = "normal", formula = "Y0 + 10 + 5 * T", variance = 1)

dtTrial <- genData( 500, tdef)
dtTrial

## ---- tidy = TRUE--------------------------------------------------------
dtTime <- addPeriods(dtTrial, nPeriods = 3, idvars = "id", timevars = c("Y0", "Y1", "Y2"), timevarName = "Y")
dtTime

## ---- tidy = TRUE, echo = FALSE, fig.width = 6, fig.height = 3-----------

avg <- dtTime[,.(Y=mean(Y)), keyby = .(T, period)]

ggplot(data = dtTime, aes(x = factor(period), y = Y)) +
  geom_jitter(aes(color=factor(T)), size = .5, alpha = .8, width = .25) +
  geom_line(data=avg, aes(x = factor(period), y = Y, group = T, color= factor(T)), size=1) +
  xlab("Period") +
  scale_color_manual(values = plotcolors[c(3,1)], 
                     labels = c("Ctrl", "Trt")) +
  theme(legend.title=element_blank()) +
  ggtheme("grey90") +
  theme(legend.key=element_rect(fill=NA))

## ---- tidy = TRUE--------------------------------------------------------
def <- defData(varname = "xbase", dist = "normal", formula = 20, variance = 3)
def <- defData(def,varname = "nCount", dist = "noZeroPoisson", formula = 6)
def <- defData(def, varname = "mInterval", dist = "gamma", formula = 30, variance = .01)
def <- defData(def, varname = "vInterval", dist = "nonrandom", formula = .07)

dt <- genData(200, def)
dt[id %in% c(8,121)]                # View individuals 8 and 121

## ---- tidy = TRUE--------------------------------------------------------
dtPeriod <- addPeriods(dt)
dtPeriod[id %in% c(8,121)]  # View individuals 8 and 121 only

## ---- tidy = TRUE--------------------------------------------------------
def2 <- defDataAdd(varname = "Y", dist = "normal", formula = "15 + .1 * time", variance = 5)
dtPeriod <- addColumns(def2, dtPeriod)

## ---- tidy = TRUE, echo = FALSE, fig.width = 6, fig.height = 3-----------

sampledID <- sample(1:nrow(dt), 5)
dtSample <- dtPeriod[id %in% sampledID]

ggplot(data = dtSample, aes(x = time, y = Y, group=id)) +
  geom_point(aes(color = factor(id))) +
  geom_line(aes(color = factor(id))) +
  xlab("Day") +
  scale_color_manual(values = cbbPalette) +
  theme(legend.position = "none") +
  ggtheme("grey90")

## ---- tidy = TRUE--------------------------------------------------------
gen.school <- defData(varname="s0", dist = "normal", 
                      formula = 0, variance = 3, id = "idSchool"
)
gen.school <- defData(gen.school, varname = "nClasses", 
                      dist = "noZeroPoisson", formula = 3
)

dtSchool <- genData(8, gen.school)
dtSchool <- trtAssign(dtSchool, n = 2)

dtSchool


## ---- tidy = TRUE--------------------------------------------------------
gen.class <- defDataAdd(varname = "c0", dist = "normal", formula = 0, 
                     variance = 2)
gen.class <- defDataAdd(gen.class, varname = "nStudents", dist = "noZeroPoisson", formula = 20
)

dtClass <- genCluster(dtSchool, "idSchool", numIndsVar = "nClasses",level1ID = "idClass")
dtClass <- addColumns(gen.class, dtClass)

head(dtClass, 10)

## ---- tidy = TRUE, tidy.opts= list(width.cutoff = 60)--------------------
gen.student <- defDataAdd(varname="Male", dist="binary", formula=0.5)
gen.student <- defDataAdd(gen.student, varname="age", dist = "uniform", formula="9.5; 10.5")
gen.student <- defDataAdd(gen.student, varname="test", dist = "normal",
                       formula = "50 - 5*Male + s0 + c0 + 8 * trtGrp",                           variance = 2)
dtStudent <- genCluster(dtClass,cLevelVar="idClass", numIndsVar = "nStudents",                        level1ID = "idChild")

dtStudent <- addColumns(gen.student, dtStudent)

## ---- tidy = TRUE, echo = FALSE, fig.width = 7, fig.height = 3-----------
ggplot(data=dtStudent,aes(x=factor(idClass),y=test,group=idClass)) +
  geom_boxplot(aes(color=factor(trtGrp), fill = factor(idSchool)))+
  xlab("Classes")+
  ylab("Test scores") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
  scale_fill_manual(values = cbbPalette, guide = FALSE) +
  scale_color_manual(values = c("grey80", "#000000"),
                    labels = c("Ctrl", "Rx"),
                    guide = guide_legend(title = NULL,
                                         override.aes = list(shape = 1,
                                                             keywidth=8
                                                             )
                                         )
                    ) +
  theme(legend.key=element_rect(fill=NA)) +
  ggtheme()

## ---- tidy=TRUE----------------------------------------------------------
# specifying a specific correlation matrix C
C <- matrix(c(1,.7,.2, .7, 1, .8, .2, .8, 1),nrow = 3)
C

# generate 3 correlated variables with different location and scale for each field
dt <- genCorData(1000, mu=c(4,12,3), sigma = c(1,2,3), corMatrix=C)
dt

# estimate correlation matrix
dt[,round(cor(cbind(V1, V2, V3)),1)]

# estimate standard deviation
dt[,round(sqrt(diag(var(cbind(V1, V2, V3)))),1)]

## ---- tidy=TRUE----------------------------------------------------------
# generate 3 correlated variables with different location but same standard deviation
# and compound symmetry (cs) correlation matrix with correlation coefficient = 0.4.
# Other correlation matrix structures are "independent" ("ind") and "auto-regressive" ("ar1").

dt <- genCorData(1000, mu=c(4,12,3), sigma = 3, rho = .4, corstr = "cs", 
                cnames=c("x0","x1","x2"))
dt

# estimate correlation matrix
dt[,round(cor(cbind(x0, x1, x2)),1)]

# estimate standard deviation
dt[,round(sqrt(diag(var(cbind(x0, x1, x2)))),1)]

## ---- tidy = TRUE--------------------------------------------------------

# define and generate the original data set
def <- defData(varname = "x", dist = "normal", formula = 0, variance = 1, id = "cid")
dt <- genData(1000, def)

# add new correlate fields a0 and a1 to "dt"
dt <- addCorData(dt, idname="cid", mu=c(0,0), sigma = c(2,.2), rho = -0.2, 
                 corstr = "cs", cnames=c("a0","a1"))

dt

# estimate correlation matrix
dt[,round(cor(cbind(a0, a1)),1)]

# estimate standard deviation
dt[,round(sqrt(diag(var(cbind(a0, a1)))),1)]

## ------------------------------------------------------------------------
l <- c(8, 10, 12) # lambda for each new variable

dx <- genCorGen(1000, nvars = 3, params1 = l, dist = "poisson", rho = .3, corstr = "cs", wide = TRUE)
dx
round(cor(as.matrix(dx[, .(V1, V2, V3)])), 2)

## ------------------------------------------------------------------------
genCorGen(1000, nvars = 3, params1 = c(.3, .5, .7), dist = "binary", rho = .8, corstr = "cs", wide = TRUE)

## ------------------------------------------------------------------------
dx <- genCorGen(1000, nvars = 3, params1 = l, params2 = c(1,1,1), dist = "gamma", rho = .7, corstr = "cs", wide = TRUE, cnames="a, b, c")
dx
round(cor(as.matrix(dx[, .(a, b, c)])), 2)

## ------------------------------------------------------------------------
dx <- genCorGen(1000, nvars = 3, params1 = l, params2 = c(1,1,1), dist = "gamma", rho = .7, corstr = "cs", wide = FALSE, cnames="NewCol")
dx

## ------------------------------------------------------------------------
def <- defData(varname = "xbase", formula = 5, variance = .2, dist = "gamma", id = "cid")
def <- defData(def, varname = "lambda", formula = ".5 + .1*xbase", dist="nonrandom", link = "log")
def <- defData(def, varname = "p", formula = "-2 + .3*xbase", dist="nonrandom", link = "logit")
def <- defData(def, varname = "gammaMu", formula = ".5 + .2*xbase", dist="nonrandom", link = "log")
def <- defData(def, varname = "gammaDis", formula = 1, dist="nonrandom")

dt <- genData(10000, def)
dt

## ------------------------------------------------------------------------

dtX1 <- addCorGen(dtOld = dt, idvar = "cid", nvars = 3, rho = .1, corstr = "cs",
                    dist = "poisson", param1 = "lambda", cnames = "a, b, c")
dtX1

## ------------------------------------------------------------------------
dtX2 <- addCorGen(dtOld = dt, idvar = "cid", nvars = 4, rho = .4, corstr = "ar1",
                    dist = "binary", param1 = "p")
dtX2

## ------------------------------------------------------------------------
dtX3 <- addCorGen(dtOld = dt, idvar = "cid", nvars = 4, rho = .4, corstr = "cs",
                  dist = "gamma", param1 = "gammaMu", param2 = "gammaDis")
dtX3

## ------------------------------------------------------------------------
def <- defData(varname = "xbase", formula = 5, variance = .4, dist = "gamma", id = "cid")
def <- defData(def, "nperiods", formula = 3, dist = "noZeroPoisson")

def2 <- defDataAdd(varname = "lambda", formula = ".5+.5*period + .1*xbase", dist="nonrandom", link = "log")

dt <- genData(1000, def)

dtLong <- addPeriods(dt, idvars = "cid", nPeriods = 3)
dtLong <- addColumns(def2, dtLong)

dtLong

### Generate the data 

dtX3 <- addCorGen(dtOld = dtLong, idvar = "cid", nvars = 3, rho = .6, corstr = "cs",
                  dist = "poisson", param1 = "lambda", cnames = "NewPois")
dtX3

## ------------------------------------------------------------------------
geefit <- gee(NewPois ~ period + xbase, data = dtX3, id = cid, family = poisson, corstr = "exchangeable")
round(summary(geefit)$working.correlation, 2)


## ---- tidy = TRUE--------------------------------------------------------
def1 <- defData(varname = "m", dist = "binary", formula = .5)
def1 <- defData(def1, "u", dist = "binary", formula = .5)
def1 <- defData(def1, "x1", dist="normal", formula = "20*m + 20*u", variance = 2)
def1 <- defData(def1, "x2", dist="normal", formula = "20*m + 20*u", variance = 2)
def1 <- defData(def1, "x3", dist="normal", formula = "20*m + 20*u", variance = 2)

dtAct <- genData(1000, def1)

## ---- tidy = TRUE--------------------------------------------------------
defM <- defMiss(varname = "x1", formula = .15, logit.link = FALSE)
defM <- defMiss(defM, varname = "x2", formula = ".05 + m * 0.25", logit.link = FALSE)
defM <- defMiss(defM, varname = "x3", formula = ".05 + u * 0.25", logit.link = FALSE)
defM <- defMiss(defM, varname = "u", formula = 1, logit.link = FALSE) # not observed

missMat <- genMiss(dtAct, defM, idvars = "id")
dtObs <- genObs(dtAct, missMat, idvars = "id")

## ---- tidy = TRUE--------------------------------------------------------
missMat
dtObs

## ----tidy=TRUE-----------------------------------------------------------
# Two functions to calculate means and compare them

rmean <- function(var, digits = 1) {
  round(mean(var, na.rm=TRUE), digits)
}

showDif <- function(dt1, dt2, rowName = c("Actual", "Observed", "Difference")) {
  dt <- data.frame(rbind(dt1, dt2, dt1 - dt2))
  rownames(dt) <- rowName
  return(dt)
}

# data.table functionality to estimate means for each data set

meanAct <- dtAct[,.(x1 = rmean(x1), x2 = rmean(x2), x3 = rmean(x3))]
meanObs <- dtObs[,.(x1 = rmean(x1), x2 = rmean(x2), x3 = rmean(x3))]

showDif(meanAct, meanObs)

## ----tidy=TRUE-----------------------------------------------------------
meanActm <- dtAct[,.(x1 = rmean(x1), x2 = rmean(x2), x3 = rmean(x3)), keyby = m]
meanObsm <- dtObs[,.(x1 = rmean(x1), x2 = rmean(x2), x3 = rmean(x3)), keyby = m]

## ---- tidy = TRUE--------------------------------------------------------
# compare observed and actual when m = 0

showDif(meanActm[m==0, .(x1, x2, x3)], meanObsm[m==0, .(x1, x2, x3)])

# compare observed and actual when m = 1

showDif(meanActm[m==1, .(x1, x2, x3)], meanObsm[m==1, .(x1, x2, x3)])

## ---- tidy = TRUE--------------------------------------------------------

# use baseline definitions from previous example

dtAct <- genData(120, def1)
dtAct <- trtObserve(dtAct, formulas = .5, logit.link = FALSE, grpName = "rx")

# add longitudinal data

defLong <- defDataAdd(varname = "y", dist = "normal", formula = "10 + period*2 + 2 * rx", variance = 2)

dtTime <- addPeriods(dtAct, nPeriods = 4)
dtTime <- addColumns(defLong, dtTime)

## ---- tidy = TRUE--------------------------------------------------------

# missingness for y is not monotonic

defMlong <- defMiss(varname = "x1", formula = .20, baseline = TRUE)
defMlong <- defMiss(defMlong,varname = "y", formula = "-1.5 - 1.5 * rx + .25*period", logit.link = TRUE, baseline = FALSE, monotonic = FALSE)

missMatLong <- genMiss(dtTime, defMlong, idvars = c("id","rx"), repeated = TRUE, periodvar = "period")

## ----tidy=TRUE, echo=FALSE, fig.width = 7, fig.height = 6----------------
xp10 <- ggmissing(missMatLong, varSelect="rx", varLevel = 0, idvar = "id",
                 periodvar = "period", missvar="x1", pcolor="#1C5974",
                 title = "x1: baseline (control)")

xp11 <- ggmissing(missMatLong, varSelect="rx", varLevel = 1, idvar = "id",
                 periodvar = "period", missvar="x1", pcolor="#B84226",
                 title = "x1: baseline (exposed)")

xp20 <- ggmissing(missMatLong, varSelect="rx", varLevel = 0, idvar = "id",
                 periodvar = "period", missvar="y", pcolor="#1C5974",
                 title = "y: not monotonic (control)")

xp21 <- ggmissing(missMatLong, varSelect="rx", varLevel = 1, idvar = "id",
                 periodvar = "period", missvar="y", pcolor="#B84226",
                 title = "y: not monotonic (exposed)")

grid.arrange(xp10, xp20, xp11, xp21,
             nrow = 2,
             bottom = textGrob("Periods", gp = gpar(cex=.8)) #,
#             left = textGrob("ID", gp = gpar(cex = .8), rot = 90)
)



## ----tidy=TRUE-----------------------------------------------------------
# missingness for y is not monotonic

defMlong <- defMiss(varname = "x1", formula = .20, baseline = TRUE)
defMlong <- defMiss(defMlong,varname = "y", formula = "-1.8 - 1.5 * rx + .25*period", logit.link = TRUE, baseline = FALSE, monotonic = TRUE)

missMatLong <- genMiss(dtTime, defMlong, idvars = c("id","rx"), repeated = TRUE, periodvar = "period")

## ----tidy=TRUE, echo=FALSE, fig.width = 7, fig.height = 6----------------
xp10 <- ggmissing(missMatLong, varSelect="rx", varLevel = 0, idvar = "id",
                 periodvar = "period", missvar="x1", pcolor="#1C5974",
                 title = "x1: baseline (control)")

xp11 <- ggmissing(missMatLong, varSelect="rx", varLevel = 1, idvar = "id",
                 periodvar = "period", missvar="x1", pcolor="#B84226",
                 title = "x1: baseline (exposed)")

xp20 <- ggmissing(missMatLong, varSelect="rx", varLevel = 0, idvar = "id",
                 periodvar = "period", missvar="y", pcolor="#1C5974",
                 title = "y: monotonic (control)")

xp21 <- ggmissing(missMatLong, varSelect="rx", varLevel = 1, idvar = "id",
                 periodvar = "period", missvar="y", pcolor="#B84226",
                 title = "y: monotonic (exposed)")

grid.arrange(xp10, xp20, xp11, xp21,
             nrow = 2,
             bottom = textGrob("Periods", gp = gpar(cex=.8)) #,
#             left = textGrob("ID", gp = gpar(cex = .8), rot = 90)
)



