BKMR CMA Example 3 • causalbkmr

library(bkmr)
library(mixtools)
library(causalbkmr)
library(ggplot2)
library(ggpubr)

In this scenario, we have a binary \(M\), a continuous outcome \(Y\), and no effect modification. The sample size is 50 and there are 3 covariates.

Sample simulated Dataset

dat <-  cma_sampledata(N = 50, L=3, P=3, scenario=3, seed=7) 
head(dat$data, n = 3L)
#>           z1         z2         z3 M         x1          x2        x3
#> 1  2.2767428  0.1432685  0.7973699 1 -0.3556944  2.02334405 -1.477592
#> 2 -1.0893537 -0.0944783  0.4035083 0  1.0973004  0.86249250 -2.973256
#> 3 -0.6389109  0.2491581 -0.1243989 0 -0.9066920 -0.02490949 -1.339762
#>            y
#> 1  0.8482788
#> 2 -1.0776233
#> 3 -1.2847646
dat = dat$data

Fit BKMR Models

let \(A\) be a \(n\)-by-\(L\) matrix containing an exposure mixture comprised of \(L\) elements,E.M and E.Y be effect modifiers of exposure-mediator and exposure-outcome relationship respectively, \(y\), a vector of outcome data, and \(m\), a vector of mediator data.

Z.M <- cbind(A,E.M)

Z.Y <- cbind(A,E.Y)

Let Z.M be the exposures and effect modifers E.M and let Z.Y be the exposures and effect modifers E.Y, create one more matrix containing the exposures, effect modifier Z.Y and mediator, precisely in that order.

Zm.Y <- cbind(Z.Y,m)

NOTE: the last column of the Zm.Y matrix must be your mediator in order for the functions to work properly.

A <- cbind(dat$z1, dat$z2, dat$z3)
X <- cbind(dat$x1, dat$x2, dat$x3)
y  <- dat$y
m  <- dat$M 

E.M <- NULL
E.Y <- dat$x2

Z.M <- cbind(A,E.M) 
Z.Y <- cbind(A, E.Y) 
Zm.Y <- cbind(Z.Y, m)

set.seed(1)
fit.y <- kmbayes(y=y, Z=Zm.Y, X=X, iter=10000, verbose=TRUE, varsel=FALSE) 
#save(fit.y,file="bkmr_y.RData")

set.seed(2)
fit.y.TE <- kmbayes(y=y, Z=Z.Y, X=X, iter=10000, verbose=TRUE, varsel=FALSE) 
#save(fit.y.TE,file="bkmr_y_TE.RData")

set.seed(3)
fit.m <- kmbayes(y=m, Z=Z.M, X=X, iter=10000, verbose=TRUE, varsel=FALSE) 
#save(fit.m,file="bkmr_m.RData")

Values at which to predict counterfactuals

Mean level of confounders:

X.predict <- matrix(colMeans(X),nrow=1)

We define the change in exposure for which you want to estimate the mediation effects: in this example, we will consider a change in all exposures from their 25th to 75th percentiles, fixing age (E.Y) at testing to its 10th and 90th percentiles. However, this contrast can be anything.

Note: If modifiers are considered, you should fix the levels of the modifiers

astar <- c(apply(A, 2, quantile, probs=0.25))  
a <- c(apply(A, 2, quantile, probs=0.75))

e.y10 = quantile(E.Y, probs=0.1)
e.y90 = quantile(E.Y, probs=0.9)

The index of the MCMC iterations to be used for inference:

sel<-seq(5000,10000,by=10)

Estimate TE for BKMR

Estimate the TE for a change in the exposures from \(a^*\) to \(a\) fixing Effect modifier at testing to its 10th percentile or 90th percentile:

e.y10 = quantile(E.Y, probs=0.1)
e.y90 = quantile(E.Y, probs=0.9)

TE.ey10 <- TE.bkmr(a=a, astar=astar, e.y = e.y10, fit.y.TE=fit.y.TE, X.predict=X.predict, alpha=0.05, sel=sel, seed=122)

TE.ey90 <- TE.bkmr(a=a, astar=astar, e.y = e.y90, fit.y.TE=fit.y.TE, X.predict=X.predict, alpha=0.05, sel=sel, seed=122)

Look at the posterior mean, median, and 95% CI for TE:

TE.ey10$est
#>         mean    median     lower    upper        sd
#> TE 0.9564067 0.9635427 0.7270294 1.197973 0.1179727
TE.ey90$est
#>         mean    median     lower    upper        sd
#> TE 0.9572624 0.9601179 0.7096564 1.196424 0.1158066

plotdf <- as.data.frame(TE.ey10$est)
plotdf["Effect"] <- rownames(plotdf)
ggplot(plotdf, aes(Effect, mean, ymin = lower, ymax = upper))  + 
  geom_pointrange(position = position_dodge(width = 0.75))  +  coord_flip()

Estimate CDE for BKMR

Estimate the CDE for a change in the exposures from \(a^*\) to \(a\), fixing the mediator at its 10th, 50th, and 75th percentile and the effect modifier at testing at its 10th percentile:

CDE.ey10 <- CDE.bkmr(a=a, astar=astar, e.y = e.y10, m.quant=c(0.1,0.5,0.75), fit.y=fit.y, alpha=0.05, sel=sel, seed=777)
#> [1] "Running 3 mediator values for CDE:"
#> [1] "1 out of 3"
#> [1] "2 out of 3"
#> [1] "3 out of 3"

CDE.ey90 <- CDE.bkmr(a=a, astar=astar, e.y = e.y90, m.quant=c(0.1,0.5,0.75), fit.y=fit.y, alpha=0.05, sel=sel, seed=777)
#> [1] "Running 3 mediator values for CDE:"
#> [1] "1 out of 3"
#> [1] "2 out of 3"
#> [1] "3 out of 3"

Look at the posterior mean, median, and 95% CI for the CDEs:

CDE.ey10$est
#>             mean    median     lower    upper        sd
#> CDE10% 0.8603252 0.8588774 0.5797773 1.130129 0.1383725
#> CDE50% 0.8817881 0.8760663 0.6127921 1.129211 0.1347592
#> CDE75% 0.8817881 0.8760663 0.6127921 1.129211 0.1347592
CDE.ey90$est
#>             mean    median     lower    upper        sd
#> CDE10% 0.8461777 0.8458703 0.5699850 1.119050 0.1415219
#> CDE50% 0.8663077 0.8605127 0.5928742 1.118321 0.1332145
#> CDE75% 0.8663077 0.8605127 0.5928742 1.118321 0.1332145

Plotting:

plotdf <- as.data.frame(CDE.ey10$est)
plotdf["Effect"] <- rownames(plotdf)
ggplot(plotdf, aes(Effect, mean, ymin = lower, ymax = upper ))  + 
  geom_pointrange(position = position_dodge(width = 0.75))  +  coord_flip()

Estimate NDE/NIE for BKMR

Estimate the TE, NDE and NIE for a change in the exposures from \(a^*\) to \(a\) fixing age at testing to its 90th percentile.

Note: this step takes a while to run and will take longer for more complex BKMR fits, longer sel vectors and larger.

mediationeffects.ey10  <- mediation.bkmr(a=a, astar=astar, e.y = e.y10, fit.m=fit.m, fit.y=fit.y, fit.y.TE=fit.y.TE, X.predict.M=X.predict, X.predict.Y=X.predict, alpha=0.05, sel=sel, seed=22, K=10)

mediationeffects.ey90  <- mediation.bkmr(a=a, astar=astar, e.y = e.y90, fit.m=fit.m, fit.y=fit.y, fit.y.TE=fit.y.TE, X.predict.M=X.predict, X.predict.Y=X.predict, alpha=0.05, sel=sel, seed=22, K=10)

Look at the posterior mean, median, and 95% CI for the TE, NDE, and NIE

mediationeffects.ey10$est
#>               mean      median      lower     upper        sd
#> TE      0.94593757  0.94804084  0.7198756 1.1688652 0.1131085
#> NDE     0.97650700  0.96133770  0.4041084 1.5122865 0.2878012
#> NIE    -0.03056943 -0.03241357 -0.5850728 0.5677202 0.2879940
#> CDE10%  0.84526794  0.85391990  0.5286018 1.0973116 0.1433318
#> CDE50%  0.86766860  0.87805519  0.5693485 1.1282255 0.1374816
#> CDE75%  0.86766860  0.87805519  0.5693485 1.1282255 0.1374816
mediationeffects.ey90$est
#>              mean     median      lower     upper        sd
#> TE     0.94638543 0.94983721  0.7192391 1.1691483 0.1166110
#> NDE    0.93592364 0.92888322  0.2986041 1.5583039 0.3127008
#> NIE    0.01046179 0.01401281 -0.5507629 0.6581325 0.3196764
#> CDE10% 0.83077989 0.83275278  0.5200515 1.1285557 0.1525085
#> CDE50% 0.85236830 0.85845312  0.5757799 1.1208304 0.1426577
#> CDE75% 0.85236830 0.85845312  0.5757799 1.1208304 0.1426577

Plotting

plotdf <- as.data.frame(mediationeffects.ey10$est)
plotdf["Effect"] <- rownames(plotdf)
ggplot(plotdf, aes(Effect, mean, ymin = lower, ymax = upper ))  + 
  geom_pointrange(position = position_dodge(width = 0.75))  +  coord_flip()

Summary statistics of the predictor-response function

Risk Summary For Totel Effect

riskSummary10 = TERiskSummaries.CMA(fit.TE = fit.y.TE, e.y=e.y10, e.y.name = "E.Y", sel=sel)

ggplot(riskSummary10,
       aes(quantile,
           est,
           ymin = est - 1.96 * sd,
           ymax = est + 1.96 * sd)) +
  geom_pointrange()


riskSummary90 = TERiskSummaries.CMA(fit.TE = fit.y.TE, e.y=e.y90, e.y.name = "E.Y", sel=sel)

ggplot(riskSummary90,
       aes(quantile,
           est,
           ymin = est - 1.96 * sd,
           ymax = est + 1.96 * sd)) +
  geom_pointrange()

Risk Summary For Controlled Direct Effect

# CDE 
CDEriskSummary10 = CDERiskSummaries.CMA(fit.y = fit.y, e.y = e.y10, e.y.name = "E.Y", m.name = "m", sel = sel)

ggplot(CDEriskSummary10, aes(quantile, est, ymin = est - 1.96*sd, 
                             ymax = est + 1.96*sd, col = m)) + 
  geom_pointrange(position = position_dodge(width = 0.75))

CDEriskSummary90 = CDERiskSummaries.CMA(fit.y = fit.y, e.y = e.y90, e.y.name = "E.Y", m.name = "m", sel = sel)

ggplot(CDEriskSummary90, aes(quantile, est, ymin = est - 1.96*sd, 
                             ymax = est + 1.96*sd, col = m)) + 
  geom_pointrange(position = position_dodge(width = 0.75))

Single-predictor health risks

Total Effect

 
risks.singvar10 = SingVarRiskSummaries.CMA(BKMRfits = fit.y.TE, which.z = 1:3,
                                           e.y=e.y10, e.y.names="E.Y",
                                           sel=sel)
ggplot(risks.singvar10, aes(variable, est, ymin = est - 1.96*sd,
                            ymax = est + 1.96*sd, col = q.fixed)) +
  geom_pointrange(position = position_dodge(width = 0.75)) +
  coord_flip()



risks.singvar90 = SingVarRiskSummaries.CMA(BKMRfits = fit.y.TE, which.z = 1:3,
                                           e.y=e.y90, e.y.names="E.Y",
                                           sel=sel)
ggplot(risks.singvar90, aes(variable, est, ymin = est - 1.96*sd,
                            ymax = est + 1.96*sd, col = q.fixed)) +
  geom_pointrange(position = position_dodge(width = 0.75)) +
  coord_flip()

Controlled Direct Effect

# single variable controlled direct effects
CDErisks.singvar10 = CDESingVarRiskSummaries.CMA(BKMRfits = fit.y,
                                           e.y=e.y10, e.y.names="E.Y", m.name = "m",
                                           sel=sel)

plotCDE10 <- ggplot(CDErisks.singvar10, aes(variable, est, ymin = est - 1.96*sd,
                            ymax = est + 1.96*sd, col = q.fixed, linetype = m.fixed)) +
  geom_pointrange(position = position_dodge(width = 0.75)) +
  coord_flip()

 
CDErisks.singvar90 = CDESingVarRiskSummaries.CMA(BKMRfits = fit.y,
                                                 e.y=e.y90, e.y.names="E.Y", m.name = "m",
                                                 sel=sel)

plotCDE90 <- ggplot(CDErisks.singvar90, aes(variable, est, ymin = est - 1.96*sd,
                               ymax = est + 1.96*sd, col = q.fixed, linetype = m.fixed)) +
  geom_pointrange(position = position_dodge(width = 0.75)) +
  coord_flip()
ggarrange(plotCDE10, plotCDE90, ncol=2, nrow =1)