Direct, total and separable effects, using G-formula

Conditional pooled logistic models

First, we run a set of conditional pooled logistic models, both as a first step to calculate the risk of prostate cancer death, and to calculate weights for death due to other causes, and weights for censoring due to loss to follow-up

### Pooled logistic regression for prostate cancer death

plrFitP <-
  glm(
    prostateDeath ~ rx * (dtime + I(dtime ^ 2) + I(dtime ^ 3)) + normalAct + ageCat + hx + hgBinary ,
    data = longProstRed,
    family = binomial())

### Pooled logistic regression for death due to other causes

plrFitO <-
  glm(
    otherDeath ~ dtime + I(dtime ^ 2) + normalAct + ageCat + hx + hgBinary + rx ,
    data = longProstRed,
    family = binomial())

Direct effect

Create two expanded datasets so that hazards given A=1 can be computed for all subjects (even those with A=0), and viceversa (everyone has A = 0). In an RCT this is not necessary, could stratify models above and this step by treatment arm.
Data set construction where A = 1

treated <- baseline[rep(1:n,each=length(cutTimes)),] #One row for each interval k+1 for k=0,...,K
treated$dtime <- rep(cutTimes,n)
treated$rx <-1

Data set construction where A = 0

# Make analogous dataset for placebo
placebo <- baseline[rep(1:n,each=length(cutTimes)),] #One row for each time
placebo$dtime <- rep(cutTimes,n)
placebo$rx <- 0

Estimate conditional discrete hazards (cause specific for each cause of death/competing event conditioned for event and hazard of competing event) for each subject in each time interval in both cloned datasets

treated$hazardP <- predict(plrFitP, newdata = treated, type = 'response') 
treated$hazardO <- 0
treated$s <- (1-treated$hazardP) * (1-treated$hazardO)

placebo$hazardP <- predict(plrFitP, newdata = placebo, type = 'response') 
placebo$hazardO <- 0
placebo$s <- (1-placebo$hazardP) * (1-placebo$hazardO)

Glimpse of “treated” data:

treated %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()

	patno	dtime	rx	hazardP	s
8478	489	0	1	0.0231061	0.9768939
8478.1	489	1	1	0.0234620	0.9765380
8478.2	489	2	1	0.0238175	0.9761825
8478.3	489	3	1	0.0241730	0.9758270
8478.4	489	4	1	0.0245289	0.9754711
8478.54	489	54	1	0.0658483	0.9341517
8478.55	489	55	1	0.0681584	0.9318416
8478.56	489	56	1	0.0706130	0.9293870
8478.57	489	57	1	0.0732226	0.9267774
8478.58	489	58	1	0.0759986	0.9240014

Glimpse of “placebo” data:

placebo %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()

	patno	dtime	hazardP	s
8478	489	0	0.0458879	0.9541121
8478.1	489	1	0.0431460	0.9568540
8478.2	489	2	0.0409669	0.9590331
8478.3	489	3	0.0392643	0.9607357
8478.4	489	4	0.0379704	0.9620296
8478.54	489	54	0.0142297	0.9857703
8478.55	489	55	0.0113442	0.9886558
8478.56	489	56	0.0088875	0.9911125
8478.57	489	57	0.0068390	0.9931610
8478.58	489	58	0.0051668	0.9948332

We will implement the following estimator, wrapped as the function calculateCumInc from the utility_functions script available in this repository.

calculateCumInc <-
  function(inputData,
           timepts = cutTimes,
           competing = FALSE) {
    cumulativeIncidence <-
      matrix(NA, ncol = length(unique(inputData$patno)), nrow = length(cutTimes))
    # Insert the event probabilities at the first time interval:
    cumulativeIncidence[1,] <-
      inputData[inputData$dtime == 0,]$hazardP * inputData[inputData$dtime ==
                                                             0,]$hazardO
    # Create a matrix compatible with 'cumulativeIncidence' with survival probabilities at each time
    survivalProb <-
      t(aggregate(s ~ patno, data = inputData, FUN = cumprod)$s) #split the long data into subsets per subject
    for (i in 2:length(cutTimes)) {
      subInputDataP <-
        inputData[inputData$dtime == (i - 1),]$hazardP #OBS: dtime starts at 0
      subInputDataO <-
        (1 - inputData[inputData$dtime == (i - 1),]$hazardO) #OBS: dtime starts at 0
      if (!competing)
        cumulativeIncidence[i,] <-
        subInputDataO *  subInputDataP * survivalProb[(i - 1),] # OBS: survivalProb entry i is time point i-1
      else
        cumulativeIncidence[i,] <-
        (1 - subInputDataO) * survivalProb[(i - 1),]
    }
    meanCumulativeIncidence <-
      rowMeans(apply(cumulativeIncidence, MARGIN = 2, cumsum)) #rowMeans(cumulativeIncidence)
    return(meanCumulativeIncidence)
  }

cumIncTreated <- calculateCumInc(treated)#a=1
cumIncPlacebo <- calculateCumInc(placebo)#a=0

cumIncTreated_de_60 <- cumIncTreated[length(cutTimes)]

cumIncPlacebo_de_60 <- cumIncPlacebo[length(cutTimes)]

de_rr <- cumIncTreated_de_60 / cumIncPlacebo_de_60

de_rd <- cumIncTreated_de_60 - cumIncPlacebo_de_60

results_de <- data.frame(cumIncTreated_de_60, cumIncPlacebo_de_60, de_rr, de_rd)

knitr::kable(round(results_de, 2))

cumIncTreated_de_60	cumIncPlacebo_de_60	de_rr	de_rd
0.34	0.39	0.88	-0.05

Total effect

We are going to use the same code as in the previous section, except that in this case instead of fixing the hazard of other causes to death as 0, we are going to calculate hazards for this outcome using predicted values from the plrfit0 model and recalculate the survival probability, given death

treated$hazardO <- predict(plrFitO, newdata = treated, type = 'response')

treated$s <- (1-treated$hazardP) * (1-treated$hazardO)

placebo$hazardO <- predict(plrFitO, newdata = placebo, type = 'response')

placebo$s <- (1-placebo$hazardP) * (1-placebo$hazardO)

Glimpse of “treated” data:

treated %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()

	patno	dtime	rx	hazardP	hazardO	s
8478	489	0	1	0.0231061	0.0759711	0.9026781
8478.1	489	1	1	0.0234620	0.0747473	0.9035444
8478.2	489	2	1	0.0238175	0.0735928	0.9043425
8478.3	489	3	1	0.0241730	0.0725053	0.9050744
8478.4	489	4	1	0.0245289	0.0714825	0.9057420
8478.54	489	54	1	0.0658483	0.0853839	0.8543902
8478.55	489	55	1	0.0681584	0.0871989	0.8505860
8478.56	489	56	1	0.0706130	0.0891097	0.8465696
8478.57	489	57	1	0.0732226	0.0911205	0.8423290
8478.58	489	58	1	0.0759986	0.0932355	0.8378517

Glimpse of “placebo” data:

placebo %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()

	patno	dtime	hazardP	hazardO	s
8478	489	0	0.0458879	0.0614893	0.8954444
8478.1	489	1	0.0431460	0.0604835	0.8989802
8478.2	489	2	0.0409669	0.0595351	0.9019369
8478.3	489	3	0.0392643	0.0586422	0.9043961
8478.4	489	4	0.0379704	0.0578028	0.9064217
8478.54	489	54	0.0142297	0.0692422	0.9175134
8478.55	489	55	0.0113442	0.0707406	0.9187176
8478.56	489	56	0.0088875	0.0723194	0.9194359
8478.57	489	57	0.0068390	0.0739820	0.9196849
8478.58	489	58	0.0051668	0.0757324	0.9194921

Calculate risks and corresponding treatment effects (RR/RD) by end of follow-up using parametric g-formula

cumIncTreated_te <- calculateCumInc(treated)#a=1
cumIncPlacebo_te <- calculateCumInc(placebo)#a=0

cumIncTreated_te_60 <- cumIncTreated_te[length(cutTimes)] 

cumIncPlacebo_te_60 <- cumIncPlacebo_te[length(cutTimes)]

te_rr <- cumIncTreated_te_60 / cumIncPlacebo_te_60

te_rd <- cumIncTreated_te_60 - cumIncPlacebo_te_60

results_te <- data.frame(cumIncTreated_te_60, cumIncPlacebo_te_60, te_rr, te_rd)

knitr::kable(round(results_te, 2))

cumIncTreated_te_60	cumIncPlacebo_te_60	te_rr	te_rd
0.21	0.28	0.74	-0.07

Total effect of death on other causes

# parametric g-formula estimate of the risk of the competing event itself
cumIncTreated_te_comp <- calculateCumInc(treated, competing = TRUE)#a=1

cumIncPlacebo_te_comp <- calculateCumInc(placebo, competing = TRUE)#a=0

cumIncTreated_te_comp_60 <- cumIncTreated_te_comp[length(cutTimes)] 

cumIncPlacebo_te_comp_60 <- cumIncPlacebo_te_comp[length(cutTimes)]

te_comp_rr <- cumIncTreated_te_comp_60 / cumIncPlacebo_te_comp_60

te_comp_rd <- cumIncTreated_te_comp_60 - cumIncPlacebo_te_comp_60

results_te_comp <- data.frame(cumIncTreated_te_comp_60, cumIncPlacebo_te_comp_60, te_comp_rr, te_comp_rd)

knitr::kable(round(results_te_comp, 2))

cumIncTreated_te_comp_60	cumIncPlacebo_te_comp_60	te_comp_rr	te_comp_rd
0.51	0.41	1.23	0.09

Separable effects

Following equation (9) from Stensrud et al. Journal of the American Statistical Association. 2020

To this matter, we will use the long format data set prepared before: longProstRed.

Make one extra copy of the exposure rx, named Orx, to be used in the hazard of death due to other causes.

longProstRed$Orx <- longProstRed$rx

Fit conditional pooled logistic regression models

plrFitP <-
    glm(
      prostateDeath ~ rx*(dtime + I(dtime ^ 2)+ I(dtime ^ 3)) + normalAct + ageCat + hx + hgBinary,
      data = longProstRed,
      family = binomial()
    )
    
plrFitO <-
  glm(
    otherDeath ~ Orx * (dtime + I(dtime ^ 2) + I(dtime ^ 3)) + normalAct + ageCat + hx + hgBinary,
    data = longProstRed,
    family = binomial()
  )

Create three copies of the data:

Data set construction where Ay and Ad = 1

treated <-
    baseline[rep(1:n, each = length(cutTimes)), ] #One row for each time

treated$dtime <- rep(cutTimes, n)
  
treated$rx <- 1

treated$Orx <- 1

Dataset construction where Ay and Ad = 1

placebo <-
    baseline[rep(1:n, each = length(cutTimes)), ] #One row for each time
  
placebo$dtime <- rep(cutTimes, n)

placebo$rx <- 0

placebo$Orx <- 0

Dataset construction where Ay = 1, Ad = 0

treatAy <-
    baseline[rep(1:n, each = length(cutTimes)), ]
  
treatAy$dtime <- rep(cutTimes, n)
  
treatAy$rx <- 1
  
treatAy$Orx <- 0

Calculate hazards for each time interval in the three cloned data sets

## For Ay = 1 and Ad = 1

treated$hazardP <-
    predict(plrFitP, newdata = treated, type = 'response')
  
treated$hazardO <-
    predict(plrFitO, newdata = treated, type = 'response')
  
treated$s <- (1 - treated$hazardP) * (1 - treated$hazardO)

## For Ay = 0, Ad = 0

placebo$hazardP <-
    predict(plrFitP, newdata = placebo, type = 'response')

placebo$hazardO <-
    predict(plrFitO, newdata = placebo, type = 'response')

placebo$s <- (1 - placebo$hazardP) * (1 - placebo$hazardO)

## For Ay = 1, Ad = 0

treatAy$hazardP <-
    predict(plrFitP, newdata = treatAy, type = 'response')
  
treatAy$hazardO <-
    predict(plrFitO, newdata = treatAy, type = 'response')
  
treatAy$s <- (1 - treatAy$hazardP) * (1 - treatAy$hazardO)

Glimpse of “treated” data:

treated %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, Orx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()

	patno	dtime	rx	Orx	hazardP	hazardO	s
8478	489	0	1	1	0.0231061	0.1215098	0.8581917
8478.1	489	1	1	1	0.0234620	0.1070610	0.8719889
8478.2	489	2	1	1	0.0238175	0.0951232	0.8833249
8478.3	489	3	1	1	0.0241730	0.0852474	0.8926403
8478.4	489	4	1	1	0.0245289	0.0770658	0.9002956
8478.54	489	54	1	1	0.0658483	0.0763298	0.8628481
8478.55	489	55	1	1	0.0681584	0.0726713	0.8641235
8478.56	489	56	1	1	0.0706130	0.0686332	0.8656002
8478.57	489	57	1	1	0.0732226	0.0642719	0.8672116
8478.58	489	58	1	1	0.0759986	0.0596529	0.8688820

Glimpse of “placebo” data:

placebo %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, Orx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()

	patno	dtime	hazardP	hazardO	s
8478	489	0	0.0458879	0.0509117	0.9055366
8478.1	489	1	0.0431460	0.0501587	0.9088594
8478.2	489	2	0.0409669	0.0495393	0.9115233
8478.3	489	3	0.0392643	0.0490433	0.9136181
8478.4	489	4	0.0379704	0.0486621	0.9152153
8478.54	489	54	0.0142297	0.0488282	0.9376369
8478.55	489	55	0.0113442	0.0468247	0.9423623
8478.56	489	56	0.0088875	0.0447376	0.9467726
8478.57	489	57	0.0068390	0.0425801	0.9508721
8478.58	489	58	0.0051668	0.0403663	0.9546755

Glimpse of “treatAy” data:

treatAy %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, Orx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()

	patno	dtime	rx	hazardP	hazardO	s
8478	489	0	1	0.0231061	0.0509117	0.9271586
8478.1	489	1	1	0.0234620	0.0501587	0.9275561
8478.2	489	2	1	0.0238175	0.0495393	0.9278231
8478.3	489	3	1	0.0241730	0.0490433	0.9279692
8478.4	489	4	1	0.0245289	0.0486621	0.9280027
8478.54	489	54	1	0.0658483	0.0488282	0.8885387
8478.55	489	55	1	0.0681584	0.0468247	0.8882084
8478.56	489	56	1	0.0706130	0.0447376	0.8878085
8478.57	489	57	1	0.0732226	0.0425801	0.8873151
8478.58	489	58	1	0.0759986	0.0403663	0.8867029

Calculate cumulative incidence

cumIncTreated <- calculateCumInc(treated)
cumIncTreatAy <- calculateCumInc(treatAy)
cumIncPlacebo <- calculateCumInc(placebo)

estimatesFullData <-
  data.frame(cumIncTreated, cumIncTreatAy, cumIncPlacebo)

knitr::kable(round(estimatesFullData[36,], 2))

	cumIncTreated	cumIncTreatAy	cumIncPlacebo
36	0.13	0.14	0.21

Cumulative incidence curves

plot(cutTimes, cumIncTreated, type = "s",
     ylim = c(0, 1), ylab = "Risk",
     xlab = "Months", 
     lty = 1, lwd = 1, xlim = c(0, 52))

lines(cutTimes, cumIncPlacebo, type = "s",
      col = 2, ylim = c(0, 1), lwd = 1)

lines(cutTimes, cumIncTreatAy, type = "s",
      col = 3, ylim = c(0, 1), lwd = 1)

legend(x = 0, y = 1, 
       c("(Ay=1,Ad=1)", "(Ay=1,Ad=0)", "(Ay=0,Ad=0)"),
       bty = "n", col = c(1, 3, 2),
       lty = c(1, 1, 1), cex = 1, pt.cex = 1, lwd = 1)

Go to: Data cleaning and setup here

Go to: Direct, total and separable effects, using IPW

Go to github repository

References

Young JG, Stensrud MJ, Tchetgen Tchetgen EJ, Hernán MA. A causal framework for classical statistical estimands in failure time settings with competing events. Statistics in Medicine 2020. Paper link Code
Stensrud MJ, Young JG, Didelez V, Robins JM, Hernán MA. Separable Effects for Causal Inference in the Presence of Competing Events. Journal of the American Statistical Association 2022. Paper link
Rojas-Saunero LP, Young JG, Didelez V, Ikram MA, Swanson SA. Considering questions before methods in dementia research with competing events and causal goals. American Journal of Epidemiology. 2023. Paper link Code