Direct, total and separable effects, using G-formula

Conditional pooled logistic models

  • First, we run a set of conditional pooled logistic models, both as a first step to calculate the risk of prostate cancer death, and to calculate weights for death due to other causes, and weights for censoring due to loss to follow-up
### Pooled logistic regression for prostate cancer death

plrFitP <-
  glm(
    prostateDeath ~ rx * (dtime + I(dtime ^ 2) + I(dtime ^ 3)) + normalAct + ageCat + hx + hgBinary ,
    data = longProstRed,
    family = binomial())

### Pooled logistic regression for death due to other causes

plrFitO <-
  glm(
    otherDeath ~ dtime + I(dtime ^ 2) + normalAct + ageCat + hx + hgBinary + rx ,
    data = longProstRed,
    family = binomial())

Direct effect

  • Create two expanded datasets so that hazards given A=1 can be computed for all subjects (even those with A=0), and viceversa (everyone has A = 0). In an RCT this is not necessary, could stratify models above and this step by treatment arm.

  • Data set construction where A = 1

treated <- baseline[rep(1:n,each=length(cutTimes)),] #One row for each interval k+1 for k=0,...,K
treated$dtime <- rep(cutTimes,n)
treated$rx <-1 
  • Data set construction where A = 0
# Make analogous dataset for placebo
placebo <- baseline[rep(1:n,each=length(cutTimes)),] #One row for each time
placebo$dtime <- rep(cutTimes,n)
placebo$rx <- 0 
  • Estimate conditional discrete hazards (cause specific for each cause of death/competing event conditioned for event and hazard of competing event) for each subject in each time interval in both cloned datasets
treated$hazardP <- predict(plrFitP, newdata = treated, type = 'response') 
treated$hazardO <- 0
treated$s <- (1-treated$hazardP) * (1-treated$hazardO)

placebo$hazardP <- predict(plrFitP, newdata = placebo, type = 'response') 
placebo$hazardO <- 0
placebo$s <- (1-placebo$hazardP) * (1-placebo$hazardO)
  • Glimpse of “treated” data:
treated %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()
patno dtime rx hazardP hazardO s
8478 489 0 1 0.0231061 0 0.9768939
8478.1 489 1 1 0.0234620 0 0.9765380
8478.2 489 2 1 0.0238175 0 0.9761825
8478.3 489 3 1 0.0241730 0 0.9758270
8478.4 489 4 1 0.0245289 0 0.9754711
8478.54 489 54 1 0.0658483 0 0.9341517
8478.55 489 55 1 0.0681584 0 0.9318416
8478.56 489 56 1 0.0706130 0 0.9293870
8478.57 489 57 1 0.0732226 0 0.9267774
8478.58 489 58 1 0.0759986 0 0.9240014
  • Glimpse of “placebo” data:
placebo %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()
patno dtime rx hazardP hazardO s
8478 489 0 0 0.0458879 0 0.9541121
8478.1 489 1 0 0.0431460 0 0.9568540
8478.2 489 2 0 0.0409669 0 0.9590331
8478.3 489 3 0 0.0392643 0 0.9607357
8478.4 489 4 0 0.0379704 0 0.9620296
8478.54 489 54 0 0.0142297 0 0.9857703
8478.55 489 55 0 0.0113442 0 0.9886558
8478.56 489 56 0 0.0088875 0 0.9911125
8478.57 489 57 0 0.0068390 0 0.9931610
8478.58 489 58 0 0.0051668 0 0.9948332

We will implement the following estimator, wrapped as the function calculateCumInc from the utility_functions script available in this repository.

calculateCumInc <-
  function(inputData,
           timepts = cutTimes,
           competing = FALSE) {
    cumulativeIncidence <-
      matrix(NA, ncol = length(unique(inputData$patno)), nrow = length(cutTimes))
    # Insert the event probabilities at the first time interval:
    cumulativeIncidence[1,] <-
      inputData[inputData$dtime == 0,]$hazardP * inputData[inputData$dtime ==
                                                             0,]$hazardO
    # Create a matrix compatible with 'cumulativeIncidence' with survival probabilities at each time
    survivalProb <-
      t(aggregate(s ~ patno, data = inputData, FUN = cumprod)$s) #split the long data into subsets per subject
    for (i in 2:length(cutTimes)) {
      subInputDataP <-
        inputData[inputData$dtime == (i - 1),]$hazardP #OBS: dtime starts at 0
      subInputDataO <-
        (1 - inputData[inputData$dtime == (i - 1),]$hazardO) #OBS: dtime starts at 0
      if (!competing)
        cumulativeIncidence[i,] <-
        subInputDataO *  subInputDataP * survivalProb[(i - 1),] # OBS: survivalProb entry i is time point i-1
      else
        cumulativeIncidence[i,] <-
        (1 - subInputDataO) * survivalProb[(i - 1),]
    }
    meanCumulativeIncidence <-
      rowMeans(apply(cumulativeIncidence, MARGIN = 2, cumsum)) #rowMeans(cumulativeIncidence)
    return(meanCumulativeIncidence)
  }
cumIncTreated <- calculateCumInc(treated)#a=1
cumIncPlacebo <- calculateCumInc(placebo)#a=0

cumIncTreated_de_60 <- cumIncTreated[length(cutTimes)]

cumIncPlacebo_de_60 <- cumIncPlacebo[length(cutTimes)]

de_rr <- cumIncTreated_de_60 / cumIncPlacebo_de_60

de_rd <- cumIncTreated_de_60 - cumIncPlacebo_de_60

results_de <- data.frame(cumIncTreated_de_60, cumIncPlacebo_de_60, de_rr, de_rd)

knitr::kable(round(results_de, 2)) 
cumIncTreated_de_60 cumIncPlacebo_de_60 de_rr de_rd
0.34 0.39 0.88 -0.05

Total effect

  • We are going to use the same code as in the previous section, except that in this case instead of fixing the hazard of other causes to death as 0, we are going to calculate hazards for this outcome using predicted values from the plrfit0 model and recalculate the survival probability, given death
treated$hazardO <- predict(plrFitO, newdata = treated, type = 'response')

treated$s <- (1-treated$hazardP) * (1-treated$hazardO)

placebo$hazardO <- predict(plrFitO, newdata = placebo, type = 'response')

placebo$s <- (1-placebo$hazardP) * (1-placebo$hazardO)
  • Glimpse of “treated” data:
treated %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()
patno dtime rx hazardP hazardO s
8478 489 0 1 0.0231061 0.0759711 0.9026781
8478.1 489 1 1 0.0234620 0.0747473 0.9035444
8478.2 489 2 1 0.0238175 0.0735928 0.9043425
8478.3 489 3 1 0.0241730 0.0725053 0.9050744
8478.4 489 4 1 0.0245289 0.0714825 0.9057420
8478.54 489 54 1 0.0658483 0.0853839 0.8543902
8478.55 489 55 1 0.0681584 0.0871989 0.8505860
8478.56 489 56 1 0.0706130 0.0891097 0.8465696
8478.57 489 57 1 0.0732226 0.0911205 0.8423290
8478.58 489 58 1 0.0759986 0.0932355 0.8378517
  • Glimpse of “placebo” data:
placebo %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()
patno dtime rx hazardP hazardO s
8478 489 0 0 0.0458879 0.0614893 0.8954444
8478.1 489 1 0 0.0431460 0.0604835 0.8989802
8478.2 489 2 0 0.0409669 0.0595351 0.9019369
8478.3 489 3 0 0.0392643 0.0586422 0.9043961
8478.4 489 4 0 0.0379704 0.0578028 0.9064217
8478.54 489 54 0 0.0142297 0.0692422 0.9175134
8478.55 489 55 0 0.0113442 0.0707406 0.9187176
8478.56 489 56 0 0.0088875 0.0723194 0.9194359
8478.57 489 57 0 0.0068390 0.0739820 0.9196849
8478.58 489 58 0 0.0051668 0.0757324 0.9194921
  • Calculate risks and corresponding treatment effects (RR/RD) by end of follow-up using parametric g-formula
cumIncTreated_te <- calculateCumInc(treated)#a=1
cumIncPlacebo_te <- calculateCumInc(placebo)#a=0

cumIncTreated_te_60 <- cumIncTreated_te[length(cutTimes)] 

cumIncPlacebo_te_60 <- cumIncPlacebo_te[length(cutTimes)]

te_rr <- cumIncTreated_te_60 / cumIncPlacebo_te_60

te_rd <- cumIncTreated_te_60 - cumIncPlacebo_te_60

results_te <- data.frame(cumIncTreated_te_60, cumIncPlacebo_te_60, te_rr, te_rd)

knitr::kable(round(results_te, 2))
cumIncTreated_te_60 cumIncPlacebo_te_60 te_rr te_rd
0.21 0.28 0.74 -0.07

Total effect of death on other causes

# parametric g-formula estimate of the risk of the competing event itself
cumIncTreated_te_comp <- calculateCumInc(treated, competing = TRUE)#a=1

cumIncPlacebo_te_comp <- calculateCumInc(placebo, competing = TRUE)#a=0

cumIncTreated_te_comp_60 <- cumIncTreated_te_comp[length(cutTimes)] 

cumIncPlacebo_te_comp_60 <- cumIncPlacebo_te_comp[length(cutTimes)]

te_comp_rr <- cumIncTreated_te_comp_60 / cumIncPlacebo_te_comp_60

te_comp_rd <- cumIncTreated_te_comp_60 - cumIncPlacebo_te_comp_60

results_te_comp <- data.frame(cumIncTreated_te_comp_60, cumIncPlacebo_te_comp_60, te_comp_rr, te_comp_rd)

knitr::kable(round(results_te_comp, 2))
cumIncTreated_te_comp_60 cumIncPlacebo_te_comp_60 te_comp_rr te_comp_rd
0.51 0.41 1.23 0.09

Separable effects

Following equation (9) from Stensrud et al. Journal of the American Statistical Association. 2020

To this matter, we will use the long format data set prepared before: longProstRed.

  • Make one extra copy of the exposure rx, named Orx, to be used in the hazard of death due to other causes.
longProstRed$Orx <- longProstRed$rx
  • Fit conditional pooled logistic regression models
plrFitP <-
    glm(
      prostateDeath ~ rx*(dtime + I(dtime ^ 2)+ I(dtime ^ 3)) + normalAct + ageCat + hx + hgBinary,
      data = longProstRed,
      family = binomial()
    )
    
plrFitO <-
  glm(
    otherDeath ~ Orx * (dtime + I(dtime ^ 2) + I(dtime ^ 3)) + normalAct + ageCat + hx + hgBinary,
    data = longProstRed,
    family = binomial()
  )

Create three copies of the data:

  • Data set construction where Ay and Ad = 1
treated <-
    baseline[rep(1:n, each = length(cutTimes)), ] #One row for each time

treated$dtime <- rep(cutTimes, n)
  
treated$rx <- 1

treated$Orx <- 1
  • Dataset construction where Ay and Ad = 1
placebo <-
    baseline[rep(1:n, each = length(cutTimes)), ] #One row for each time
  
placebo$dtime <- rep(cutTimes, n)

placebo$rx <- 0

placebo$Orx <- 0
  • Dataset construction where Ay = 1, Ad = 0
treatAy <-
    baseline[rep(1:n, each = length(cutTimes)), ]
  
treatAy$dtime <- rep(cutTimes, n)
  
treatAy$rx <- 1
  
treatAy$Orx <- 0
  • Calculate hazards for each time interval in the three cloned data sets
## For Ay = 1 and Ad = 1

treated$hazardP <-
    predict(plrFitP, newdata = treated, type = 'response')
  
treated$hazardO <-
    predict(plrFitO, newdata = treated, type = 'response')
  
treated$s <- (1 - treated$hazardP) * (1 - treated$hazardO)

## For Ay = 0, Ad = 0

placebo$hazardP <-
    predict(plrFitP, newdata = placebo, type = 'response')

placebo$hazardO <-
    predict(plrFitO, newdata = placebo, type = 'response')

placebo$s <- (1 - placebo$hazardP) * (1 - placebo$hazardO)

## For Ay = 1, Ad = 0

treatAy$hazardP <-
    predict(plrFitP, newdata = treatAy, type = 'response')
  
treatAy$hazardO <-
    predict(plrFitO, newdata = treatAy, type = 'response')
  
treatAy$s <- (1 - treatAy$hazardP) * (1 - treatAy$hazardO)
  • Glimpse of “treated” data:
treated %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, Orx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()
patno dtime rx Orx hazardP hazardO s
8478 489 0 1 1 0.0231061 0.1215098 0.8581917
8478.1 489 1 1 1 0.0234620 0.1070610 0.8719889
8478.2 489 2 1 1 0.0238175 0.0951232 0.8833249
8478.3 489 3 1 1 0.0241730 0.0852474 0.8926403
8478.4 489 4 1 1 0.0245289 0.0770658 0.9002956
8478.54 489 54 1 1 0.0658483 0.0763298 0.8628481
8478.55 489 55 1 1 0.0681584 0.0726713 0.8641235
8478.56 489 56 1 1 0.0706130 0.0686332 0.8656002
8478.57 489 57 1 1 0.0732226 0.0642719 0.8672116
8478.58 489 58 1 1 0.0759986 0.0596529 0.8688820
  • Glimpse of “placebo” data:
placebo %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, Orx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()
patno dtime rx Orx hazardP hazardO s
8478 489 0 0 0 0.0458879 0.0509117 0.9055366
8478.1 489 1 0 0 0.0431460 0.0501587 0.9088594
8478.2 489 2 0 0 0.0409669 0.0495393 0.9115233
8478.3 489 3 0 0 0.0392643 0.0490433 0.9136181
8478.4 489 4 0 0 0.0379704 0.0486621 0.9152153
8478.54 489 54 0 0 0.0142297 0.0488282 0.9376369
8478.55 489 55 0 0 0.0113442 0.0468247 0.9423623
8478.56 489 56 0 0 0.0088875 0.0447376 0.9467726
8478.57 489 57 0 0 0.0068390 0.0425801 0.9508721
8478.58 489 58 0 0 0.0051668 0.0403663 0.9546755
  • Glimpse of “treatAy” data:
treatAy %>% 
  filter(patno %in% c(489)) %>% select(
    patno, dtime, rx, Orx, hazardP, hazardO, s
  ) %>% slice(1:5, 55:59) %>%  knitr::kable()
patno dtime rx Orx hazardP hazardO s
8478 489 0 1 0 0.0231061 0.0509117 0.9271586
8478.1 489 1 1 0 0.0234620 0.0501587 0.9275561
8478.2 489 2 1 0 0.0238175 0.0495393 0.9278231
8478.3 489 3 1 0 0.0241730 0.0490433 0.9279692
8478.4 489 4 1 0 0.0245289 0.0486621 0.9280027
8478.54 489 54 1 0 0.0658483 0.0488282 0.8885387
8478.55 489 55 1 0 0.0681584 0.0468247 0.8882084
8478.56 489 56 1 0 0.0706130 0.0447376 0.8878085
8478.57 489 57 1 0 0.0732226 0.0425801 0.8873151
8478.58 489 58 1 0 0.0759986 0.0403663 0.8867029
  • Calculate cumulative incidence
cumIncTreated <- calculateCumInc(treated)
cumIncTreatAy <- calculateCumInc(treatAy)
cumIncPlacebo <- calculateCumInc(placebo)

estimatesFullData <-
  data.frame(cumIncTreated, cumIncTreatAy, cumIncPlacebo)

knitr::kable(round(estimatesFullData[36,], 2))
cumIncTreated cumIncTreatAy cumIncPlacebo
36 0.13 0.14 0.21
  • Cumulative incidence curves
plot(cutTimes, cumIncTreated, type = "s",
     ylim = c(0, 1), ylab = "Risk",
     xlab = "Months", 
     lty = 1, lwd = 1, xlim = c(0, 52))

lines(cutTimes, cumIncPlacebo, type = "s",
      col = 2, ylim = c(0, 1), lwd = 1)

lines(cutTimes, cumIncTreatAy, type = "s",
      col = 3, ylim = c(0, 1), lwd = 1)

legend(x = 0, y = 1, 
       c("(Ay=1,Ad=1)", "(Ay=1,Ad=0)", "(Ay=0,Ad=0)"),
       bty = "n", col = c(1, 3, 2),
       lty = c(1, 1, 1), cex = 1, pt.cex = 1, lwd = 1)

References

  • Young JG, Stensrud MJ, Tchetgen Tchetgen EJ, Hernán MA. A causal framework for classical statistical estimands in failure time settings with competing events. Statistics in Medicine 2020. Paper link Code

  • Stensrud MJ, Young JG, Didelez V, Robins JM, Hernán MA. Separable Effects for Causal Inference in the Presence of Competing Events. Journal of the American Statistical Association 2022. Paper link

  • Rojas-Saunero LP, Young JG, Didelez V, Ikram MA, Swanson SA. Considering questions before methods in dementia research with competing events and causal goals. American Journal of Epidemiology. 2023. Paper link Code