12 Combine biomarker trajectories extracted from UKB and PCP data

In the previous chapters 10 and 11, we created trajectory data for various biomarkers along with macroalbuminuria and microalbuminuria status. In this chapter, we combine these data to produce the master trajectory data, the first occurrence microalbuminuria event table, and the first occurrence macroalbuminuria event table. Furthermore, using the blood creatinine trajectory data, we derive eGFR trajectory data, which will be used to create the prolonged low eGFR first occurrence table. The first occurrence macroalbuminuria event table and the prolonged low eGFR first occurrence table will be used to additionally capture diabetic kidney disease (DKD) cases, and the first occurrence microalbuminuria event table will be used to phenotype time-to-event data for DKD in chapter 16.

Load packages and functions.

library(tidyverse)
source("functions.R")

Load UKB and PCP biomarker trajectory data.

# Read the PCP and UKB biomarker trajectory tables from generated_data/.
biomarker_traj_pcp <- readRDS(
  file = "generated_data/biomarker_trajectory_pcp.RDS"
)
biomarker_traj_ukb <- readRDS(
  file = "generated_data/biomarker_trajectory_ukb.RDS"
)

Combine UKB and PCP biomarker trajectory data.

# Stack the PCP rows on top of the UKB rows (minus the UKB-only `visit`
# column), then remove exact duplicate rows.
biomarker_traj <- biomarker_traj_ukb %>%
  select(-visit) %>%
  bind_rows(biomarker_traj_pcp, .) %>%
  distinct()

Load UKB and PCP albuminuria event table.

# Read the PCP and UKB albuminuria event tables from generated_data/.
albuminuria_event_tab_pcp <- readRDS(
  file = "generated_data/albuminuria_event_tab_pcp.RDS"
)
albuminuria_event_tab_ukb <- readRDS(
  file = "generated_data/albuminuria_event_tab_ukb.RDS"
)

Combine albuminuria event tables.

# Stack both sources (PCP first, then UKB) and remove exact duplicate rows.
albuminuria_event_tab <- distinct(
  bind_rows(albuminuria_event_tab_pcp, albuminuria_event_tab_ukb)
)

Create microalbuminuria and macroalbuminuria first occurrence event tables.

# First microalbuminuria event per participant: keep rows flagged as events,
# order them by date, and take the earliest row within each f.eid.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
# The result is ungrouped so that downstream verbs do not silently operate
# per participant.
microabu_firstoccur <- albuminuria_event_tab %>%
  filter(type == "microalbuminuria", event == TRUE) %>%
  select(f.eid, event, event_dt) %>%
  group_by(f.eid) %>%
  arrange(event_dt) %>%
  slice(1) %>%
  ungroup()

# First macroalbuminuria event per participant, built the same way.
macroabu_firstoccur <- albuminuria_event_tab %>%
  filter(type == "macroalbuminuria", event == TRUE) %>%
  select(f.eid, event, event_dt) %>%
  group_by(f.eid) %>%
  arrange(event_dt) %>%
  slice(1) %>%
  ungroup()

Load the demographic table.

# Read the selected demographic table from generated_data/.
demog <- readRDS(file = "generated_data/demog_selected.RDS")

Compute eGFR and create eGFR trajectory data.

# Attach sex and date of birth to every blood creatinine measurement.
# filter() drops rows whose biomarker is NA, whereas logical `[` indexing
# would turn them into all-NA rows that then reach compute_egfr().
# The join key is stated explicitly so the join cannot silently pick up other
# shared columns. NOTE(review): assumes f.eid is the only column shared with
# demog's (f.eid, SEX, DOB) -- confirm biomarker_traj has no SEX/DOB columns.
creat_blood_traj <- biomarker_traj %>%
  filter(biomarker == "creat_blood") %>%
  left_join(demog %>% select(f.eid, SEX, DOB), by = "f.eid")

# Age (in decimal years) at the time of each creatinine measurement.
creat_blood_traj$age_egfr <-
  lubridate::decimal_date(creat_blood_traj$event_dt) -
  lubridate::decimal_date(creat_blood_traj$DOB)

# compute_egfr() comes from functions.R; it is applied row by row to
# (creatinine measurement, age, sex).
creat_blood_traj$measurement_egfr <- mapply(
  compute_egfr,
  creat_blood_traj$measurement,
  creat_blood_traj$age_egfr,
  creat_blood_traj$SEX
)

# Reshape into the trajectory layout (f.eid, measurement, event_dt,
# biomarker) and drop rows for which no eGFR value was obtained.
egfr_traj <- creat_blood_traj %>%
  select(f.eid, measurement = measurement_egfr, event_dt) %>%
  mutate(biomarker = "egfr") %>%
  filter(!is.na(measurement))

Define a “prolonged low eGFR event” as two or more consecutive low eGFR measurements (eGFR < 60) spanning 90 days or more without a normal eGFR in between (which can be ascertained only when we have at least two low eGFR measurements at least 90 days apart). prolonged_low_egfr_firstoccur contains the first occurrence date of the “prolonged low eGFR event.” This event table will be used in phenotyping DKD cases later.

# Flag each eGFR measurement as low (< 60), split every participant's
# date-ordered measurements into runs of consecutive equal status, and keep
# the start date of the first low run that spans 90 days or more.
prolonged_low_egfr_firstoccur <- egfr_traj %>%
  # Undated measurements cannot be placed on the timeline; dropping them up
  # front also keeps max()/min() below from running on all-NA dates.
  filter(!is.na(event_dt)) %>%
  mutate(eGFR_lt60 = measurement < 60) %>%
  group_by(f.eid) %>%
  arrange(f.eid, event_dt) %>%
  # TRUE whenever the low/normal status differs from the previous
  # measurement of the same participant (NA on the first measurement).
  mutate(switched = eGFR_lt60 != lag(eGFR_lt60)) %>%
  # Running count of status changes = run identifier within participant.
  mutate(run_n = cumsum(ifelse(is.na(switched), 0, switched))) %>%
  group_by(f.eid, run_n) %>%
  # Run length in actual calendar days. A decimal_date() difference scaled by
  # 365.25 under-counts spans inside leap years (an exact 90-day span comes
  # out just below 90), so the dates are subtracted directly instead.
  mutate(
    max_days = as.numeric(
      difftime(max(event_dt), min(event_dt), units = "days")
    )
  ) %>%
  # A participant with a single low measurement has max_days = 0.
  filter(eGFR_lt60 & max_days >= 90) %>%
  ungroup() %>%
  group_by(f.eid) %>%
  arrange(event_dt) %>%
  # Start date of the first period with eGFR below 60 for 90+ days.
  slice(1) %>%
  ungroup() %>%
  select(f.eid, event_dt)

Add eGFR trajectory data to biomarker_traj.

# Append the derived eGFR rows, then discard every row that lacks either a
# measurement value or an event date.
biomarker_traj <- bind_rows(biomarker_traj, egfr_traj)
biomarker_traj <- biomarker_traj[
  !(is.na(biomarker_traj$measurement) | is.na(biomarker_traj$event_dt)),
]

Break up biomarker trajectory table per biomarker.

# One table per distinct biomarker, in order of first appearance, each list
# element named after the biomarker it holds.
biomarker_traj_list_per_biomarker <- setNames(
  lapply(unique(biomarker_traj$biomarker), function(marker_name) {
    biomarker_traj[biomarker_traj$biomarker == marker_name, ]
  }),
  unique(biomarker_traj$biomarker)
)

Save biomarker trajectory table.

# Persist the combined trajectory table (all biomarkers, including eGFR).
saveRDS(
  object = biomarker_traj,
  file = "generated_data/biomarker_trajectory.RDS"
)

Save trajectory table for each biomarker.

# Write each per-biomarker table to generated_data/trajectory_<biomarker>.RDS.
# seq_along() yields an empty sequence for an empty list, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(biomarker_traj_list_per_biomarker)) {
  biomarker <- names(biomarker_traj_list_per_biomarker)[i]
  saveRDS(
    biomarker_traj_list_per_biomarker[[i]],
    paste0("generated_data/", "trajectory_", biomarker, ".RDS")
  )
}

Save albuminuria table.

# Persist the combined albuminuria event table.
saveRDS(
  object = albuminuria_event_tab,
  file = "generated_data/albuminuria_event_tab.RDS"
)

Save macroalbuminuria and microalbuminuria first occurrence table.

# Persist the first-occurrence tables for macro- and microalbuminuria.
saveRDS(
  object = macroabu_firstoccur,
  file = "generated_data/macroabu_firstoccur.RDS"
)
saveRDS(
  object = microabu_firstoccur,
  file = "generated_data/microabu_firstoccur.RDS"
)

Save prolonged low eGFR first occurrence table.

# Persist the first-occurrence table of prolonged low eGFR events.
saveRDS(
  object = prolonged_low_egfr_firstoccur,
  file = "generated_data/prolonged_low_egfr_firstoccur.RDS"
)