12 Combine biomarker trajectories extracted from UKB and PCP data
In the previous chapters 10 and 11, we created trajectory data for various biomarkers along with macroalbuminuria or microalbuminuria status. In this chapter, we combine these data to produce the master trajectory data, the first-occurrence microalbuminuria event table, and the first-occurrence macroalbuminuria event table. Furthermore, using the blood creatinine trajectory data, we derive eGFR trajectory data, which will be used to create the prolonged low eGFR first-occurrence table. The first-occurrence macroalbuminuria event table and the prolonged low eGFR first-occurrence table will be used to additionally capture diabetic kidney disease (DKD) cases, and the first-occurrence microalbuminuria event table will be used to phenotype time-to-event data for DKD in chapter 16.
Load packages and functions.
library(tidyverse)
source("functions.R")
Load UKB and PCP biomarker trajectory data.
# Read the biomarker trajectory tables generated earlier for PCP and UKB.
biomarker_traj_pcp <- readRDS("generated_data/biomarker_trajectory_pcp.RDS")
biomarker_traj_ukb <- readRDS("generated_data/biomarker_trajectory_ukb.RDS")
Combine UKB and PCP biomarker trajectory data.
# Stack the PCP and UKB trajectories into one table; the `visit` column is
# dropped from the UKB table before stacking.
biomarker_traj <- bind_rows(
  biomarker_traj_pcp,
  biomarker_traj_ukb %>% select(-visit)
)

# Remove rows that are exact duplicates across all columns.
biomarker_traj <- biomarker_traj %>% distinct()
Load UKB and PCP albuminuria event tables.
# Read the albuminuria event tables generated earlier for PCP and UKB.
albuminuria_event_tab_pcp <- readRDS("generated_data/albuminuria_event_tab_pcp.RDS")
albuminuria_event_tab_ukb <- readRDS("generated_data/albuminuria_event_tab_ukb.RDS")
Combine albuminuria event tables.
# Stack the PCP and UKB albuminuria event tables and drop exact duplicate rows.
albuminuria_event_tab <- bind_rows(
  albuminuria_event_tab_pcp,
  albuminuria_event_tab_ukb
) %>%
  distinct()
Create microalbuminuria and macroalbuminuria first-occurrence event tables.
# Earliest positive microalbuminuria event per subject: keep positive
# microalbuminuria rows, order by event date, and take the first row within
# each f.eid. The result remains grouped by f.eid.
microabu_firstoccur <- albuminuria_event_tab %>%
  filter(type == "microalbuminuria", event == TRUE) %>%
  select(f.eid, event, event_dt) %>%
  group_by(f.eid) %>%
  arrange(event_dt) %>%
  slice(1)

# Same procedure for macroalbuminuria.
macroabu_firstoccur <- albuminuria_event_tab %>%
  filter(type == "macroalbuminuria", event == TRUE) %>%
  select(f.eid, event, event_dt) %>%
  group_by(f.eid) %>%
  arrange(event_dt) %>%
  slice(1)
Load the demographic table.
# Read the demographic table; SEX and DOB from it are used for the eGFR
# calculation.
demog <- readRDS("generated_data/demog_selected.RDS")
Compute eGFR and create eGFR trajectory data.
# Attach sex and date of birth to every blood creatinine measurement.
# NOTE(review): no `by` is given, so the join uses every column the two tables
# share -- presumably only f.eid; confirm against the table schemas.
creat_blood_traj <- biomarker_traj[biomarker_traj$biomarker == "creat_blood", ] %>%
  left_join(demog %>% select(f.eid, SEX, DOB))

# Age at measurement, as the difference between the decimal-year
# representations of the measurement date and the date of birth.
creat_blood_traj$age_egfr <-
  lubridate::decimal_date(creat_blood_traj$event_dt) -
  lubridate::decimal_date(creat_blood_traj$DOB)

# One eGFR value per row, computed from creatinine, age and sex.
# compute_egfr() is not defined in this chunk; presumably it comes from
# functions.R -- verify.
creat_blood_traj$measurement_egfr <- mapply(
  compute_egfr,
  creat_blood_traj$measurement,
  creat_blood_traj$age_egfr,
  creat_blood_traj$SEX
)

# Reshape to the trajectory layout (f.eid, measurement, event_dt, biomarker)
# and drop rows for which no eGFR could be computed.
egfr_traj <- creat_blood_traj %>%
  select(f.eid, measurement_egfr, event_dt) %>%
  rename(measurement = measurement_egfr) %>%
  mutate(biomarker = "egfr")
egfr_traj <- egfr_traj[!is.na(egfr_traj$measurement), ]
Define a “prolonged low eGFR event” as a run of two or more low eGFR measurements (eGFR < 60) spanning 90 days or more without a normal eGFR in between (which can be ascertained only when we have at least two low eGFR measurements at least 90 days apart). prolonged_low_egfr_firstoccur
contains the first occurrence date of the “prolonged low eGFR event.” This event table will be used in phenotyping DKD cases later.
# Start date of each subject's first run of consecutive low eGFR (< 60)
# measurements that spans at least 90 days.
prolonged_low_egfr_firstoccur <- egfr_traj %>%
  mutate(eGFR_lt60 = ifelse(measurement < 60, 1, 0)) %>%
  group_by(f.eid) %>%
  arrange(f.eid, event_dt) %>%
  # TRUE where the low/normal status differs from the subject's previous
  # measurement; NA on each subject's first row.
  mutate(switched = eGFR_lt60 != lag(eGFR_lt60)) %>%
  # Running count of status switches, used as the id of each run of
  # consecutive same-status measurements (the NA first row counts as 0).
  mutate(run_n = cumsum(ifelse(is.na(switched), 0, switched))) %>%
  group_by(f.eid, run_n) %>%
  # Span of the run: decimal-year difference between its last and first
  # measurement, scaled by 365.25.
  # NOTE(review): this only approximates a day count (90 calendar days inside
  # a leap year comes out slightly below 90); consider difftime() if exact
  # day counts are required.
  mutate(max_days =
           (lubridate::decimal_date(max(event_dt, na.rm = TRUE)) -
              lubridate::decimal_date(min(event_dt, na.rm = TRUE))) * 365.25) %>%
  filter(eGFR_lt60 & max_days >= 90) %>% # a subject with only one eGFR < 60 event has max_days = 0
  ungroup() %>%
  group_by(f.eid) %>%
  arrange(event_dt) %>%
  slice(1) %>% # start date of the first period where the subject had eGFR < 60 for
  # 90+ days.
  ungroup() %>%
  select(f.eid, event_dt)
Add eGFR trajectory data to biomarker_traj.
# Append the derived eGFR trajectory to the master trajectory table.
biomarker_traj <- bind_rows(biomarker_traj, egfr_traj)

# Keep only rows that have both a measurement and an event date.
biomarker_traj <- biomarker_traj[
  !is.na(biomarker_traj$measurement) & !is.na(biomarker_traj$event_dt),
]
Break up biomarker trajectory table per biomarker.
# Build a list with one trajectory table per biomarker, in order of first
# appearance in biomarker_traj, and name each element after its biomarker.
biomarker_traj_list_per_biomarker <- lapply(unique(biomarker_traj$biomarker), function(biomarker) {
  biomarker_traj[biomarker_traj$biomarker == biomarker, ]
})
names(biomarker_traj_list_per_biomarker) <- unique(biomarker_traj$biomarker)
Save biomarker trajectory table.
# Write the combined trajectory table (all biomarkers, including eGFR) to disk.
saveRDS(
  object = biomarker_traj,
  file = "generated_data/biomarker_trajectory.RDS"
)
Save trajectory table for each biomarker.
# Write each per-biomarker table to generated_data/trajectory_<biomarker>.RDS.
# seq_along() makes the loop a no-op when the list is empty, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(biomarker_traj_list_per_biomarker)) {
  biomarker <- names(biomarker_traj_list_per_biomarker)[i]
  saveRDS(
    biomarker_traj_list_per_biomarker[[i]],
    paste0("generated_data/", "trajectory_", biomarker, ".RDS")
  )
}
Save albuminuria table.
# Write the combined (PCP + UKB) albuminuria event table to disk.
saveRDS(
  object = albuminuria_event_tab,
  file = "generated_data/albuminuria_event_tab.RDS"
)
Save macroalbuminuria and microalbuminuria first occurrence table.
# Write the per-subject first-occurrence tables for macro- and
# microalbuminuria to disk.
saveRDS(
  object = macroabu_firstoccur,
  file = "generated_data/macroabu_firstoccur.RDS"
)
saveRDS(
  object = microabu_firstoccur,
  file = "generated_data/microabu_firstoccur.RDS"
)
Save prolonged low eGFR first occurrence table.
# Write the per-subject first prolonged low eGFR event dates to disk.
saveRDS(
  object = prolonged_low_egfr_firstoccur,
  file = "generated_data/prolonged_low_egfr_firstoccur.RDS"
)