11.13 Blood Pressure
To extract blood pressure, it is necessary to consider both the case where systolic and diastolic BP are provided in the same record and the case where systolic and diastolic BP are given in separate records.
# Keep every record whose code matches the blood-pressure code pattern and
# that has at least one non-empty value field, then convert the three value
# fields to numeric.
# NOTE(review): `gp_clinical` and `BP_codes` are defined outside this section;
# `BP_codes` is passed to grepl() as the pattern, so it is presumably a single
# regular-expression string -- confirm.
bp <- gp_clinical %>%
  filter(grepl(BP_codes, code)) %>%
  filter(value1 != "" | value2 != "" | value3 != "") %>%
  mutate(
    value1 = as.numeric(value1),
    value2 = as.numeric(value2),
    value3 = as.numeric(value3)
  )

# Remove some small values in value3 of unknown meaning
bp$value3[bp$value3 < 10] <- NA
#Separate the data into two types of record: one where two values are given in a single record (e.g. systolic and diastolic together) and another where only one value (i.e. systolic only or diastolic only) is given per record.
Multiple values per record: Take the larger value to be systolic and the smaller value to be diastolic. Filter out any records where either of these values is 0.
# Records with exactly two non-missing values: the larger value is taken as
# systolic and the smaller as diastolic; records where either is 0 are dropped.
bp_mult <- bp %>%
  # Count the non-missing value fields with vectorised arithmetic (each
  # !is.na() contributes 0 or 1) instead of evaluating the filter per row.
  filter((!is.na(value1)) + (!is.na(value2)) + (!is.na(value3)) == 2) %>%
  mutate(
    Systolic_bp_pc = pmax(value1, value2, value3, na.rm = TRUE),
    Diastolic_bp_pc = pmin(value1, value2, value3, na.rm = TRUE)
  ) %>%
  filter(Systolic_bp_pc != 0 & Diastolic_bp_pc != 0)
One value per record: filter out those with a value of 0.
# Records with exactly one non-missing value. The single value is collected
# into `value`, zero readings are dropped, and each record is labelled as
# systolic, diastolic or unknown from its term description.
bp_single <- bp %>%
  # Vectorised count of non-missing value fields (each !is.na() is 0 or 1).
  filter((!is.na(value1)) + (!is.na(value2)) + (!is.na(value3)) == 1) %>%
  # Exactly one field is non-missing, so coalesce() picks that one.
  mutate(value = coalesce(value1, value2, value3)) %>%
  filter(value != 0) %>%
  arrange(f.eid, event_dt) %>%
  mutate(
    # "systolic" is tested first, so a description matching both words is
    # labelled systolic.
    bp_type = case_when(
      grepl("systolic", term_description, ignore.case = TRUE) ~
        "Systolic_bp_pc",
      grepl("diastolic", term_description, ignore.case = TRUE) ~
        "Diastolic_bp_pc",
      TRUE ~ "Unknown"
    )
  ) %>%
  select(-value1, -value2, -value3) %>%
  distinct()
#Here are the results for single type: the number of records and the mean
#value for each code / description / inferred type.
bp_single %>%
  group_by(code, term_description, bp_type) %>%
  # The digits argument belongs to round(); a second positional argument to
  # mean() would be `trim`, and trim = 1 yields the median rather than the mean.
  summarize(n = n(), mean = round(mean(value), 1)) %>%
  arrange(bp_type, desc(n))
Look at the remaining codes to see if they are systolic or diastolic. For many of these, the same code is given twice, each with a different value. Sometimes a record is a duplicate of a systolic or diastolic measurement. If there are two unique values given per ID/date, then we can assume they are systolic (higher) and diastolic (lower). Otherwise, discard that set of values.
#Unknowns - not specified as Diastolic vs. Systolic
# For each participant/date that has at least one "Unknown" record, keep the
# set only when it contains exactly two distinct values; the higher is taken
# as systolic and the lower as diastolic, and the code/description of each
# is carried alongside.
unknowns <- bp_single %>%
  group_by(f.eid, event_dt) %>%
  filter(any(bp_type == "Unknown")) %>%
  mutate(n = length(unique(value))) %>%
  filter(n == 2) %>%
  mutate(
    Systolic_bp_pc = max(value),
    Diastolic_bp_pc = min(value)
  ) %>%
  # One row per distinct value; the grouping by f.eid/event_dt is retained,
  # so each group now has exactly two rows with different values.
  distinct(f.eid, event_dt, value, .keep_all = TRUE) %>%
  mutate(
    code_systolic = code[which.max(value)],
    code_diastolic = code[which.min(value)],
    term_description_systolic = term_description[which.max(value)],
    term_description_diastolic = term_description[which.min(value)]
  ) %>%
  ungroup()
#Prepare for merging
# Keep only participant/dates that have exactly one systolic and exactly one
# diastolic record (after removing duplicate rows), and drop any "Unknown"
# records that share those dates.
bp_single_less <- bp_single %>%
  distinct(f.eid, data_provider, event_dt, value, bp_type, .keep_all = TRUE) %>%
  group_by(f.eid, event_dt) %>%
  filter(
    sum(bp_type == "Systolic_bp_pc") == 1,
    sum(bp_type == "Diastolic_bp_pc") == 1,
    bp_type != "Unknown"
  ) %>%
  ungroup()
# Systolic half of the single-value records, with the code, value and
# description columns renamed so they stay distinct after the join.
systolic <- bp_single_less %>%
  filter(bp_type == "Systolic_bp_pc") %>%
  dplyr::rename(
    code_systolic = code,
    Systolic_bp_pc = value,
    term_description_systolic = term_description
  ) %>%
  select(-bp_type)
# Diastolic half of the single-value records, with the code, value and
# description columns renamed so they stay distinct after the join.
diastolic <- bp_single_less %>%
  filter(bp_type == "Diastolic_bp_pc") %>%
  dplyr::rename(
    code_diastolic = code,
    Diastolic_bp_pc = value,
    term_description_diastolic = term_description
  ) %>%
  select(-bp_type)
# Put the systolic and diastolic readings side by side. No `by` is given, so
# the join uses every column the two tables have in common.
# NOTE(review): presumably those shared columns include f.eid and event_dt
# plus any other untouched gp_clinical columns -- confirm that matching on
# all of them is intended.
bp_single_wide <- full_join(systolic, diastolic)
head(bp_single_wide)
Combine each of the cleaned subsets and implement some common-sense filters (45 ≤ systolic BP ≤ 300, 30 ≤ diastolic BP < systolic BP).
# Drop the per-record columns from the unknown-type sets so that each
# participant/date collapses to a single row.
unknowns_less <- unknowns %>%
  select(-term_description, -value, -bp_type, -code, -n) %>%
  distinct()
# Two-value records reduced to the columns needed for merging; the code and
# description get a `_both` suffix because one record supplied both readings.
bp_mult_less <- bp_mult %>%
  dplyr::rename(
    term_description_both = term_description,
    code_both = code
  ) %>%
  select(
    f.eid, event_dt, data_provider, terminology,
    Systolic_bp_pc, Diastolic_bp_pc,
    code_both, term_description_both
  ) %>%
  distinct()
# Combine the three cleaned subsets and apply plausibility filters:
# systolic above diastolic, systolic within [45, 300], diastolic at least 30.
# Rows where either pressure is missing are dropped by these comparisons.
full_bp_clean <- full_join(unknowns_less, bp_single_wide) %>%
  full_join(bp_mult_less) %>%
  filter(Systolic_bp_pc > Diastolic_bp_pc) %>%
  filter(Systolic_bp_pc >= 45 & Systolic_bp_pc <= 300) %>%
  filter(Diastolic_bp_pc >= 30) %>%
  # Keep one row per participant/date/pressure pair.
  distinct(f.eid, event_dt, Systolic_bp_pc, Diastolic_bp_pc, .keep_all = TRUE) %>%
  # Clear any grouping inherited from the inputs so it does not leak into
  # later steps.
  ungroup()
# Preview the combined table, then draw the density of each pressure on a
# log10 scale.
head(full_bp_clean)

ggplot(data = full_bp_clean, aes(x = log10(Systolic_bp_pc))) +
  geom_density() +
  theme_minimal()

ggplot(data = full_bp_clean, aes(x = log10(Diastolic_bp_pc))) +
  geom_density() +
  theme_minimal()