11.12 BMI
Height may be recorded in meters or centimeters. Height was filtered to lie between 1.25 m (4 ft 1.2 in) and 2.1 m (6 ft 10.7 in). Values between 125 and 210 were assumed to be in cm and so were divided by 100 before filtering. Weight is recorded in kg and was filtered to lie between 30 kg (66.1 lb) and 200 kg (440.9 lb). BMI was filtered to lie between 12 and 75 kg/m^2.
# Extract height, weight, and BMI records from gp_clinical.
# Steps:
#   1. keep rows whose code matches the height_weight_BMI_codes pattern
#   2. take the first non-missing numeric among value1, value2, value3
#   3. label each row BMI / Height / Weight from its term description
#      (anything that is neither BMI nor Height is labelled Weight)
#   4. convert heights that look like centimeters to meters
#   5. keep only values inside the plausible range for each trait
hwbmi <- gp_clinical %>%
  filter(grepl(height_weight_BMI_codes, code)) %>%
  mutate(value = coalesce(as.numeric(value1),
                          as.numeric(value2),
                          as.numeric(value3))) %>%
  filter(!is.na(value) & value > 0) %>%
  mutate(trait = ifelse(
    grepl("BMI|Body Mass Index", term_description, ignore.case = TRUE),
    "BMI",
    ifelse(grepl("Height", term_description, ignore.case = TRUE),
           "Height",
           "Weight")
  )) %>%
  # cm to meters: every height above 2.1 is divided by 100; anything still
  # outside 1.25-2.1 after that is removed by the range filter below.
  mutate(value = ifelse(trait == "Height" & value > 2.1,
                        value / 100,
                        value)) %>%
  filter((trait == "Weight" & value < 200 & value > 30) |
           (trait == "Height" & value < 2.1 & value > 1.25) |
           (trait == "BMI" & value < 75 & value > 12)) %>%
  arrange(f.eid, event_dt)
# Here are the resulting terms:
# one row per code / term description / trait, with the number of records (n)
# and the mean value rounded to one decimal, ordered by trait and then by
# descending record count, rendered as a table with kable().
hwbmi %>%
  group_by(code, term_description, trait) %>%
  summarize(n = n(), mean = round(mean(value), 1)) %>%
  arrange(trait, desc(n)) %>%
  kable()
# Separate the traits: one table per trait label assigned in hwbmi.
weight <- hwbmi %>% filter(trait == "Weight")
height <- hwbmi %>% filter(trait == "Height")
BMI <- hwbmi %>% filter(trait == "BMI")
# Some records from data provider 2 give weight and BMI in the same row:
# for those weight rows, value3 is read as an additional BMI measurement.
# Rows where value3 is not numeric, or is not above 12, are dropped.
bmi_extra <- weight %>%
  filter(data_provider == 2) %>%
  mutate(bmi_extra = as.numeric(value3)) %>%
  filter(!is.na(bmi_extra) & bmi_extra > 12)
head(bmi_extra)
# Combine the traits in wider format.
# Each per-trait table drops the raw value/terminology columns and renames
# value, code and term_description with a trait-specific name, so the tables
# can be joined side by side without column clashes.
weight_pre <- weight %>%
  select(-c(terminology, value1, value2, value3, trait)) %>%
  dplyr::rename(weight = value,
                weight_code = code,
                weight_term_description = term_description)

height_pre <- height %>%
  select(-c(terminology, value1, value2, value3, trait)) %>%
  dplyr::rename(height = value,
                height_code = code,
                height_term_description = term_description)

BMI_pre <- BMI %>%
  select(-c(terminology, value1, value2, value3, trait)) %>%
  dplyr::rename(BMI = value,
                BMI_code = code,
                BMI_term_description = term_description)

# bmi_extra already carries its measurement in the bmi_extra column, so the
# weight value and term description are dropped here as well.
bmi_extra_pre <- bmi_extra %>%
  select(-c(terminology, term_description, value1, value2, value3,
            trait, value)) %>%
  dplyr::rename(bmi_extra_code = code)
# Full-join the four per-trait tables into one wide table.
# No `by` is supplied, so each join matches on every column name the two
# tables have in common.
joined_bmi <- full_join(weight_pre, height_pre) %>%
  full_join(BMI_pre) %>%
  full_join(bmi_extra_pre)
head(joined_bmi)
Clean the combined data, filling in missing height values from the same person's previous or subsequent measurements or, failing that, back-calculating height from weight and BMI. Records where the calculated and reported BMI differ by more than 1.5 kg/m^2 are filtered out.
# Clean the combined table, working within each f.eid group.
# Output columns: f.eid, data_provider, event_dt, weight, height_carried,
# BMI_reported, BMI_calculated, BMI (the final value) and source ("PC").
# The result is still grouped by f.eid.
cleaned_bmi <- joined_bmi %>%
  group_by(f.eid) %>%
  # Carry height to rows that lack one: downwards first, then upwards.
  # NOTE(review): fill() follows the current row order, and joined_bmi is not
  # re-sorted by event_dt after the full joins -- confirm whether an
  # arrange(event_dt) is wanted before this step.
  fill(height, .direction = "downup") %>%
  mutate(
    BMI_coalesce = round(coalesce(BMI, bmi_extra), 1),
    BMI_calculated = round(weight / (height^2), 1),
    BMI_diff = BMI_coalesce - BMI_calculated
  ) %>%
  # Drop rows whose reported and calculated BMI disagree by more than 1.5;
  # rows where either one is missing are kept.
  filter(is.na(BMI_diff) | abs(BMI_diff) <= 1.5) %>%
  # Prefer the calculated BMI, fall back to the reported one.
  mutate(BMI_final = coalesce(BMI_calculated, BMI_coalesce)) %>%
  # Where no height could be carried, back-calculate it from weight and BMI.
  # NOTE(review): this uses the BMI column only, not bmi_extra -- confirm
  # that rows with only a bmi_extra value are meant to stay without a height.
  mutate(height = ifelse(!is.na(height), height, sqrt(weight / BMI))) %>%
  dplyr::rename(BMI_reported = BMI_coalesce, height_carried = height) %>%
  # Rows with a missing height_carried or BMI_final are dropped here too.
  filter(height_carried < 2.1 & height_carried > 1.25 &
           BMI_final < 75 & BMI_final > 12) %>%
  select(f.eid, data_provider, event_dt, weight, height_carried,
         BMI_reported, BMI_calculated, BMI = BMI_final) %>%
  distinct() %>%
  mutate(source = "PC")
# Density plot of log10(BMI) over the cleaned records.
ggplot(cleaned_bmi, aes(x = log10(BMI))) +
  geom_density() +
  theme_minimal()