11.12 BMI

Height may be recorded in meters or centimeters. Height was filtered to lie between 1.25 m (4 ft 1.2 in) and 2.1 m (6 ft 10.7 in). Recorded values above 2.1 were assumed to be in cm and so were divided by 100 before filtering, so that values between 125 and 210 cm are retained. Weight is measured in kg and was filtered to lie between 30 kg (66.1 lb) and 200 kg (440.9 lb). BMI was filtered to lie between 12 and 75 kg/m^2.

# Extract height, weight, and BMI records.
# The reading used is the first non-missing of as.numeric(value1),
# as.numeric(value2), as.numeric(value3); rows with no numeric reading or a
# non-positive reading are dropped.
hwbmi <- gp_clinical %>%
  filter(grepl(height_weight_BMI_codes, code)) %>%
  mutate(value = coalesce(as.numeric(value1), as.numeric(value2), as.numeric(value3))) %>%
  filter(!is.na(value) & value > 0) %>%
  # Classify each record from its term description. BMI is tested first, then
  # height; every remaining record is labelled as weight.
  mutate(trait = case_when(
    grepl("BMI|Body Mass Index", term_description, ignore.case = TRUE) ~ "BMI",
    grepl("Height", term_description, ignore.case = TRUE) ~ "Height",
    TRUE ~ "Weight"
  )) %>%
  # Heights above 2.1 are assumed to be in cm and are converted to meters.
  mutate(value = ifelse(trait == "Height" & value > 2.1, value / 100, value)) %>%
  # Keep only values strictly inside the plausible range for each trait.
  filter((trait == "Weight" & value > 30 & value < 200) |
           (trait == "Height" & value > 1.25 & value < 2.1) |
           (trait == "BMI" & value > 12 & value < 75)) %>%
  arrange(f.eid, event_dt)

# Summary table of the resulting terms: number of records and mean value
# (rounded to 1 decimal) for each code / description / trait combination,
# ordered by trait and then by descending record count.
hwbmi %>%
  group_by(code, term_description, trait) %>%
  summarize(
    n = n(),
    mean = round(mean(value), 1)
  ) %>%
  arrange(trait, desc(n)) %>%
  kable()

# Split the long table into one data frame per trait
weight <- filter(hwbmi, trait == "Weight")
height <- filter(hwbmi, trait == "Height")
BMI <- filter(hwbmi, trait == "BMI")

# Some weight records from data provider 2 also give a BMI.
# Here it is read from value3 and kept as `bmi_extra` when it is numeric
# and greater than 12.
bmi_extra <- weight %>%
  filter(data_provider == 2) %>%
  mutate(bmi_extra = as.numeric(value3)) %>%
  filter(!is.na(bmi_extra), bmi_extra > 12)
head(bmi_extra)

# Combine the traits in wider format.
# Each per-trait table drops the raw columns and renames its value, code and
# description columns with a trait-specific prefix so the tables can sit side
# by side after joining.
weight_pre <- weight %>%
  select(-terminology, -value1, -value2, -value3, -trait) %>%
  dplyr::rename(
    weight = value,
    weight_code = code,
    weight_term_description = term_description
  )

height_pre <- height %>%
  select(-terminology, -value1, -value2, -value3, -trait) %>%
  dplyr::rename(
    height = value,
    height_code = code,
    height_term_description = term_description
  )

BMI_pre <- BMI %>%
  select(-terminology, -value1, -value2, -value3, -trait) %>%
  dplyr::rename(
    BMI = value,
    BMI_code = code,
    BMI_term_description = term_description
  )

# bmi_extra keeps only its code (renamed) and the bmi_extra reading; the
# weight value and description it was derived from are dropped.
bmi_extra_pre <- bmi_extra %>%
  select(-terminology, -term_description, -value1, -value2, -value3, -trait, -value) %>%
  dplyr::rename(bmi_extra_code = code)

# No `by` is given, so each join matches on all columns the two tables share.
joined_bmi <- weight_pre %>%
  full_join(height_pre) %>%
  full_join(BMI_pre) %>%
  full_join(bmi_extra_pre)
head(joined_bmi)

Clean the combined data, filling in missing height values from the same participant's previous/subsequent measurements or, when no height is available at all, from weight and reported BMI. Filter out records where the calculated and reported BMI differ by more than 1.5 kg/m^2.

# Clean the joined measurements and derive one final BMI per record.
#   height_carried : height on the record, else carried from the participant's
#                    other records, else back-calculated from weight and
#                    reported BMI
#   BMI_reported   : reported BMI (BMI, else bmi_extra), rounded to 1 decimal
#   BMI_calculated : weight / height^2 using the carried height, 1 decimal
#   BMI            : calculated BMI when available, otherwise reported BMI
# NOTE: the result is still grouped by f.eid, as group_by() is never undone.
cleaned_bmi <- joined_bmi %>%
  # full_join() appends unmatched rows after the matched ones, so the joined
  # table is not in date order. fill() works on row order, so sort each
  # participant's records chronologically before carrying heights.
  arrange(f.eid, event_dt) %>%
  group_by(f.eid) %>%
  fill(height, .direction = "downup") %>%
  mutate(
    BMI_coalesce = round(coalesce(BMI, bmi_extra), 1),
    BMI_calculated = round(weight / (height^2), 1),
    BMI_diff = BMI_coalesce - BMI_calculated
  ) %>%
  # Drop records whose reported and calculated BMI disagree by more than 1.5;
  # records where the two cannot be compared (BMI_diff is NA) are kept.
  filter(is.na(BMI_diff) | abs(BMI_diff) <= 1.5) %>%
  mutate(BMI_final = coalesce(BMI_calculated, BMI_coalesce)) %>%
  # Participants with no recorded height: back-calculate it from weight and
  # the reported BMI. Use the same BMI sources as BMI_coalesce (unrounded) so
  # that records whose BMI is only available as bmi_extra are handled too.
  mutate(height = coalesce(height, sqrt(weight / coalesce(BMI, bmi_extra)))) %>%
  dplyr::rename(BMI_reported = BMI_coalesce, height_carried = height) %>%
  # Re-apply the plausibility limits; rows with a missing height or BMI fail
  # these comparisons and are removed.
  filter(height_carried > 1.25 & height_carried < 2.1 &
           BMI_final > 12 & BMI_final < 75) %>%
  select(f.eid, data_provider, event_dt, weight, height_carried,
         BMI_reported, BMI_calculated, BMI = BMI_final) %>%
  distinct() %>%
  mutate(source = "PC")

# Density of log10(BMI) across the cleaned records
ggplot(cleaned_bmi, aes(x = log10(BMI))) +
  geom_density() +
  theme_minimal()