7.3 Prepare diabetic kidney disease code dictionary

7.3.1 Define Keywords

Define inclusion keywords.

# Broad kidney disease screening terms. The tokens are collapsed with "|" into
# one regular-expression alternation that is matched against term descriptions
# when the kidney disease dictionary is built.
inclusion_keyword_patterns <- paste(
  c(
    "chronic kidney", "chronic renal", "ckd", "glomerul",
    "kidney failure", "renal failure", "uraemi", "uremi",
    "nephropath", "proteinuri", "albuminuri",
    "nephrotic", "nephrosis", "nephritic",
    "renal manifestation", "renal complication",
    "end stage renal", "end stage kidney", "esrd"
  ),
  collapse = "|"
)

Define inclusion keywords for cases (terms that identify a kidney disease case rather than a control exclusion).

# Patterns that promote a kidney disease term to the case dictionary. The
# tokens are collapsed with "|" into a single regular expression; the last two
# are anchored with "$" / "^" so they only match at the end of, or as, the
# whole description.
# NOTE(review): short unanchored tokens such as "G4" or "A3" also match inside
# longer strings (e.g. "IgG4") under case-insensitive matching -- confirm
# during term review.
inclusion_keyword_patterns_case <- paste(
  c(
    "stage 3", "stage 4", "stage 5", "end stage",
    "chronic renal failure", "chronic kidney failure", "chronic uraemia",
    "G3", "G4", "G5", "A3",
    "diabetes", "diabetic",
    "persistent proteinur", "persistent albuminur",
    "persistent microalbuminur", "persistent macroalbuminur",
    "ns - nephrotic syndrome$", "^nephrotic syndrome NOS$"
  ),
  collapse = "|"
)

# Exact term descriptions (not patterns) that also qualify as cases; these are
# compared with %in%, so matching is on the whole description, case-sensitive.
inclusion_keywords_case <- "Nephrotic syndrome"

Define exclusion keywords.

# Kidney-specific exclusion terms. Any description matching one of these is
# removed from the dictionary even if it matched an inclusion keyword or code.
# Tokens are collapsed with "|" into one regular expression.
# NOTE(review): unanchored short tokens (e.g. "lead") also match inside longer
# words such as "leading" -- confirm this is intended.
exclusion_keyword_patterns <- paste(
  c(
    "nephritic factor", "antibody", "disease screening", "rate testing",
    "predicted stage", "acute", "induced by",
    "metals", "cadmium", "lead", "mercury", "toxic",
    "abortion", "pregnancy", "gestational", "delivery",
    "calculated by", "^glomerular filtration rate$", "nephropathy screen",
    "invite", "incipient", "rate using", "laboratory study",
    "monitoring administration", "Glomerular function test",
    "benign", "haemolytic", "B12 deficiency", "pyonephrosis", "analgesic",
    "Exercise", "Adrenal", "test strip", "proteinuria negative",
    "test urine sample"
  ),
  collapse = "|"
)

7.3.2 Define codes

Import the OpenSAFELY kidney transplant, dialysis, and chronic kidney disease code lists and define inclusion codes from them.

# Read the three code lists (kidney transplant, dialysis, chronic kidney
# disease) in that order and merge them with successive full joins on their
# shared columns, then rename the CTV3 columns to the names used by the
# dictionaries in this file.
ckd_codelist <- c(
  "raw_data/kd_code_lists/opensafely-kidney-transplant-2020-07-15.csv",
  "raw_data/kd_code_lists/opensafely-dialysis-2020-07-16.csv",
  "raw_data/kd_code_lists/opensafely-chronic-kidney-disease-2020-04-14.csv"
) %>%
  lapply(fread) %>%
  Reduce(f = full_join) %>%
  dplyr::rename(term_description = CTV3PreferredTermDesc, code = CTV3ID)

# Codes on any of the imported lists are included regardless of keyword match.
inclusion_codes <- ckd_codelist[["code"]]

7.3.3 Create diabetic kidney disease code dictionary

Create kidney disease dictionary.

# Build the candidate kidney disease dictionary from `full_dict`.
#   1. Keep terms whose description matches an inclusion keyword, or whose
#      code is on the imported code lists.
#   2. Drop terms whose description matches the global exclusion patterns
#      (defined outside this section) or the kidney-specific ones. The
#      exclusions apply to code-list matches as well.
# All pattern matching is case-insensitive. `TRUE` is spelled out because `T`
# is an ordinary variable that can be reassigned.
kidney_disease_dict <- full_dict %>%
  filter(
    grepl(inclusion_keyword_patterns, term_description, ignore.case = TRUE) |
      code %in% inclusion_codes
  ) %>%
  filter(
    !grepl(global_exclusion_keyword_patterns, term_description,
           ignore.case = TRUE),
    !grepl(exclusion_keyword_patterns, term_description, ignore.case = TRUE)
  )

Filter to terms that actually appeared in the PC data to expedite review.

# Restrict the dictionary to codes that occur in `terms_actual`, so that only
# terms actually observed in the data need manual review.
kidney_disease_actual <- filter(
  kidney_disease_dict,
  code %in% terms_actual[["code"]]
)

Generate kidney disease case dictionary.

# Case dictionary: observed kidney disease terms whose description matches a
# case pattern (case-insensitive) or is exactly one of the listed case
# keywords (exact, case-sensitive comparison via %in%).
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
kidney_disease_case_dict <- kidney_disease_actual %>%
  filter(
    grepl(inclusion_keyword_patterns_case, term_description,
          ignore.case = TRUE) |
      term_description %in% inclusion_keywords_case
  )

Review terms.

# Review table: one row per code with the first description seen for it.
# Selecting the two columns and de-duplicating on `code` gives the same rows
# as the previous back-to-back distinct() calls; `TRUE` replaces the
# reassignable `T`.
kidney_disease_case_review <- kidney_disease_case_dict %>%
  select(code, term_description) %>%
  distinct(code, .keep_all = TRUE)

See if there are any additional mapped terms. For kidney disease cases, there are none.

# Look for codes that case terms map to (via `read_map`) but that are not yet
# in the case dictionary.
# NOTE(review): both joins rely on dplyr's natural join over shared column
# names; confirm the intended keys against `read_map` and `full_dict`.
kidney_disease_case_terms_map <- kidney_disease_case_dict %>%
  left_join(read_map) %>%
  # Keep only rows that have a mapping to a code not already in the dictionary.
  filter(
    !is.na(mapped_code),
    !mapped_code %in% kidney_disease_case_dict$code
  ) %>%
  arrange(code) %>%
  # Drop the source term's note so the join below brings in the note that
  # belongs to the mapped code.
  select(-terminology_note) %>%
  left_join(
    full_dict %>%
      dplyr::rename(
        mapped_code = code,
        mapped_description = term_description,
        mapped_terminology = terminology
      )
  ) %>%
  # One row per mapped code. A trailing distinct() is unnecessary because
  # every remaining row has a different mapped_code.
  group_by(mapped_code) %>%
  slice(1) %>%
  # Return an ungrouped table so later verbs are not silently applied per group.
  ungroup()
kidney_disease_case_terms_map

Generate kidney disease control exclusion dictionary.

# Control exclusion dictionary: every observed kidney disease term whose code
# did not qualify for the case dictionary.
kidney_disease_control_exclusion_dict <- filter(
  kidney_disease_actual,
  !code %in% kidney_disease_case_dict[["code"]]
)

Review terms.

# Review table: one row per code with the first description seen for it.
# Selecting the two columns and de-duplicating on `code` gives the same rows
# as the previous back-to-back distinct() calls; `TRUE` replaces the
# reassignable `T`.
kidney_disease_control_exclusion_review <-
  kidney_disease_control_exclusion_dict %>%
  select(code, term_description) %>%
  distinct(code, .keep_all = TRUE)

See if there are any additional mapped terms.

# Look for codes that control-exclusion terms map to (via `read_map`) but that
# are not yet in the control exclusion dictionary.
# NOTE(review): both joins rely on dplyr's natural join over shared column
# names; confirm the intended keys against `read_map` and `full_dict`.
kidney_disease_control_exclusion_terms_map <-
  kidney_disease_control_exclusion_dict %>%
  left_join(read_map) %>%
  # Keep only rows that have a mapping to a code not already in the dictionary.
  filter(
    !is.na(mapped_code),
    !mapped_code %in% kidney_disease_control_exclusion_dict$code
  ) %>%
  arrange(code) %>%
  # Drop the source term's note so the join below brings in the note that
  # belongs to the mapped code.
  select(-terminology_note) %>%
  left_join(
    full_dict %>%
      dplyr::rename(
        mapped_code = code,
        mapped_description = term_description,
        mapped_terminology = terminology
      )
  ) %>%
  # One row per mapped code. A trailing distinct() is unnecessary because
  # every remaining row has a different mapped_code.
  group_by(mapped_code) %>%
  slice(1) %>%
  # Return an ungrouped table: this result is row-bound to the dictionary
  # afterwards and should not carry grouping metadata.
  ungroup()
kidney_disease_control_exclusion_terms_map

Combine the new terms with the original kidney disease control exclusion dictionary.

# Append the newly found mapped terms to the control exclusion dictionary.
# The mapped columns are renamed to the dictionary's column names first.
# ungroup() is applied because the mapped-terms table may still be grouped by
# mapped_code, and bind_rows() is used instead of rbind() so that columns are
# matched by name rather than depending on base rbind() dispatch and column
# order.
kidney_disease_control_exclusion_dict_final <- dplyr::bind_rows(
  kidney_disease_control_exclusion_dict,
  kidney_disease_control_exclusion_terms_map %>%
    ungroup() %>%
    select(
      code = mapped_code,
      term_description = mapped_description,
      terminology = mapped_terminology,
      terminology_note
    )
)

Save kidney disease case dictionary and kidney disease control exclusion code dictionary.

# Persist both dictionaries. The control exclusion file holds the final
# dictionary, i.e. including the additional mapped terms.
saveRDS(
  kidney_disease_case_dict,
  file = "generated_data/kidney_disease_case_dict.RDS"
)
saveRDS(
  kidney_disease_control_exclusion_dict_final,
  file = "generated_data/kidney_disease_control_exclusion_dict.RDS"
)