7.1 Prepare diabetes code dictionary
7.1.1 Define keywords
Define diabetes-specific exclusion keywords.
# Diabetes-specific exclusion keywords, stored as a single regex alternation.
# The string deliberately starts with "|" because it is appended directly to
# `global_exclusion_keyword_patterns` with paste0() when the combined
# exclusion pattern is built.
# NOTE(review): "eligibiliby" looks like a typo for "eligibility"; it is left
# unchanged because correcting it would alter which terms are excluded.
# NOTE(review): in "preg." the dot is a regex wildcard (any character), not a
# literal full stop -- confirm this is intended.
dm_specific_exclusion_keyword_patterns <- "|serum|antibody|remission|relative|association member|high risk of diabetes|suspected diabetes|non-diabetes|breath test|questionnaire|risk score|category score|risk calculator|inhibitor function|C-peptide level|factor binding protein 3|stress test|Insulin tolerance test|Urine screening test|X-ray|NHS Diabetes Prevention|Provision of diabetes clinical summary|diabetes mellitus screen|leaflet given|declined|C1-esterase|pituitary|helicobacter|ineligible|invite|invitation|insulinoma|steroid|secondary diabetes|pre-diabet|prediabet|insipid|provision of written information|not required|national audit|diabetes screen|renal diabetes|non-diabet|^diabetic nurse$|^Diabetic liaison nurse$|jamaica|secondary pancreatic diabetes|driving|neonatal|Addison|PABA test|growth factor|Plasma insulin level|key contact|eligibiliby|CHA2DS2|Professional judgement|Diabetes mellitus: no|non diabetic|information prescription|mother has|preg.|bronzed|Diabetes dietitian|Urine Ketone Test|deleted|refuse|gastropathy|pneumon|Frequency of hypoglycaem|^insulin level$|drug-induced|drug induced|hyperglyceridaemia"
Define diabetes exclusion keywords.
# Full exclusion regex: the global exclusion keywords followed by the
# diabetes-specific ones. The two strings are concatenated with no separator.
# NOTE(review): `global_exclusion_keyword_patterns` is defined outside this
# section; presumably it is a non-empty alternation without a trailing "|" --
# confirm, since the diabetes-specific string supplies the joining "|".
dm_exclusion_keyword_patterns <- paste0(
  global_exclusion_keyword_patterns,
  dm_specific_exclusion_keyword_patterns
)
Define inclusion keywords.
# Regex alternation of keywords; a term description matching any of them is a
# candidate for the diabetes dictionary.
dm_inclusion_keyword_patterns <- "diabetic|diabetes|diabeto|insulin|hyperglyc|hypoglyc|glycemic control"
7.1.2 Define codes
Define diabetes exclusion codes.
# Individual codes to drop from the diabetes dictionary (exact matches).
# "C1..." is written with three ASCII dots: the previous text held a single
# Unicode ellipsis character, which can never equal a dot-padded code.
# NOTE(review): "J4z0" has four characters while every other entry has five;
# confirm whether a trailing "." is missing.
dm_exclusion_codes <- c(
  "ZV653", "C3760", "J4z0", "Y3045", "7L1L2", "Y0015", "X789v", "Y7ITk",
  "Y2200", "42c..", "42W..", "42WZ.", "66Ae.", "66Ae0", "66AF.", "C1...",
  "XaCET", "XaCEU", "XaCEV"
)

# Code prefixes to drop: any code starting with "42W" or "42c".
dm_exclusion_code_patterns <- paste(c("^42W", "^42c"), collapse = "|")
Import diabetes code lists.
# Build one combined diabetes code list from the OpenSAFELY and CALIBER files.
# All joins are natural joins (no `by`), i.e. on whatever columns the two
# sides share at that point in the pipeline.
dm_codelist <- fread("raw_data/dm_code_lists/opensafely-diabetes-2020-04-15.csv") %>%
  # Type 1 and type 2 lists are tagged with Category 1 and 2 respectively.
  full_join(
    fread("raw_data/dm_code_lists/opensafely-type-1-diabetes-2020-06-29.csv") %>%
      mutate(Category = 1)
  ) %>%
  full_join(
    fread("raw_data/dm_code_lists/opensafely-type-2-diabetes-2020-06-29.csv") %>%
      mutate(Category = 2)
  ) %>%
  # The Exeter list names its description column `ctvterm`; align it with the
  # other OpenSAFELY lists before joining.
  full_join(
    fread("raw_data/dm_code_lists/opensafely-diabetes-exeter-group-2020-07-06.csv") %>%
      dplyr::rename(CTV3PreferredTermDesc = ctvterm)
  ) %>%
  # Harmonise the OpenSAFELY column names with the CALIBER block below.
  dplyr::rename(term_description = CTV3PreferredTermDesc) %>%
  dplyr::rename(code = CTV3ID) %>%
  mutate(Category = as.character(Category)) %>%
  # CALIBER lists: combine the three files, then rename to the shared schema.
  full_join(
    fread("raw_data/dm_code_lists/read_diabetescomplications_caliber.txt") %>%
      full_join(fread("raw_data/dm_code_lists/read_diabetes_expanded_caliber.txt")) %>%
      full_join(fread("raw_data/dm_code_lists/read_diabetes_caliber.txt")) %>%
      dplyr::rename(code = Clinical_code) %>%
      dplyr::rename(term_description = Clinical_term) %>%
      dplyr::rename(Category = `Category_(code)`)
  )
Define inclusion codes.
# Every code present in the combined code list is an inclusion code.
dm_inclusion_codes <- dm_codelist$code
Define additional diabetes code patterns.
# Code prefixes that are included regardless of the term description: any
# code starting with "66A", "C10" or "F420".
dm_inclusion_code_patterns <- paste(c("^66A", "^C10", "^F420"), collapse = "|")
7.1.3 Create diabetes code dictionary
Create diabetes code dictionary.
# Diabetes dictionary: start from the full dictionary, keep rows that satisfy
# at least one inclusion rule, then drop rows that hit any exclusion rule.
dm_dict <- full_dict %>%
  # Inclusion: keyword in the description (case-insensitive), OR code in the
  # imported code lists, OR code matching an inclusion prefix.
  filter(
    grepl(dm_inclusion_keyword_patterns, term_description, ignore.case = TRUE) |
      code %in% dm_inclusion_codes |
      grepl(dm_inclusion_code_patterns, code)
  ) %>%
  # Exclusion: all three conditions must hold for a row to be kept.
  filter(
    !grepl(dm_exclusion_keyword_patterns, term_description, ignore.case = TRUE),
    !(code %in% dm_exclusion_codes),
    !grepl(dm_exclusion_code_patterns, code)
  )
We want to filter outcome-specific dictionaries to only those codes that actually occur in the primary care data, to expedite review of the terms that we include. First, read in the distinct terms in PC data.
# Distinct codes that occur in the primary care data. Only the `code` column
# is read from the file.
terms_actual <- fread(
  "generated_data/entire_gp_clinical_30March2021_formatted.txt",
  select = "code"
) %>%
  distinct()
Now, filter the DM dictionary to terms that exist in the PC data.
# Restrict the dictionary to codes that are actually present in the primary
# care data.
dm_dict_actual <- dm_dict %>%
  filter(code %in% terms_actual$code)

# Review table: one row per code. Distinct (code, description) pairs are taken
# first, then the first remaining row for each code is kept.
dm_dict_review <- dm_dict_actual %>%
  distinct(code, term_description) %>%
  distinct(code, .keep_all = TRUE)
Now, map the Read v2 terms in the dictionary to CTV3, and vice versa, to make sure we capture equivalent terms. The term mappings are provided by UKB in Resource 592.
# Lookup sheets from the UKB mapping workbook.
# NOTE(review): sheets are selected by position (14 and 19); presumably these
# are the Read v2 -> CTV3 and CTV3 -> Read v2 maps respectively -- confirm
# against the workbook, since positional indices break if sheets are reordered.
map23 <- read_xlsx("raw_data/all_lkps_maps_v3.xlsx", sheet = 14)
map32 <- read_xlsx("raw_data/all_lkps_maps_v3.xlsx", sheet = 19)

# Bidirectional code map in long form: one row per (code, mapped_code) pair,
# labelled with the source and target terminology.
read_map <- map32 %>%
  # Read v3 -> v2 direction: keep only rows flagged IS_ASSURED == 1.
  select(READV3_CODE, READV2_CODE, IS_ASSURED) %>%
  filter(IS_ASSURED == 1) %>%
  dplyr::rename(code = READV3_CODE, mapped_code = READV2_CODE) %>%
  mutate(terminology = "read3", mapped_terminology = "read2") %>%
  select(-IS_ASSURED) %>%
  # Read v2 -> v3 direction (no IS_ASSURED filter is applied here).
  rbind(
    map23 %>%
      select(READV2_CODE, READV3_CODE) %>%
      dplyr::rename(code = READV2_CODE, mapped_code = READV3_CODE) %>%
      mutate(terminology = "read2", mapped_terminology = "read3")
  ) %>%
  distinct() %>%
  # Drop identity mappings.
  filter(code != mapped_code) %>%
  filter(!grepl("\\.\\.", mapped_code) & !grepl("\\.\\.", code)) %>% # remove very broad mappings
  filter(code %in% terms_actual$code & mapped_code %in% terms_actual$code) # only keep pairs that exist in gp_clinical
Get any additional mapped codes to include in the DM dictionary.
# Mapped codes that are equivalent to a dictionary code but are not yet in the
# diabetes dictionary.
dm_terms_map <- left_join(dm_dict_actual, read_map) %>% # natural join on shared columns
  # Keep only dictionary rows that have a mapping ...
  filter(!is.na(mapped_code)) %>%
  # ... whose target is not already in the (unfiltered) diabetes dictionary.
  filter(!(mapped_code %in% dm_dict$code)) %>%
  arrange(code) %>%
  # Drop the source row's note so the join below brings in the mapped term's.
  select(-terminology_note) %>%
  # Look up the mapped code's own description in the full dictionary.
  left_join(
    full_dict %>%
      dplyr::rename(
        mapped_code = code,
        mapped_description = term_description,
        mapped_terminology = terminology
      )
  ) %>%
  # One row per mapped code: the first after sorting by source code.
  # NOTE: the result stays grouped by `mapped_code` (no ungroup()).
  group_by(mapped_code) %>%
  slice(1) %>%
  distinct() %>%
  filter(!(grepl("[Dd]rug induced", mapped_description)))
Combine the new terms with the original DM dictionary.
# Final dictionary: the codes observed in primary care plus the additional
# mapped codes, with the mapped columns renamed to the dictionary's names.
dm_dict_final <- rbind(
  dm_dict_actual,
  dm_terms_map %>%
    select(
      code = mapped_code,
      term_description = mapped_description,
      terminology = mapped_terminology,
      terminology_note
    )
)
Save diabetes code dictionary.
# Persist the final diabetes dictionary to disk as an RDS file.
saveRDS(
  object = dm_dict_final,
  file = "generated_data/dm_dict.RDS"
)