data.table 실전

file 읽어오기

1-1) sas 파일을 fst 파일/csv 파일로 변경

setDTthreads(0)   # set core number (0: all)

for (v in c("bnc", "bnd", "m20", "m30", "m40", "m60", "inst", "g1e_0208", "g1e_0915")) {
  # read sas file
  read_sas(file.path("data", paste0("nsc2_", v, "_1000.sas7bdat"))) %>%
    # wrtie fst file
    write_fst(file.path("data", paste0("nsc2_", v, "_1000.fst"))) %>% 
    # write csv file
    fwrite(file.path("data", paste0("nsc2_", v, "_1000.csv")))
}

file 읽어오기

1-2) fst파일 / csv 파일 읽어오기

# fst 파일 읽어오기
inst <- read_fst("data/nsc2_inst_1000.fst", as.data.table = T)
bnc <- read_fst("data/nsc2_bnc_1000.fst", as.data.table = T)
bnd <- read_fst("data/nsc2_bnd_1000.fst", as.data.table = T)
m20 <- read_fst("data/nsc2_m20_1000.fst", as.data.table = T)
m30 <- read_fst("data/nsc2_m30_1000.fst", as.data.table = T)
m40 <- read_fst("data/nsc2_m40_1000.fst", as.data.table = T)
m60 <- read_fst("data/nsc2_m60_1000.fst", as.data.table = T)
g1e_0915 <- read_fst("data/nsc2_g1e_0915_1000.fst", as.data.table = T)

# csv 파일 읽어오기
inst <- fread("data/nsc2_inst_1000.csv")
bnc <- fread("data/nsc2_bnc_1000.csv")
bnd <- fread("data/nsc2_bnd_1000.csv")
m20 <- fread("data/nsc2_m20_1000.csv")
m30 <- fread("data/nsc2_m30_1000.csv")
m40 <- fread("data/nsc2_m40_1000.csv")
m60 <- fread("data/nsc2_m60_1000.csv")
g1e_0915 <- fread("data/nsc2_g1e_0915_1000.csv")

file 읽어오기

1-3) 데이터 전처리

  • bnd에서 새로운 변수 Deathdate 만들기
bnd <- bnd[, Deathdate := (lubridate::ym(DTH_YYYYMM) %>% lubridate::ceiling_date(unit = "month") - 1)][]


RN_INDI BTH_YYYY DTH_YYYYMM COD1 COD2 Deathdate
490260 1923 201112 R54 2011-12-31
549702 1924 200901 R54 2009-01-31
344905 1924 201110 R54 2011-10-31
476371 1931 201511 R54 2015-11-30
69579 1929 201202 R54 2012-02-29

file 읽어오기

1-3) 데이터 전처리

  • m40에서 SICK_CLSF_TYPE가 3인 데이터는 제외
# 1: 주상병, 2: 부상병, 3: 배제된 상병
m40 <- m40[SICK_CLSF_TYPE %in% c(1, 2, NA)]   # exclude 3

Inclusion

Hypertensive disease의 질병코드

code.HTN <- paste(paste0("I", 10:15), collapse = "|")
code.HTN
[1] "I10|I11|I12|I13|I14|I15"


2006년 1월 1일 이후Hypertensive disease에 대한 진료 받은 사람만 추출

data.start <- m20[like(SICK_SYM1, code.HTN) & (MDCARE_STRT_DT >= 20060101), .(Indexdate = min(MDCARE_STRT_DT)), keyby = "RN_INDI"]
# 사람 별로 첫 진단일만 뽑음
RN_INDI Indexdate
6374 20060123
7386 20060221
13546 20060808
23682 20080922
27490 20060103
36714 20100119
39217 20130402
46621 20110324
51049 20061123
56993 20090613
58649 20101207
60216 20090911
66424 20060112
69579 20060125
72225 20060124
79379 20100402
80234 20060216
92198 20060113
98052 20111103
102206 20090419
109331 20070905
125422 20090616
127519 20060310
134613 20071103
138307 20061023
138407 20060223
145424 20081002
149142 20150421
160611 20060203
161624 20060210
177099 20060131
179785 20070522
181909 20060619
186069 20080922
195513 20060313
196293 20060127
203673 20060120
207927 20101112
210270 20060102
215084 20091209
219397 20070129
230178 20060107
237845 20110518
238542 20090202
242807 20110423
242976 20080604
243053 20090610
259727 20080703
266165 20060214
291910 20070730
294913 20060127
297087 20150129
300860 20120618
303645 20070704
311226 20131221
314169 20090722
317148 20070810
318399 20060302
322147 20121217
345207 20101106
347907 20060124
349232 20100504
354989 20150106
356551 20110208
363994 20060206
364020 20060308
364892 20060116
367481 20120706
370920 20060704
372716 20091022
375694 20060119
376332 20060829
387855 20110822
393830 20090715
394639 20060505
395781 20151201
412761 20080626
416450 20070731
422180 20070830
427754 20130409
435300 20060109
437139 20060306
437237 20100508
441816 20060203
443755 20060210
444042 20070614
453742 20060113
459837 20081120
473993 20060202
476371 20100503
480569 20060125
483473 20060404
483546 20061216
484978 20060204
486441 20060119
490204 20060118
497301 20060327
501161 20060105
505228 20130402
517447 20111005
518558 20060211
518792 20110109
525200 20060105
529690 20130109
530990 20151125
540586 20080603
546772 20060724
551252 20080520
558657 20060128
559420 20141222
562083 20060103
562142 20130327
575578 20100210
588516 20080428
591264 20150319
606125 20111027
606567 20070210
611137 20120817
611279 20090123
613708 20071129
621264 20060106
624881 20071019
626896 20100714
628858 20110615
630422 20110727
634861 20060315
637115 20150914
637160 20090210
637320 20060107
652416 20130924
654182 20110808
661154 20150704
665633 20080522
668438 20091126
669498 20060109
689889 20110225
693283 20090604
694083 20130208
699907 20060203
716799 20130430
720544 20060119
720792 20070317
725112 20060223
734700 20060124
736688 20060210
737082 20060121
737871 20060203
747424 20111125
749074 20150319
759885 20131022
762129 20060102
768249 20060107
771097 20060109
775244 20061026
781445 20071012
781532 20100410
784232 20110425
792082 20111212
797920 20060209
802213 20150923
814015 20060317
814600 20060824
820458 20060626
828529 20111220
836494 20110528
839897 20091021
853696 20060819
863628 20100421
870692 20060125
874800 20120105
876492 20060202
883814 20060331
888305 20120925
888765 20100927
889062 20130309
895161 20101210
898484 20060218
899654 20060202
914987 20060117
917270 20060205
925523 20101005
934252 20151127
934726 20090128
934772 20081013
942671 20150908
944984 20101125
951170 20130831
970799 20070502
975124 20080628
976635 20090923
977635 20060220
978767 20110928
979151 20131112
985414 20060118
986287 20080820

Exclusion

2006년 1월 1일 이전에 이미 진단 받은 기록이 있는 사람 제외

# 제외 대상
excl <- m40[(MCEX_SICK_SYM %like% code.HTN) & (MDCARE_STRT_DT < 20060101), .SD[1], .SDcols = c("MDCARE_STRT_DT"), keyby = "RN_INDI"]
RN_INDI MDCARE_STRT_DT
6374 20021114
7386 20020306
27490 20020401
66424 20030721
69579 20021203
72225 20031114
79250 20040801
79379 20030922
80234 20020902
92198 20041008
109331 20020102
121496 20021113
127519 20030314
134613 20030702
138307 20050901
138407 20020408
157200 20020206
160611 20020107
161624 20031112
177099 20051222
177391 20051110
179785 20030707
181909 20020902
186069 20050607
195513 20031108
196293 20021102
203673 20050808
210270 20031208
225956 20031110
230178 20041002
238542 20050118
259727 20030121
266165 20051006
277202 20020823
291910 20020204
294913 20020205
297087 20020401
303645 20021101
311226 20050523
314169 20041127
318399 20021106
335948 20031022
347907 20020402
356971 20030802
363994 20020702
364020 20031006
364304 20030212
364892 20050816
370920 20040517
375694 20040202
393830 20021007
412761 20031121
412901 20031010
422180 20031111
422663 20041216
435300 20030103
435444 20050704
437139 20020201
441816 20020201
443755 20030602
450451 20021230
453742 20051208
459837 20040909
473993 20021202
480569 20020902
483473 20020114
483546 20021203
486441 20040518
490204 20040302
490260 20020302
497301 20020305
501161 20021121
518558 20041213
525200 20041130
558657 20050207
562083 20020906
575578 20051029
588516 20020319
606567 20050616
621264 20050308
624881 20021202
626896 20030805
628858 20041015
631331 20050418
634861 20021211
637160 20050923
637320 20041213
642899 20021005
657227 20051208
668438 20031107
669498 20020401
681763 20020204
716799 20021108
720544 20021002
725112 20051101
725720 20041227
734700 20031218
736688 20021118
737082 20021014
762129 20031229
768249 20030718
771097 20021111
781445 20040202
797920 20020702
814015 20020402
814600 20021002
817595 20020725
819784 20031121
820458 20020605
853696 20021203
870692 20051006
876492 20021202
887943 20031226
888305 20051226
889062 20050615
899654 20020612
914987 20020201
917270 20021209
942671 20031013
948365 20021211
956735 20030509
970799 20020201
971021 20020604
976635 20051123
977635 20021202
985414 20020603
986287 20050224
# anti-join으로 excl 제외하고 Indexdate의 타입을 character에서 date로 변경
data.incl <- data.start[!excl, on = "RN_INDI"][, Indexdate := as.Date(as.character(Indexdate), format = "%Y%m%d")][]
RN_INDI Indexdate
13546 2006-08-08
23682 2008-09-22
36714 2010-01-19
39217 2013-04-02
46621 2011-03-24
51049 2006-11-23
56993 2009-06-13
58649 2010-12-07
60216 2009-09-11
98052 2011-11-03
102206 2009-04-19
125422 2009-06-16
145424 2008-10-02
149142 2015-04-21
207927 2010-11-12
215084 2009-12-09
219397 2007-01-29
237845 2011-05-18
242807 2011-04-23
242976 2008-06-04
243053 2009-06-10
300860 2012-06-18
317148 2007-08-10
322147 2012-12-17
345207 2010-11-06
349232 2010-05-04
354989 2015-01-06
356551 2011-02-08
367481 2012-07-06
372716 2009-10-22
376332 2006-08-29
387855 2011-08-22
394639 2006-05-05
395781 2015-12-01
416450 2007-07-31
427754 2013-04-09
437237 2010-05-08
444042 2007-06-14
476371 2010-05-03
484978 2006-02-04
505228 2013-04-02
517447 2011-10-05
518792 2011-01-09
529690 2013-01-09
530990 2015-11-25
540586 2008-06-03
546772 2006-07-24
551252 2008-05-20
559420 2014-12-22
562142 2013-03-27
591264 2015-03-19
606125 2011-10-27
611137 2012-08-17
611279 2009-01-23
613708 2007-11-29
630422 2011-07-27
637115 2015-09-14
652416 2013-09-24
654182 2011-08-08
661154 2015-07-04
665633 2008-05-22
689889 2011-02-25
693283 2009-06-04
694083 2013-02-08
699907 2006-02-03
720792 2007-03-17
737871 2006-02-03
747424 2011-11-25
749074 2015-03-19
759885 2013-10-22
775244 2006-10-26
781532 2010-04-10
784232 2011-04-25
792082 2011-12-12
802213 2015-09-23
828529 2011-12-20
836494 2011-05-28
839897 2009-10-21
863628 2010-04-21
874800 2012-01-05
883814 2006-03-31
888765 2010-09-27
895161 2010-12-10
898484 2006-02-18
925523 2010-10-05
934252 2015-11-27
934726 2009-01-28
934772 2008-10-13
944984 2010-11-25
951170 2013-08-31
975124 2008-06-28
978767 2011-09-28
979151 2013-11-12

Exclusion

data.incl에 age, sex, death 변수 추가

  • bnd에 death 나타내는 변수, bnc에 성별 변수 있음 ⇒ data.incl에 추가
data.asd <- merge(bnd, bnc[, .(SEX = SEX[1]), keyby = "RN_INDI"], by = "RN_INDI") %>% 
  merge(data.incl, by = "RN_INDI") %>%
  .[, `:=`(Age = year(Indexdate) - as.integer(substr(BTH_YYYY, 1, 4)),
           Death = as.integer(!is.na(DTH_YYYYMM)),
           Day_FU = as.integer(pmin(as.Date("2015-12-31"), Deathdate, na.rm = T) - Indexdate))] %>%
  .[, -c("BTH_YYYY", "DTH_YYYYMM", "Deathdate")]   # 변수 제거

Exclusion

data.incl에 age, sex, death 변수 추가

  • Age: 진단 받았을 때의 나이
  • Death: DTH_YYYMM에 값이 존재하면 사망한 것
  • Day_FU: follow-up한 일수
data.asd <- merge(bnd, bnc[, .(SEX = SEX[1]), keyby = "RN_INDI"], by = "RN_INDI") %>% 
  merge(data.incl, by = "RN_INDI") %>%
  .[, `:=`(Age = year(Indexdate) - as.integer(substr(BTH_YYYY, 1, 4)),
           Death = as.integer(!is.na(DTH_YYYYMM)),  
           Day_FU = as.integer(pmin(as.Date("2015-12-31"), Deathdate, na.rm = T) - Indexdate))] %>%
  .[, -c("BTH_YYYY", "DTH_YYYYMM", "Deathdate")]   # 변수 제거

Exclusion

data.incl에 age, sex, death 변수 추가

data.asd <- merge(bnd, bnc[, .(SEX = SEX[1]), keyby = "RN_INDI"], by = "RN_INDI") %>% 
  merge(data.incl, by = "RN_INDI") %>%
  .[, `:=`(Age = year(Indexdate) - as.integer(substr(BTH_YYYY, 1, 4)),
           Death = as.integer(!is.na(DTH_YYYYMM)),  
           Day_FU = as.integer(pmin(as.Date("2015-12-31"), Deathdate, na.rm = T) - Indexdate))] %>%
  .[, -c("BTH_YYYY", "DTH_YYYYMM", "Deathdate")]   # 변수 제거


RN_INDI COD1 COD2 SEX Indexdate Age Death Day_FU
13546 1 2006-08-08 45 0 3432
23682 1 2008-09-22 53 0 2656
36714 2 2010-01-19 64 0 2172
39217 1 2013-04-02 56 0 1003
46621 1 2011-03-24 51 0 1743
51049 1 2006-11-23 21 0 3325

CCI 계산

과거력 list

code.cci <- list(
  MI = c("I21", "I22", "I252"),
  CHF = c(paste0("I", c("099", 110, 130, 132, 255, 420, 425:429, 43, 50)), "P290"),
  Peripheral_VD = c(paste0("I", c(70, 71, 731, 738, 739, 771, 790, 792)), paste0("K", c(551, 558, 559)), "Z958", "Z959"),
  Cerebro_VD = c("G45", "G46", "H340", paste0("I", 60:69)),
  Dementia = c(paste0("F0", c(0:3, 51)), "G30", "G311"),
  Chronic_pulmonary_dz = c("I278", "I279", paste0("J", c(40:47, 60:67, 684, 701, 703))),
  Rheumatologic_dz = paste0("M", c("05", "06", 315, 32:34, 351, 353, 360)),
  Peptic_ulcer_dz = paste0("K", 25:28),
  Mild_liver_dz = c("B18", paste0("K", c(700:703, 709, 713:715, 717, 73, 74, 760, 762:764, 768, 769)), "Z944"),
  DM_no_complication = paste0("E", c(100, 101, 106, 108:111, 116, 118:121, 126, 128:131, 136, 138:141, 146, 148, 149)),
  DM_complication = paste0("E", c(102:105, 107, 112:115, 117, 122:125, 127, 132:135, 137, 142:145, 147)),
  Hemi_paraplegia = paste0("G", c("041", 114, 801, 802, 81, 82, 830:834, 839)),
  Renal_dz = c("I120", "I131", paste0("N", c("032", "033", "034", "035", "036", "037", "052", "053", "054", "055", "056", "057", 18, 19, 250)), paste0("Z", c(490:492, 940, 992))),
  Malig_with_Leuk_lymphoma = paste0("C", c(paste0("0", 0:9), 10:26, 30:34, 37:41, 43, 45:58, 60:76, 81:85, 88, 90, 97)),
  Moderate_severe_liver_dz = c(paste0("I", c(85, 859, 864, 982)), paste0("K", c(704, 711, 721, 729, 765:767))),
  Metastatic_solid_tumor = paste0("C", 77:80),
  AIDS_HIV = paste0("B", c(20:22, 24)))

CCI 계산

각 병에 해당하는 CCI score 지정

cciscore <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 6, 6, 2)   # CCI score
names(cciscore) <- names(code.cci)


cciscore

                      MI                      CHF            Peripheral_VD 
                       1                        1                        1 
              Cerebro_VD                 Dementia     Chronic_pulmonary_dz 
                       1                        1                        1 
        Rheumatologic_dz          Peptic_ulcer_dz            Mild_liver_dz 
                       1                        1                        1 
      DM_no_complication          DM_complication          Hemi_paraplegia 
                       1                        2                        2 
                Renal_dz Malig_with_Leuk_lymphoma Moderate_severe_liver_dz 
                       2                        3                        6 
  Metastatic_solid_tumor                 AIDS_HIV 
                       6                        2 

CCI 계산

과거력 확인: Indexdate를 기준으로 365일 내로 진단 날(Incidate)이 존재하면 과거에 병력 존재하는 것으로 간주 - 이 경우 Indexdate와 Incidate를 이어주기 위해 data.table의 roll option을 사용

info.cci <- mclapply(names(code.cci), function(x){
  data.asd[, MDCARE_STRT_DT := Indexdate]
  dt <- m40[like(MCEX_SICK_SYM, paste(code.cci[[x]], collapse = "|"))][, MDCARE_STRT_DT := as.Date(as.character(MDCARE_STRT_DT), format = "%Y%m%d")][, .(RN_INDI, MDCARE_STRT_DT, Incidate = MDCARE_STRT_DT)]  
  dt[, .SD[1], keyby = c("RN_INDI", "MDCARE_STRT_DT")][data.asd, on = c("RN_INDI", "MDCARE_STRT_DT"), roll = 365][, ev := as.integer(!is.na(Incidate))][]$ev * cciscore[x]
}, mc.cores = 4) %>%  do.call(cbind, .) %>% cbind(rowSums(.))

colnames(info.cci) <- c(paste0("Prev_", names(code.cci)), "CCI")   # set column names


Prev_MI Prev_CHF Prev_Peripheral_VD Prev_Cerebro_VD Prev_Dementia Prev_Chronic_pulmonary_dz Prev_Rheumatologic_dz Prev_Peptic_ulcer_dz Prev_Mild_liver_dz Prev_DM_no_complication Prev_DM_complication Prev_Hemi_paraplegia Prev_Renal_dz Prev_Malig_with_Leuk_lymphoma Prev_Moderate_severe_liver_dz Prev_Metastatic_solid_tumor Prev_AIDS_HIV CCI
0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2
0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
0 0 1 0 0 0 0 0 0 1 0 0 0 3 0 0 0 5
0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 2

과거 약 복용 이력 확인

과거 확인할 약: Glucocorticoids, Aspirin, Clopidogrel

code.drug <- list(
  Glucocorticoids = c("116401ATB", "140801ATB", "141901ATB", "141903ATB", "160201ATB", "170901ATB", "170906ATB", "193302ATB",
                      "193305ATB", "217034ASY", "217035ASY", "217001ATB", "243201ATB", "243202ATB", "243203ATB"),
  Aspirin = c("110701ATB", "110702ATB", "111001ACE", "111001ATE", "489700ACR", "517900ACE", "517900ATE", "667500ACE"),
  Clopidogrel = c("136901ATB", "492501ATB", "495201ATB", "498801ATB", "501501ATB", "517900ACE", "517900ATE", "667500ACE")
)

과거 약 복용 이력 확인

Indexdate를 기준으로 365일 내로 약 처방일 기록(inidate)이 존재하면 보다 예전이면 과거에 약 복용한 것으로 간주 - 이 경우 Indexdate와 inidate를 이어주기 위해 data.table의 roll option을 사용

info.prevmed <- mclapply(code.drug, function(x){
  data.asd[, MDCARE_STRT_DT := Indexdate]
  dt <- m60[GNL_NM_CD %in% x][, MDCARE_STRT_DT := as.Date(as.character(MDCARE_STRT_DT), format = "%Y%m%d")][, .(RN_INDI, MDCARE_STRT_DT, inidate = MDCARE_STRT_DT)] 
  dt[, .SD[1], keyby = c("RN_INDI", "MDCARE_STRT_DT")][data.asd, on = c("RN_INDI", "MDCARE_STRT_DT"), roll = 365][, ev := as.integer(!is.na(inidate))][]$ev
}, mc.cores = 3) %>% do.call(cbind, .)

colnames(info.prevmed) <- paste0("Prev_", names(code.drug))  # set column names


Prev_Glucocorticoids Prev_Aspirin Prev_Clopidogrel
0 0 0
0 0 0
0 0 0
1 0 0
1 0 0

Outcome

MI 발병했는지 확인: MI가 발병한 날(MIdate)이 Indexdate을 기준으로 그 뒤에 기록이 있으면 발병한 것으로 간주 - 이 경우 Indexdate와 inidate를 이어주기 위해 data.table의 roll option 중 -Inf을 사용

  • MI: MI 발병했으면 1
  • MIday: MI 발병하기까지 일수
data.asd[, MDCARE_STRT_DT := Indexdate]
info.MI <- m40 %>% 
  .[like(MCEX_SICK_SYM, paste(code.cci[["MI"]], collapse = "|")), .(RN_INDI, MDCARE_STRT_DT = as.Date(as.character(MDCARE_STRT_DT), format = "%Y%m%d"), MIdate = as.Date(as.character(MDCARE_STRT_DT), format = "%Y%m%d"))] %>%
  .[data.asd, on = c("RN_INDI", "MDCARE_STRT_DT"), roll = -Inf] %>% 
  .[Indexdate <= MIdate] %>% 
  .[order(MIdate), .(MI = 1, MIday = as.integer(MIdate - Indexdate)[1]), keyby = "RN_INDI"]


RN_INDI MI MIday
484978 1 0
562142 1 0
874800 1 571

심화

약 복용기간 계산

Proton-pump inhibitor: 양성자 펌프 억제제

code.ppi <-  c("367201ACH", "367201ATB", "367201ATD", "367202ACH", "367202ATB", 
               "367202ATD", "498001ACH", "498002ACH", "509901ACH", "509902ACH", 
               "670700ATB", "204401ACE", "204401ATE", "204402ATE", "204403ATE", 
               "664500ATB", "640200ATB", "664500ATB", "208801ATE", "208802ATE", 
               "656701ATE", "519201ATE", "519202ATE", "656701ATE", "519203ATE", 
               "222201ATE", "222202ATE", "222203ATE", "181301ACE", "181301ATD", 
               "181302ACE", "181302ATD", "181302ATE", "621901ACR", "621902ACR", 
               "505501ATE")

약 복용기간 계산

PPI 복용한 데이터만 추출

m60.drug <- m60[GNL_NM_CD %in% code.ppi][order(MDCARE_STRT_DT, TOT_MCNT), .SD[.N], keyby = "RN_KEY"]

# MDCARE_STRT_DT의 타입을 date로 변경
m60.drug[, MDCARE_STRT_DT := lubridate::ymd(MDCARE_STRT_DT)]


RN_KEY RN_INDI MDCARE_STRT_DT FORM_CD MCARE_DESC_LN_NO CLSF_PTTN_CD MPRSC_TIME1_TUYAK_CPCT MPRSC_DD1_TUYAK_CPCT TOT_MCNT UPRC AMT GNL_NM_CD EFMDC_CLSF_NO
2002010011241 79250 2002-01-07 3 3 3 1 1 14 1097 15358 204401ACE 232
2002050234230 621264 2002-05-06 3 6 3 1 1 10 1097 10970 204401ACE 232
2002050704689 92198 2002-05-08 3 1 3 1 1 7 1495 10465 204401ACE 232
2002050814923 802213 2002-05-20 3 44 3 1 1 2 789 1578 204401ACE 232
2002070507330 621264 2002-07-22 3 6 3 1 1 10 1097 10970 204401ACE 232
2003010299300 748540 2003-01-06 3 1 3 1 1 14 935 13090 204401ATE 232
2003020105528 748540 2003-02-07 3 1 3 1 1 14 935 13090 204401ATE 232

약 복용기간 계산

복용기간 계산 함수

두 처방 날짜 사이 간격이 gap 이하이면 연속 복용으로 간주함

dur_conti <- function(indi, gap = 30){
  # 약 복용 시작 날짜 + 복용 기간
  data.ind <- m60.drug[RN_INDI == indi, .(start = MDCARE_STRT_DT, TOT_MCNT)]
  
  # 약 복용한 날짜 순서대로 나열
  datelist <- lapply(1:nrow(data.ind), function(x){data.ind[x, seq(start, start + TOT_MCNT, by = 1)]}) %>% 
    do.call(c, .) %>% unique %>% sort
  
  df <- diff(datelist)   # 날짜 사이 간격
  df[df <= gap] <- 1   # 간격이 gap 이하이면 연속 복용 
  
  # indi + 복용 시작 날짜 + 복용 기간
  res <- data.table(RN_INDI = indi, 
                    start = datelist[1],
                    dur_conti = ifelse(any(df > 1), which(df > 1)[1] - 1, as.integer(sum(df))))
  return(res)
}


dur_conti(indi = 80234)
   RN_INDI      start dur_conti
     <num>     <Date>     <num>
1:   80234 2007-04-26        89

약 복용기간 계산

모든 사람에 대해 함수 적용해 사람마다 약 복용 기간 계산

mclapply(unique(m60.drug$RN_INDI), dur_conti, mc.cores = 1) %>% rbindlist() %>% .[!is.na(RN_INDI)]


RN_INDI start dur_conti
79250 2002-01-07 14
621264 2002-05-06 10
92198 2002-05-08 7
802213 2002-05-20 2
748540 2003-01-06 29
633585 2003-07-15 14
849556 2003-10-06 7
354421 2003-11-10 30
568865 2003-11-15 28
207927 2003-11-21 7
624881 2004-02-16 42
805018 2004-02-13 7
166015 2004-10-14 7
572384 2005-05-09 36
575578 2005-10-19 7
519824 2005-11-01 7
888305 2005-12-26 7
549602 2006-01-02 28
964637 2006-01-13 7
897251 2006-02-02 14
688302 2006-05-04 7
102141 2006-09-22 30
109331 2006-09-01 43
737871 2006-10-13 14
915731 2006-10-24 14
367481 2006-12-28 28
215084 2007-02-14 44
322147 2007-04-19 29
277202 2007-04-05 14
876492 2007-04-03 16
80234 2007-04-26 89
921924 2007-05-19 22
1009573 2007-05-23 7
242807 2007-06-28 22
668438 2007-06-29 54
540586 2007-07-23 7
163096 2007-08-25 5
627701 2007-09-14 34
808467 2007-10-12 21
335948 2007-10-29 58
418137 2007-11-10 14
138407 2007-12-04 21
931141 2008-01-28 30
364020 2008-02-13 22
783750 2008-02-28 7
912895 2008-03-19 29
637160 2008-04-01 24
160611 2008-05-21 18
929195 2008-06-10 7
195513 2008-07-21 7
412901 2008-07-05 92
1006264 2008-08-26 28
986287 2008-09-01 43
483473 2008-09-01 7
290357 2008-10-08 7
888234 2008-10-23 12
479874 2008-11-18 7
798472 2008-11-14 14
225956 2008-11-06 7
756414 2008-12-24 72
835408 2008-12-09 7
324678 2008-12-04 14
637115 2009-01-23 29
922716 2009-02-18 7
610592 2009-03-11 7
663811 2009-05-01 7
484978 2009-07-13 10
299704 2009-07-23 28
934726 2009-07-13 10
183321 2009-08-21 7
334970 2009-08-21 7
230178 2009-09-01 14
551360 2009-10-17 63
411180 2009-10-12 7
992522 2009-10-24 14
260923 2009-11-27 7
311226 2009-11-21 5
197894 2009-11-03 14
179714 2009-12-19 25
262452 2009-12-01 14
15533 2009-12-19 28
326639 2009-12-07 1
4664 2010-01-20 3
486732 2010-01-21 7
599419 2010-01-12 28
763384 2010-01-19 30
326432 2010-02-20 20
186069 2010-02-20 59
226594 2010-02-23 28
517447 2010-02-04 3
204278 2010-03-10 11
238542 2010-03-22 30
334131 2010-03-12 92
153851 2010-04-13 42
730968 2010-04-23 3
701404 2010-05-03 14
979469 2010-06-04 147
792082 2010-06-23 14
689889 2010-08-04 28
718591 2010-08-23 7
356798 2010-08-05 21
606567 2010-09-18 10
606303 2010-10-16 85
314169 2010-10-21 14
942204 2010-11-08 3
671580 2010-11-10 14
196293 2010-11-10 50
318399 2010-12-24 5
530990 2010-12-04 7
177099 2010-12-08 30
2270 2010-12-23 14
529690 2010-12-29 6
79207 2011-01-05 3
344809 2011-01-18 28
699142 2011-02-11 7
30622 2011-03-08 7
3690 2011-03-28 7
562142 2011-03-22 7
708262 2011-04-02 15
294913 2011-04-19 3
394639 2011-04-06 14
114537 2011-04-19 30
306589 2011-05-11 5
541685 2011-05-13 37
302146 2011-06-21 14
453742 2011-07-11 90
376332 2011-07-27 30
535689 2011-07-01 3
99917 2011-08-05 14
1005547 2011-09-14 7
793962 2011-10-05 15
366173 2011-10-21 39
670343 2011-10-27 56
660791 2011-10-15 30
183606 2011-11-18 7
956735 2011-11-10 20
38967 2011-11-02 28
36714 2011-11-03 23
781445 2011-11-08 28
266734 2011-11-10 22
163650 2011-12-29 10
886653 2011-12-06 6
349232 2011-12-01 5
71582 2011-12-08 5
914987 2011-12-13 13
875758 2011-12-14 7
185597 2011-12-26 14
676313 2012-01-18 2
346335 2012-01-31 7
343874 2012-02-21 21
218649 2012-03-17 35
264628 2012-03-27 14
6374 2012-03-14 174
259727 2012-03-27 7
47374 2012-04-02 14
836494 2012-04-24 60
84427 2012-04-03 3
7386 2012-04-16 57
554296 2012-04-18 3
554585 2012-05-12 28
347108 2012-05-03 21
45461 2012-05-22 3
46233 2012-05-16 36
911766 2012-05-22 5
587805 2012-05-09 50
377375 2012-05-25 29
542690 2012-06-14 14
752067 2012-06-18 28
412761 2012-07-04 21
491031 2012-07-17 14
560878 2012-08-21 53
483546 2012-08-03 279
166324 2012-09-14 14
889062 2012-10-10 7
60216 2012-10-31 15
437139 2012-11-22 34
51049 2012-11-16 10
1010623 2012-11-08 85
523536 2012-11-23 6
976635 2012-11-01 14
98052 2012-11-10 23
242976 2012-12-08 37
56250 2012-12-15 15
334536 2012-12-01 7
824794 2012-12-06 14
734700 2012-12-07 5
478840 2012-12-31 25
892360 2012-12-13 7
681010 2012-12-13 3
480668 2012-12-31 19
975124 2013-01-10 12
853696 2013-01-02 7
652416 2013-01-28 2
144317 2013-01-24 37
995838 2013-01-08 3
559420 2013-02-16 7
294572 2013-02-26 7
83553 2013-02-26 7
473250 2013-02-08 61
387855 2013-03-20 42
518792 2013-03-07 28
814015 2013-03-02 36
243053 2013-04-22 44
820458 2013-04-23 92
506774 2013-04-08 7
606125 2013-04-12 3
882980 2013-05-08 13
704501 2013-05-27 2
690677 2013-05-11 7
316400 2013-06-05 2
830306 2013-07-02 3
942671 2013-08-30 15
167400 2013-08-26 14
589540 2013-09-01 4
951170 2013-09-03 5
437237 2013-11-26 10
934772 2013-11-21 7
776098 2013-11-01 3
814115 2013-12-23 28
163129 2013-12-13 36
753924 2013-12-18 4
354989 2013-12-30 7
284267 2013-12-12 14
572549 2013-12-26 30
767368 2014-01-04 14
628127 2014-01-21 5
738257 2014-01-23 15
134307 2014-01-14 18
790184 2014-01-23 22
2064628 2014-02-10 7
750632 2014-03-03 7
946485 2014-03-15 7
145961 2014-04-10 7
279031 2014-04-14 77
663279 2014-05-16 36
979093 2014-05-03 92
39217 2014-05-26 20
979358 2014-05-30 21
357738 2014-05-03 16
421007 2014-06-11 3
797920 2014-07-31 5
727245 2014-08-30 20
92313 2014-08-28 50
962008 2014-08-23 28
442552 2014-10-24 7
248846 2014-10-18 18
440123 2014-10-16 7
565189 2014-10-17 24
125422 2014-10-24 14
591253 2014-10-06 27
390119 2014-10-18 57
212433 2014-11-17 3
481042 2014-12-01 16
924778 2014-12-03 14
367840 2014-12-01 7
364304 2014-12-03 36
27226 2014-12-30 29
252507 2014-12-13 3
178270 2015-01-28 3
752655 2015-02-02 39
332997 2015-02-28 22
300860 2015-03-04 28
913340 2015-03-16 7
942522 2015-03-23 5
532261 2015-04-07 31
437697 2015-04-16 3
586305 2015-04-08 4
79379 2015-05-20 28
452930 2015-06-19 7
422180 2015-06-05 28
231997 2015-06-17 56
121311 2015-06-18 7
97785 2015-07-28 4
9866 2015-07-24 7
85219 2015-07-16 7
652060 2015-07-07 3
686497 2015-07-01 14
693283 2015-07-07 60
834975 2015-07-20 7
8830 2015-08-24 14
911306 2015-08-03 14
518558 2015-08-18 28
93164 2015-08-31 4
680010 2015-08-18 22
775563 2015-09-01 7
347907 2015-09-14 35
41309 2015-10-02 5
764100 2015-10-07 14
874800 2015-11-24 5
502784 2015-11-10 28
182208 2015-11-09 4
440107 2015-11-09 28
934252 2015-11-27 7
771097 2015-12-17 28
556220 2015-12-30 7
153174 2015-12-02 14
167561 2015-12-11 3
138307 2015-12-19 28
266165 2015-12-14 10