Cart 0

R Codes

R is an open source programming language that can be used to extract the important information out of datasets and transfer it into the RI Calculator where it can be further analyzed and visualized. RStudio is an open-source integrated development environment for R.

Like the RI Calculator, R codes must be customized to meet the centralized monitoring needs of different projects. Below are examples of R codes that can be used to extract information out of datasets in different formats (e.g.: sas7bdat, txt, xlsx) and transfer this information into the RI Calculator.

Create a VISIT dataset to compute the Enrolment Rate, Screen Failure Rate and Withdrawal Rate KRIs.

Input (visit.sas7bdat):

projectid Subject Site Folder FolderName VISITDAT
PRJ-999 999-002-001 2 SCREEN Screening 2018-03-21
PRJ-999 999-002-001 2 DAY1 Day 1 2018-03-29
PRJ-999 999-002-001 2 W4 Week 4 2018-04-12
PRJ-999 999-002-001 2 W12 Week 12 2018-07-06
PRJ-999 999-002-002 2 W24 Week 24 2018-09-13
PRJ-999 999-002-002 2 SCREEN Screening 2018-02-15
PRJ-999 999-002-002 2 DAY1 Day 1 2018-12-11
PRJ-999 999-003-001 3 SCREEN Screening 2018-08-31
PRJ-999 999-003-001 3 DAY1 Day 1 2018-10-17
PRJ-999 999-003-001 3 ET Early Termination 2018-01-19

R code:

#Load all packages containing functions used in the different codes snippets
library(haven)
library(dplyr)
library(chron)
library(lubridate)
library(data.table)

#Import visit dataset into R 
visit<-read_sas("C:/visit.sas7bdat")
#Keep only the Variables needed for further computing
visit<-visit[c("Subject", "Folder", "VISITDAT")]
#Rename variables for standardization purpose
visit<-visit %>% rename (SUBJID = Subject,
                         VISIT = Folder,
                         VISITDT = VISITDAT)

#Import follow-up dataset into R to extract follow-up visits dates
fu<-read_sas("C:/ds_fu.sas7bdat")
#Keep only the Variables needed for further computing
fu<-fu[c("Subject","Folder","DSSTDAT_FU")]
#Rename variables for standardization purpose
fu<-fu %>% rename (SUBJID = Subject,
                   VISIT = Folder,
                   VISITDT = DSSTDAT_FU)

#Combine visit and follow-up datasets (stack them)
visit<-rbind(visit,fu)

#Import the enrolment dataset to obtain the subject's enrolment statuses.
status<-read_sas("C:/enroll.sas7bdat")
#Keep only the Variables needed for further computing
status<-status[c("Subject","RUNINYN_STD")]
#Rename variables for standardization purpose and for merging.
status<-status %>% rename (SUBJID = Subject,
                         SCREENSTATUS = RUNINYN_STD)

#Merge visit with subject status
visit<-merge(visit, status)
#Assign character format to date variable
visit<-transform(visit,
              VISITDT=as.character(VISITDT, format=""))
#created new dataset from visit dataset to perform calculations on
delay<-visit

#transpose dataset
SCR<-subset(delay, VISIT=="SCREEN")
SCR<-SCR[c("SUBJID","VISITDT","SCREENSTATUS")]
SCR<-SCR %>% rename (VISITSCR = VISITDT)

DAY1<-subset(delay, VISIT=="DAY1")
DAY1<-DAY1[c("SUBJID","VISITDT")]
DAY1<-DAY1 %>% rename (VISITDAY1 = VISITDT)

W4<-subset(delay, VISIT=="W4")
W4<-W4[c("SUBJID","VISITDT")]
W4<-W4 %>% rename (VISITW4 = VISITDT)

W12<-subset(delay, VISIT=="W12")
W12<-W12[c("SUBJID","VISITDT")]
W12<-W12 %>% rename (VISITW12 = VISITDT)

W24<-subset(delay, VISIT=="W24")
W24<-W24[c("SUBJID","VISITDT")]
W24<-W24 %>% rename (VISITW24 = VISITDT)

ET<-subset(delay, VISIT=="ET")
ET<-ET[c("SUBJID","VISITDT")]
ET<-ET %>% rename (VISITET = VISITDT)

FU<-subset(delay, VISIT=="FU")
FU<-FU[c("SUBJID","VISITDT")]
FU<-FU %>% rename (VISITFU = VISITDT)

#Get Termination date from the DS dataset
DS<-read_sas("F:/Studies/ERC-242/1 - DATA CLEANING/RI Calculator/alldata/ds.sas7bdat")
DS<-DS[c("Subject","DSDECOD","DSSTDAT")]
DS<-DS %>% rename (SUBJID = Subject,
                   VISITTERM = DSSTDAT)

#Merge datasets to get an horizontal table of subjects visits 
delay<-Reduce(function(...) merge(..., all = TRUE, by = "SUBJID"),
                    list(SCR, DAY1, W4, W12, W24, ET, FU, DS))

#Assign date format to date variables to perform calculations
delay<-transform(delay,
                 VISITSCR=as.Date(VISITSCR, format="%Y-%m-%d"),
                 VISITDAY1=as.Date(VISITDAY1, format="%Y-%m-%d"),
                 VISITW4=as.Date(VISITW4, format="%Y-%m-%d"),
                 VISITW12=as.Date(VISITW12, format="%Y-%m-%d"),
                 VISITW24=as.Date(VISITW24, format="%Y-%m-%d"),
                 VISITET=as.Date(VISITET, format="%Y-%m-%d"),
                 VISITFU=as.Date(VISITFU, format="%Y-%m-%d"),
                 VISITTERM=as.Date(VISITTERM, format="%Y-%m-%d"),
                 CurrentDate=as.Date(Sys.Date(),format="%Y-%m-%d"))

#Create a separate dataset that indicates the number of days since screening as "subject-days" for each subject
subjectdays<-transform(delay,
                 SUBJECTDAYS=ifelse(SCREENSTATUS=="N",0,
#If an Early Termination date is available, use it to calculate subject days
                                    ifelse(!is.na(VISITET), VISITET-VISITSCR,
#If a last visit (W24) date is available, use it to calculate subject days. Otherwise use the current date.                                           
                                           ifelse(!is.na(VISITW24), VISITW24-VISITSCR, CurrentDate-VISITSCR))))
#Keep only the needed variables
subjectdays<-subjectdays[c("SUBJID","SUBJECTDAYS")]

#Back to the calculation of delays...Determine the target dates for each visit based on the screening date
delay<-transform(delay,
                 DAY1TARGET=VISITSCR+28,
                 W4TARGET=VISITDAY1+28,
                 W12TARGET=VISITDAY1+84,
                 W24TARGET=VISITDAY1+168,
#Calculate the targetdate for folllow-up based on ET date of last visit date
                 FUTARGET=if_else(!is.na(VISITET), VISITET+30,
                                 if_else(!is.na(VISITW24), VISITW24+30,
                                        as.Date(NA))))

#Add variable to indicate which visits are overdue or occured out of range (7 days from target)
delay<-transform(delay,
                 DAY1OUT=ifelse(VISITDAY1<DAY1TARGET-7 | VISITDAY1>DAY1TARGET+7,1,""),
                 DAY1OVER=ifelse(CurrentDate>DAY1TARGET+7 & is.na(VISITSCR2) & is.na(VISITET) & is.na(VISITTERM),1,""),
                 W4OUT=ifelse(VISITW4<W4TARGET-7 | VISITW4>W4TARGET+7,1,""),
                 W4OVER=ifelse(CurrentDate>W4TARGET+7 & is.na(VISITW4) & is.na(VISITET) & is.na(VISITTERM),1,""),
                 W12OUT=ifelse(VISITW12<W12TARGET-7 | VISITW12>W12TARGET+7,1,""),
                 W12OVER=ifelse(CurrentDate>W12TARGET+7 & is.na(VISITW12) & is.na(VISITET) & is.na(VISITTERM),1,""),
                 W24OUT=ifelse(VISITW24>W24TARGET+14,1,""),
                 W24OVER=ifelse(CurrentDate>W24TARGET+14 & is.na(VISITW24) & is.na(VISITET) & is.na(VISITTERM),1,""),
                 FUOUT=ifelse(VISITFU>FUTARGET,1,""),
                 FUOVER=ifelse(CurrentDate>FUTARGET+7 & is.na(VISITFU),1,"")
                 )

#convert to dataframe for further manipulation
delay<-as.data.frame(delay)

#Assign character format to date variable before import into the RI Calculator
delay<-transform(delay,
                 VISITSCR=as.character.Date(VISITSCR),
                 SCR2TARGET=as.character.Date(SCR2TARGET),
                 VISITDAY1=as.character.Date(VISITDAY1),
                 DAY1TARGET=as.character.Date(DAY1TARGET),
                 VISITW4=as.character.Date(VISITW4),
                 W4TARGET=as.character.Date(W4TARGET),
                 VISITW12=as.character.Date(VISITW12),
                 W12TARGET=as.character.Date(W12TARGET),
                 VISITW24=as.character.Date(VISITW24),
                 W24TARGET=as.character.Date(W24TARGET),
                 VISITET=as.character.Date(VISITET),
                 VISITFU=as.character.Date(VISITFU),
                 FUTARGET=as.character.Date(FUTARGET),
                 VISITTERM=as.character.Date(VISITTERM))

#Choose and order final variable into a nice table
delay<-delay[c("SUBJID",
               "SCREENSTATUS",
               "VISITSCR",
               "VISITDAY1",
               "DAY1TARGET",
               "DAY1OUT",
               "DAY1OVER",
               "VISITW4",
               "W4TARGET",
               "W4OUT",
               "W4OVER",
               "VISITW12",
               "W12TARGET",
               "W12OUT",
               "W12OVER",
               "VISITW24",
               "W24TARGET",
               "W24OUT",
               "W24OVER",
               "VISITET",
               "VISITFU",
               "FUTARGET",
               "FUOUT",
               "FUOVER",
               "VISITTERM"
)]

#transpose table back to vertical
SCR<-delay[c("SUBJID","VISITSCR")]
SCR<-transform(SCR,
               VISIT = "SCREEN",
               TARGET = as.Date(NA),
               OUT = as.factor(NA),
               OVERDUE = as.factor(NA))
SCR<-SCR %>% rename (VISITDT = VISITSCR)

DAY1<-delay[c("SUBJID","VISITDAY1","DAY1TARGET","DAY1OUT","DAY1OVER")]
DAY1<-transform(DAY1,
                VISIT = "DAY1")
DAY1<-DAY1 %>% rename (VISITDT = VISITDAY1,
                       TARGET = DAY1TARGET,
                       OUT = DAY1OUT,
                       OVERDUE = DAY1OVER)

W4<-delay[c("SUBJID","VISITW4","W4TARGET","W4OUT","W4OVER")]
W4<-transform(W4,
              VISIT = "W4")
W4<-W4 %>% rename (VISITDT = VISITW4,
                   TARGET = W4TARGET,
                   OUT = W4OUT,
                   OVERDUE = W4OVER)

W12<-delay[c("SUBJID","VISITW12","W12TARGET","W12OUT","W12OVER")]
W12<-transform(W12,
              VISIT = "W12")
W12<-W12 %>% rename (VISITDT = VISITW12,
                     TARGET = W12TARGET,
                     OUT = W12OUT,
                     OVERDUE = W12OVER)

W24<-delay[c("SUBJID","VISITW24","W24TARGET","W24OUT","W24OVER")]
W24<-transform(W24,
               VISIT = "W24")
W24<-W24 %>% rename (VISITDT = VISITW24,
                     TARGET = W24TARGET,
                     OUT = W24OUT,
                     OVERDUE = W24OVER)

ET<-delay[c("SUBJID","VISITET")]
ET<-transform(ET,
              VISIT = "ET",
              TARGET = as.Date(NA),
              OUT = as.factor(NA),
              OVERDUE = as.factor(NA))
ET<-ET %>% rename (VISITDT = VISITET)
#Keep only ET records if an ET visit actually occured
ET<-subset(ET,(!is.na(VISITDT)))

FU<-delay[c("SUBJID","VISITFU","FUTARGET","FUOUT","FUOVER")]
FU<-transform(FU,
              VISIT="FU")
FU<-FU %>% rename (VISITDT = VISITFU,
                   TARGET = FUTARGET,
                   OUT = FUOUT,
                   OVERDUE = FUOVER)

TERM<-delay[c("SUBJID","VISITTERM")]
TERM<-transform(TERM,
                VISIT="TERMINATION",
                TARGET=as.Date(NA),
                OUT=as.factor(NA),
                OVERDUE=as.factor(NA))
TERM<-TERM %>% rename (VISITDT = VISITTERM)

#stack visit datasets into a single dataset
delay2<-rbind(SCR,DAY1,W4,W12,W24,ET,FU,TERM)
#order final variables 
delay2<-delay2[c("SUBJID","VISIT","VISITDT","TARGET","OUT","OVERDUE")]
#Rename the dataset visit
visit<-delay2

#Sort by SUBJID
attach(visit)
visit<-visit[order(SUBJID),]
detach(visit)

Output:

SUBJID VISIT VISITDT TARGET OUT OVERDUE
999-002-001 SCREEN 2018-03-21
999-002-001 DAY1 2018-03-29 2018-04-20
999-002-001 W4 2018-04-12 2018-04-26
999-002-001 W12 2018-07-06 2018-06-21
999-002-001 W24 2018-09-30 2018-09-13 1
999-002-001 FU 2018-10-14 2018-10-13
999-002-002 SCREEN 2018-02-15
999-002-002 DAY1 2018-12-11 2018-03-17
999-002-002 W4 2019-01-08 1
999-003-001 SCREEN 2018-08-31 NA
999-003-001 DAY1 2018-10-17 2018-09-30
999-003-001 ET 2018-01-19 NA

Create a TIME dataset to compute Time to Data Entry KRI

Input (AuditTrail.txt):

StudyName|SiteId|SubjectName|FolderName|FormId|FormName|FieldName|AuditAction|UserID|AuditActionType|AuditTime
PRJ-999|2|999-002-001|Screening|9190|Demographics|SEX|Record created.|204|Created|10/30/2018 12:40:02 PM
PRJ-999|2|999-002-001|Day 1|9130|Vitals|SYSBP| User entered '120' |204|Entered|11/29/2018 12:40:02 PM
PRJ-999|2|999-002-001|Week 4|9130|GynExam|NATSUR| User entered '1' |204|Entered|12/27/2018 12:40:02 PM
PRJ-999|2|999-002-001|Week 12|9179|GynExam|NATSUR| User entered '1' |204|Entered|21/02/2019 1:42:20 PM
PRJ-999|2|999-002-001|Week 24|9182|GynExam|NATSUR| User entered '2' |204|Entered|16/05/2019 1:42:20 PM
PRJ-999|2|999-002-002|Screening|9167|Screen Failure|VETHICK| User entered '0' reason for change: Data Entry Error|152|EnteredWithChangeCode|11/16/2018 9:17:31 PM
PRJ-999|2|999-002-002|Day 1|9167|FSDS|FSDDIAG| User entered '1' reason for change: Data Entry Error|152|EnteredWithChangeCode|12/13/2018 9:17:31 PM
PRJ-999|3|999-003-001|Screening|9167|Demographics|RACE| User entered '0' |152|Entered|11/16/2018 9:17:43 PM
PRJ-999|3|999-003-001|Screening|9134|Medical History|STROKENY| User entered '001'|238|Entered|11/17/2018 1:20:30 PM
PRJ-999|3|999-003-001|Early Termination|9151|Disposition|ENDDATC|7|192055| User entered 'un UNK 2002' |152|Entered|12/16/2018 11:01:46 PM

R code:

#Import the AuditTrail.txt into R
audittrail<-read.delim('C:/Stream-AuditTrail.txt', header=TRUE, sep="|", dec=".", fill=TRUE, quote="" )
#Keep only the Variables needed for further computing
audittrail<-audittrail[c("SubjectName","AuditAction","AuditActionType","RoleName","AuditTime","FolderName","FormName","FieldName")]
#Filter the audit trail to keep only  records corresponding to CRC's data entries
audittrail_entry<-subset(audittrail, RoleName=="Clinical Research Coordinator" & AuditActionType=="Entered")
#Rename variables for standardization purpose and for merging.
audittrail_entry<-audittrail_entry %>% rename (SUBJID = SubjectName)
#Derive the variable VISIT from FolderName
audittrail_entry<-transform(audittrail_entry,
                      VISIT=ifelse(FolderName=="Screening 1", "SCREEN",
                                          ifelse(FolderName=="Day 1 (1)","DAY1",
                                                 ifelse(FolderName=="Week 12 (1)","W12",
                                                        ifelse(FolderName=="Week 24 (1)","W24",
                                                               ifelse(FolderName=="Follow-Up (1)","FU",
                                                                      ifelse(FolderName=="Study Completion / Early Termination (1)","SCET",
                                                                             ifelse(FolderName=="Study Completion","ET",""))))))))
#Keep only the record corresponding to the first datapoint entered per form per visit
audittrail_entry<-as.data.table(audittrail_entry)
audittrail_entry<-audittrail_entry[, ID := sequence(.N), by = c("SUBJID","FolderName","FormName")]
audittrail_entry<-subset(audittrail_entry, ID=="1")
#merge audit trail with VISIT dataset
audittrail_entry<-merge(audittrail_entry,visit)


#Standardize date format so they can be subtracted from one another
time<-transform(audittrail_entry,
                AuditTime=as.Date(AuditTime, format="%m/%d/%Y"),
                VISITDT=as.Date(VISITDT))
#Compute Time to Data Entry 
time<-transform(time, 
                TIMETODATAENTRY=AuditTime-VISITDT)
#Rename AuditTime to EntryDate for clarification purpose
time<-time %>% rename(EntryDate=AuditTime)
#Assign character format to date variable before import into the RI Calculator
time<-transform(time,
                 EntryDate=as.character.Date(EntryDate))
#Convert to data.frame for further processing
time<-as.data.frame(time)
#Keep only the variables to be imported in the RI Calculator
time<-time[c("SUBJID","FolderName","FormName","EntryDate","TIMETODATAENTRY")]
#Sort by SUBJID
attach(time)
time<-time[order(SUBJID),]
detach(time)

Output:

SUBJID VISIT FormName EntryDate TIMETODATAENTRY
999-002-001 SCREEN Demographics 2018-10-30 2
999-002-001 DAY1 Vitals 2018-11-29 1
999-002-001 W4 GynExam 2018-12-27 0
999-002-001 W12 GynExam 2019-02-21 0
999-002-001 W24 GynExam 2019-05-16 1
999-002-002 SCREEN ScreenFailure 2018-11-16 2
999-002-002 DAY1 FSDS 2018-12-13 0
999-003-001 SCREEN Demographics 2019-11-16 1
999-003-001 SCREEN Medical History 2019-11-17 14
999-003-001 ET Disposition 2018-12-16 5

Create a PROGRESS dataset containing subject-specific Number of forms completed and Number of days under study which are used to compute different rates in the RI Calculator

Input: audittrail R dataset created above

R code:

#Filter the audit trail to keep only records corresponding to CRC's data entries
audittrail_entry<-subset(audittrail, RoleName=="Clinical Research Coordinator" & AuditActionType=="Entered")
#Rename variables for standardization purpose
forms<-forms %>% rename (SUBJID=SubjectName)
#Filter out unique forms (per  Subject, per visit, per form) from all data points records.
forms<-as.data.table(forms)
forms<-forms[, ID := sequence(.N), by = c("SUBJID","FolderName","FormName")]
forms<-subset(forms, ID=="1")
#Sum the number of unique form per subject 
forms<-forms %>% group_by(SUBJID) %>% mutate(count = sum(ID))
forms<-forms %>% distinct(SUBJID, .keep_all = TRUE)
#Rename variables for standardization purpose
forms<-forms %>% rename (TOTALFORMS=count)
#Keep only the variables to be imported in the RI Calculator
forms<-forms[c("SUBJID","TOTALFORMS")]
#Merge with the datset "subjecdays" previous created in the "VISIT" snippet. 
progress<-merge(forms,subjectdays)

Output:

SUBJID TOTALFORMS SUBJECTDAYS
999-002-001 9 3
999-002-002 57 168
999-002-003 55 168
999-002-004 9 30
999-002-005 9 6
999-002-006 24 112
999-002-007 9 1
... ... ...

Create an ERROR dataset to compute the Error Rate KRI in the RI Calculator

Input: audittrail R dataset created above

R code:

#Extract information from the audittrail.txt previously imported
error<-audittrail
error<-subset(error, RoleName=="Clinical Research Coordinator" & 
                (FormName!="Run-in Study Drug Accountability"|FormName!="Run-in Study Drug Accountability"))
error<-filter(error, grepl("Data Entry Error", AuditAction))
error<-error %>% rename (SUBJID=SubjectName, VISIT=FolderName)
error<-error[c("SUBJID","VISIT","FormName","FieldName")]

#sort by SUBJID
attach(error)
error<-error[order(SUBJID),]
detach(error)

Output:

SUBJID VISIT FormName FieldName
999-002-002 Screening ScreenFailure VETHICK
999-002-002 Day 1 FSDS FSDDIAG
999-004-007 Screening Medical History STARTDTC
999-004-007 Day 1 Vital Signs SYSBP
999-004-007 Day 1 Vital Signs DIABP
999-004-007 Day 1 Vital Signs DIABP
999-005-002 Week 4 ScreenFailure VETHICK
999-005-003 Week 12 FSDS FSDDIAG
999-005-003 Week 12 FSDS FSDDIAG
999-005-003 ET Disposition ENDDATC

Create a QUERY dataset to compute Query Rate and Time to Query Resolution KRIs

Input (QueryDetails.txt):

StudyName|SiteId|SubjectName|Folder|Form|Field|QryOpenDate|QryResponseDate|QryClosedDate								
PRJ-999|2|999-002-001|SCREEN|Demographics|VETHICK|2018-05-07|2018-05-16|2018-06-27
PRJ-999|2|999-002-001|DAY1|Vitals|FSDDIAG|2018-06-27|2018-07-11|2018-09-19
PRJ-999|2|999-002-001|W4|GynExam|STARTDTC|2018-11-21|2018-11-21|2018-11-28	
PRJ-999|2|999-002-001|W12|GynExam|SYSBP|2018-09-19|2018-09-19|2019-02-12	
PRJ-999|2|999-002-001|W24|GynExam|DIABP|2018-06-01|2018-06-05|2018-06-05
PRJ-999|2|999-002-002|SCREEN|ScreenFailure|DIABP|2018-06-01|2018-06-05|2018-06-05
PRJ-999|2|999-002-002|DAY1|FSDS|VETHICK|2018-06-13|2018-06-19|2018-06-21
PRJ-999|2|999-003-001|SCREEN|Demographics|FSDDIAG|2018-06-13|2018-06-19|2018-06-21
PRJ-999|2|999-003-001|SCREEN|Medical History|FSDDIAG|2018-05-04|2018-05-04|2018-05-05	
PRJ-999|2|999-003-001|ET|Disposition|ENDDATC|2018-04-26|2018-05-03|2018-05-04

R code:

#Import QueryDetails.txt file into R
query<-read.delim("F:/Studies/ERC-242/1 - DATA CLEANING/RI Calculator/alldata/Stream-QueryDetail.txt",header=TRUE, sep="|", dec=".",fill=TRUE, quote="" )
query<-as.data.frame(query)
#Keep onlt the variable needed for further computing
query<-query[c("SubjectName","Folder","Form","Field","QryOpenDate","QryResponseDate","QryClosedDate")]
#Rename variables for standardization purpose
query<-query %>% rename (SUBJID=SubjectName, VISIT=Folder)
#Assign date format to date variables so they can be used to perform calculation
query<-transform(query,
                 OpenDT=as.Date(QryOpenDate, format="%m/%d/%Y"),
                 ResponseDT=as.Date(QryResponseDate, format="%m/%d/%Y"),
                 ClosedDT=as.Date(QryClosedDate, format="%m/%d/%Y"),
                 #Add a variable corresponding to the current date
                 CurrentDT=as.Date(Sys.Date(),format="%m/%d/%Y")
                 )
#Calculate the number of days each query has been opened for with the following conditions:
query<-transform(query,
                 DAYSOPEN=ifelse(is.na(ResponseDT) & is.na(ClosedDT), OpenDT-CurrentDT,
                          ifelse(is.na(ResponseDT) & !is.na(ClosedDT), 0,
                          ResponseDT-OpenDT
                          )))
#Assign character formats to date variables before import into the RI Calculator
query<-transform(query,
                 OpenDT=as.character.Date(OpenDT),
                 ResponseDT=as.character.Date(ResponseDT),
                 ClosedDT=as.character.Date(ClosedDT)
                 )
#Keep only the variables to be imported in the RI Calculator
query<-query[c("SUBJID","VISIT","Form","OpenDT","ResponseDT","ClosedDT","DAYSOPEN")]
#sort by SUBJID
attach(query)
query<-query[order(SUBJID),]
detach(query)

Output:

SUBJID VISIT Form Field OpenDT ResponseDT ClosedDT DAYSOPEN
999-002-001 SCREEN Demographics VETHICK 2018-05-07 2018-05-16 2018-06-27 9
999-002-001 DAY1 Vitals FSDDIAG 2018-06-27 2018-07-11 2018-09-19 14
999-002-001 W4 GynExam STARTDTC 2018-11-21 2018-11-21 2018-11-28 0
999-002-001 W12 GynExam SYSBP 2018-09-19 2018-09-19 2019-02-12 0
999-002-001 W24 GynExam DIABP 2018-06-01 2018-06-05 2018-06-05 4
999-002-002 SCREEN ScreenFailure DIABP 2018-06-01 2018-06-05 2018-06-05 4
999-002-002 DAY1 FSDS VETHICK 2018-06-13 2018-06-19 2018-06-21 6
999-003-001 SCREEN Demographics FSDDIAG 2018-06-13 2018-06-19 2018-06-21 6
999-003-001 SCREEN Medical History FSDDIAG 2018-05-04 2018-05-04 2018-05-05 0
999-003-001 ET Disposition ENDDATC 2018-04-26 2018-05-03 2018-05-04 7
... ... ... ... ... ... ... ....

Create a DEV dataset to compute the Deviation Rate KRI

Input (Protocol Deviation Log.xlsx):

SubjectNumber Deviation Date Visit Number Deviation Category Description Issue Level
242-007-001 2018-06-28 Week 24 Study_Procedures Wrong arm used for BP 1
242-011-004 2018-02-20 Unscheduled Study_Procedures Subject was not fasting 1
242-026-002 2018-07-19 Day 1 Eligibility_Enrolment Subject stopped taking IP 2
242-118-027 2018-05-10 Screening Unnecessary_Procedures sample collected but not required 2
242-121-022 2018-08-29 Day 1 Eligibility_Enrolment Ineligle subject randomized 3
242-082-027 2018-11-08 Week 24 Study_Drug_Administration Box 5 was discarded 1
#Load packages containing functions used in this snippet 
library(openxlsx)

dev<-read.xlsx('C:/PRJ-999 Protocol Deviation log.xlsx')
#Convert excel dates to readable format (if you need them) 
dev<-transform(dev,
               Deviation.Date=convertToDate(Deviation.Date, origin = "1900-01-01"))
#Rename variables for standardization purpose
dev<-dev %>% rename (SUBJID=SubjectNumber, ISSUELEVEL=Issue.Level)
#Keep only the variables to be imported in the RI Calculator
dev<-dev[c("SUBJID","ISSUELEVEL")]
#sort by SUBJID
attach(dev)
dev<-dev[order(SUBJID),]
detach(dev)

Output:

SUBJID ISSUE_LEVEL
242-007-001 1
242-011-004 1
242-026-002 2
242-118-027 2
242-121-022 3
242-082-027 1
... ...

Create an AE dataset to compute the AE Rate KRI

Input (ae.sas7bdat):

project Subject Folder AETERM AETERMPT AESTDAT AEENDAT
PRJ-999 999-002-001 AE PLANNED CARPAL TUNNEL SURGERY Carpal tunnel decompression 31-Dec-18 31-Dec-18
PRJ-999 999-002-005 AE COLD SYMPTOMS Viral upper respiratory tract infection 01-Jan-19 05-Jan-19
PRJ-999 999-003-001 AE TORN RIGHT MENISCUS Meniscus injury 04-Jan-19
PRJ-999 999-003-001 AE RIGHT KNEE PAIN Arthralgia 08-Sep-19 10-Sep-19
PRJ-999 999-003-001 AE ELEVATED LIVER ENZYMES Hepatic enzyme increased 09-Jun-18
PRJ-999 999-003-004 AE BROKEN DENTAL CROWN Device breakage 01-Jul-19 03-Jul-19
PRJ-999 999-003-005 AE INFLUENZA Influenza 12-Nov-18 17-Nov-18
#Import AE dataset into R 
ae<-read_sas("C:/ae.sas7bdat")
#Keep only the Variables needed for further computing
ae<-ae[c("Subject","AETERM","AESTDAT")]
#Rename variables for standardization purpose
ae<-ae %>% rename (SUBJID=Subject)
#Assign character format to date variable before import into the RI Calculator
ae<-transform(ae,
              AESTDAT=as.character(as.Date(AESTDAT, format="%Y-%m-%d")))
#Sort by SUBJID
attach(ae)
ae<-ae[order(SUBJID),]
detach(ae)

Output:

Subject AETERM AESTDAT
999-002-001 PLANNED CARPAL TUNNEL SURGERY 31-Dec-18
999-002-005 COLD SYMPTOMS 01-Jan-19
999-003-001 TORN RIGHT MENISCUS 04-Jan-19
999-003-001 RIGHT KNEE PAIN 08-Sep-19
999-003-001 ELEVATED LIVER ENZYMES 09-Jun-18
999-003-004 BROKEN DENTAL CROWN 01-Jul-19
999-003-005 INFLUENZA 12-Nov-18

Transfer all datasets created above into the RI Calculator

Input: all datasets created above

R code

#Load the package required to import and manipulate the RI_Calculator
library(XLConnect)
#Import the RI Calculator into R
wb<-loadWorkbook("C:/RICalculator.xlsm", create = TRUE)

#Write datasets created above into the RI Calculator
setStyleAction(wb, XLC$"STYLE_ACTION.NONE") writeWorksheet(wb, visit, sheet = "VISIT", startRow = 1, startCol = 1) writeWorksheet(wb, time, sheet = "TIME", startRow = 1, startCol = 1) writeWorksheet(wb, progress, sheet = "PROG", startRow = 1, startCol = 1) writeWorksheet(wb, error, sheet = "ERROR", startRow = 1, startCol = 1) writeWorksheet(wb, query, sheet = "QUERY", startRow = 1, startCol = 1) writeWorksheet(wb, dev, sheet = "DEV", startRow = 1, startCol = 1) writeWorksheet(wb, ae, sheet = "AE", startRow = 1, startCol = 1) #Save the RI calculator as a new updated workbook saveWorkbook(wb,"C:/RICalculatorUpdated.xlsm")