R Codes

R is an open source programming language that can be used to extract the important information out of datasets and transfer it into the RI Calculator where it can be further analyzed and visualized. RStudio is an open-source integrated development environment for R.

Like the RI Calculator, R codes must be customized to meet the centralized monitoring needs of different projects. Below are examples of R codes that can be used to extract information out of datasets in different formats (e.g.: sas7bdat, txt, xlsx) and transfer this information into the RI Calculator.

Create a VISIT dataset to compute the Enrolment Rate, Screen Failure Rate and Withdrawal Rate KRIs.

Input (visit.sas7bdat):

projectid	Subject	Site	Folder	FolderName	VISITDAT
PRJ-999	999-002-001	2	SCREEN	Screening	2018-03-21
PRJ-999	999-002-001	2	DAY1	Day 1	2018-03-29
PRJ-999	999-002-001	2	W4	Week 4	2018-04-12
PRJ-999	999-002-001	2	W12	Week 12	2018-07-06
PRJ-999	999-002-002	2	W24	Week 24	2018-09-13
PRJ-999	999-002-002	2	SCREEN	Screening	2018-02-15
PRJ-999	999-002-002	2	DAY1	Day 1	2018-12-11
PRJ-999	999-003-001	3	SCREEN	Screening	2018-08-31
PRJ-999	999-003-001	3	DAY1	Day 1	2018-10-17
PRJ-999	999-003-001	3	ET	Early Termination	2018-01-19
…	…	…	…	…	…

R code:

#Load all packages containing functions used in the different codes snippets
library(haven)
library(dplyr)
library(chron)
library(lubridate)
library(data.table)

#Import visit dataset into R 
visit<-read_sas("C:/visit.sas7bdat")
#Keep only the Variables needed for further computing
visit<-visit[c("Subject", "Folder", "VISITDAT")]
#Rename variables for standardization purpose
visit<-visit %>% rename (SUBJID = Subject,
                         VISIT = Folder,
                         VISITDT = VISITDAT)

#Import follow-up dataset into R to extract follow-up visits dates
fu<-read_sas("C:/ds_fu.sas7bdat")
#Keep only the Variables needed for further computing
fu<-fu[c("Subject","Folder","DSSTDAT_FU")]
#Rename variables for standardization purpose
fu<-fu %>% rename (SUBJID = Subject,
                   VISIT = Folder,
                   VISITDT = DSSTDAT_FU)

#Combine visit and follow-up datasets (stack them)
visit<-rbind(visit,fu)

#Import the enrolment dataset to obtain the subject's enrolment statuses.
status<-read_sas("C:/enroll.sas7bdat")
#Keep only the Variables needed for further computing
status<-status[c("Subject","RUNINYN_STD")]
#Rename variables for standardization purpose and for merging.
status<-status %>% rename (SUBJID = Subject,
                         SCREENSTATUS = RUNINYN_STD)

#Merge visit with subject status
visit<-merge(visit, status)
#Assign character format to date variable
visit<-transform(visit,
              VISITDT=as.character(VISITDT, format=""))
#created new dataset from visit dataset to perform calculations on
delay<-visit

#transpose dataset
SCR<-subset(delay, VISIT=="SCREEN")
SCR<-SCR[c("SUBJID","VISITDT","SCREENSTATUS")]
SCR<-SCR %>% rename (VISITSCR = VISITDT)

DAY1<-subset(delay, VISIT=="DAY1")
DAY1<-DAY1[c("SUBJID","VISITDT")]
DAY1<-DAY1 %>% rename (VISITDAY1 = VISITDT)

W4<-subset(delay, VISIT=="W4")
W4<-W4[c("SUBJID","VISITDT")]
W4<-W4 %>% rename (VISITW4 = VISITDT)

W12<-subset(delay, VISIT=="W12")
W12<-W12[c("SUBJID","VISITDT")]
W12<-W12 %>% rename (VISITW12 = VISITDT)

W24<-subset(delay, VISIT=="W24")
W24<-W24[c("SUBJID","VISITDT")]
W24<-W24 %>% rename (VISITW24 = VISITDT)

ET<-subset(delay, VISIT=="ET")
ET<-ET[c("SUBJID","VISITDT")]
ET<-ET %>% rename (VISITET = VISITDT)

FU<-subset(delay, VISIT=="FU")
FU<-FU[c("SUBJID","VISITDT")]
FU<-FU %>% rename (VISITFU = VISITDT)

#Get Termination date from the DS dataset
DS<-read_sas("F:/Studies/ERC-242/1 - DATA CLEANING/RI Calculator/alldata/ds.sas7bdat")
DS<-DS[c("Subject","DSDECOD","DSSTDAT")]
DS<-DS %>% rename (SUBJID = Subject,
                   VISITTERM = DSSTDAT)

#Merge datasets to get an horizontal table of subjects visits 
delay<-Reduce(function(...) merge(..., all = TRUE, by = "SUBJID"),
                    list(SCR, DAY1, W4, W12, W24, ET, FU, DS))

#Assign date format to date variables to perform calculations
delay<-transform(delay,
                 VISITSCR=as.Date(VISITSCR, format="%Y-%m-%d"),
                 VISITDAY1=as.Date(VISITDAY1, format="%Y-%m-%d"),
                 VISITW4=as.Date(VISITW4, format="%Y-%m-%d"),
                 VISITW12=as.Date(VISITW12, format="%Y-%m-%d"),
                 VISITW24=as.Date(VISITW24, format="%Y-%m-%d"),
                 VISITET=as.Date(VISITET, format="%Y-%m-%d"),
                 VISITFU=as.Date(VISITFU, format="%Y-%m-%d"),
                 VISITTERM=as.Date(VISITTERM, format="%Y-%m-%d"),
                 CurrentDate=as.Date(Sys.Date(),format="%Y-%m-%d"))

#Create a separate dataset that indicates the number of days since screening as "subject-days" for each subject
subjectdays<-transform(delay,
                 SUBJECTDAYS=ifelse(SCREENSTATUS=="N",0,
#If an Early Termination date is available, use it to calculate subject days
                                    ifelse(!is.na(VISITET), VISITET-VISITSCR,
#If a last visit (W24) date is available, use it to calculate subject days. Otherwise use the current date.                                           
                                           ifelse(!is.na(VISITW24), VISITW24-VISITSCR, CurrentDate-VISITSCR))))
#Keep only the needed variables
subjectdays<-subjectdays[c("SUBJID","SUBJECTDAYS")]

#Back to the calculation of delays...Determine the target dates for each visit based on the screening date
delay<-transform(delay,
                 DAY1TARGET=VISITSCR+28,
                 W4TARGET=VISITDAY1+28,
                 W12TARGET=VISITDAY1+84,
                 W24TARGET=VISITDAY1+168,
#Calculate the targetdate for folllow-up based on ET date of last visit date
                 FUTARGET=if_else(!is.na(VISITET), VISITET+30,
                                 if_else(!is.na(VISITW24), VISITW24+30,
                                        as.Date(NA))))

#Add variable to indicate which visits are overdue or occured out of range (7 days from target)
delay<-transform(delay,
                 DAY1OUT=ifelse(VISITDAY1<DAY1TARGET-7 | VISITDAY1>DAY1TARGET+7,1,""),
                 DAY1OVER=ifelse(CurrentDate>DAY1TARGET+7 & is.na(VISITSCR2) & is.na(VISITET) & is.na(VISITTERM),1,""),
                 W4OUT=ifelse(VISITW4<W4TARGET-7 | VISITW4>W4TARGET+7,1,""),
                 W4OVER=ifelse(CurrentDate>W4TARGET+7 & is.na(VISITW4) & is.na(VISITET) & is.na(VISITTERM),1,""),
                 W12OUT=ifelse(VISITW12<W12TARGET-7 | VISITW12>W12TARGET+7,1,""),
                 W12OVER=ifelse(CurrentDate>W12TARGET+7 & is.na(VISITW12) & is.na(VISITET) & is.na(VISITTERM),1,""),
                 W24OUT=ifelse(VISITW24>W24TARGET+14,1,""),
                 W24OVER=ifelse(CurrentDate>W24TARGET+14 & is.na(VISITW24) & is.na(VISITET) & is.na(VISITTERM),1,""),
                 FUOUT=ifelse(VISITFU>FUTARGET,1,""),
                 FUOVER=ifelse(CurrentDate>FUTARGET+7 & is.na(VISITFU),1,"")
                 )

#convert to dataframe for further manipulation
delay<-as.data.frame(delay)

#Assign character format to date variable before import into the RI Calculator
delay<-transform(delay,
                 VISITSCR=as.character.Date(VISITSCR),
                 SCR2TARGET=as.character.Date(SCR2TARGET),
                 VISITDAY1=as.character.Date(VISITDAY1),
                 DAY1TARGET=as.character.Date(DAY1TARGET),
                 VISITW4=as.character.Date(VISITW4),
                 W4TARGET=as.character.Date(W4TARGET),
                 VISITW12=as.character.Date(VISITW12),
                 W12TARGET=as.character.Date(W12TARGET),
                 VISITW24=as.character.Date(VISITW24),
                 W24TARGET=as.character.Date(W24TARGET),
                 VISITET=as.character.Date(VISITET),
                 VISITFU=as.character.Date(VISITFU),
                 FUTARGET=as.character.Date(FUTARGET),
                 VISITTERM=as.character.Date(VISITTERM))

#Choose and order final variable into a nice table
delay<-delay[c("SUBJID",
               "SCREENSTATUS",
               "VISITSCR",
               "VISITDAY1",
               "DAY1TARGET",
               "DAY1OUT",
               "DAY1OVER",
               "VISITW4",
               "W4TARGET",
               "W4OUT",
               "W4OVER",
               "VISITW12",
               "W12TARGET",
               "W12OUT",
               "W12OVER",
               "VISITW24",
               "W24TARGET",
               "W24OUT",
               "W24OVER",
               "VISITET",
               "VISITFU",
               "FUTARGET",
               "FUOUT",
               "FUOVER",
               "VISITTERM"
)]

#transpose table back to vertical
SCR<-delay[c("SUBJID","VISITSCR")]
SCR<-transform(SCR,
               VISIT = "SCREEN",
               TARGET = as.Date(NA),
               OUT = as.factor(NA),
               OVERDUE = as.factor(NA))
SCR<-SCR %>% rename (VISITDT = VISITSCR)

DAY1<-delay[c("SUBJID","VISITDAY1","DAY1TARGET","DAY1OUT","DAY1OVER")]
DAY1<-transform(DAY1,
                VISIT = "DAY1")
DAY1<-DAY1 %>% rename (VISITDT = VISITDAY1,
                       TARGET = DAY1TARGET,
                       OUT = DAY1OUT,
                       OVERDUE = DAY1OVER)

W4<-delay[c("SUBJID","VISITW4","W4TARGET","W4OUT","W4OVER")]
W4<-transform(W4,
              VISIT = "W4")
W4<-W4 %>% rename (VISITDT = VISITW4,
                   TARGET = W4TARGET,
                   OUT = W4OUT,
                   OVERDUE = W4OVER)

W12<-delay[c("SUBJID","VISITW12","W12TARGET","W12OUT","W12OVER")]
W12<-transform(W12,
              VISIT = "W12")
W12<-W12 %>% rename (VISITDT = VISITW12,
                     TARGET = W12TARGET,
                     OUT = W12OUT,
                     OVERDUE = W12OVER)

W24<-delay[c("SUBJID","VISITW24","W24TARGET","W24OUT","W24OVER")]
W24<-transform(W24,
               VISIT = "W24")
W24<-W24 %>% rename (VISITDT = VISITW24,
                     TARGET = W24TARGET,
                     OUT = W24OUT,
                     OVERDUE = W24OVER)

ET<-delay[c("SUBJID","VISITET")]
ET<-transform(ET,
              VISIT = "ET",
              TARGET = as.Date(NA),
              OUT = as.factor(NA),
              OVERDUE = as.factor(NA))
ET<-ET %>% rename (VISITDT = VISITET)
#Keep only ET records if an ET visit actually occured
ET<-subset(ET,(!is.na(VISITDT)))

FU<-delay[c("SUBJID","VISITFU","FUTARGET","FUOUT","FUOVER")]
FU<-transform(FU,
              VISIT="FU")
FU<-FU %>% rename (VISITDT = VISITFU,
                   TARGET = FUTARGET,
                   OUT = FUOUT,
                   OVERDUE = FUOVER)

TERM<-delay[c("SUBJID","VISITTERM")]
TERM<-transform(TERM,
                VISIT="TERMINATION",
                TARGET=as.Date(NA),
                OUT=as.factor(NA),
                OVERDUE=as.factor(NA))
TERM<-TERM %>% rename (VISITDT = VISITTERM)

#stack visit datasets into a single dataset
delay2<-rbind(SCR,DAY1,W4,W12,W24,ET,FU,TERM)
#order final variables 
delay2<-delay2[c("SUBJID","VISIT","VISITDT","TARGET","OUT","OVERDUE")]
#Rename the dataset visit
visit<-delay2

#Sort by SUBJID
attach(visit)
visit<-visit[order(SUBJID),]
detach(visit)

Output:

SUBJID	VISIT	VISITDT	TARGET	OUT	OVERDUE
999-002-001	SCREEN	2018-03-21
999-002-001	DAY1	2018-03-29	2018-04-20
999-002-001	W4	2018-04-12	2018-04-26
999-002-001	W12	2018-07-06	2018-06-21
999-002-001	W24	2018-09-30	2018-09-13	1
999-002-001	FU	2018-10-14	2018-10-13
999-002-002	SCREEN	2018-02-15
999-002-002	DAY1	2018-12-11	2018-03-17
999-002-002	W4		2019-01-08		1
999-003-001	SCREEN	2018-08-31	NA
999-003-001	DAY1	2018-10-17	2018-09-30
999-003-001	ET	2018-01-19	NA
…	…	…	…	…	…

Create a TIME dataset to compute Time to Data Entry KRI

Input (AuditTrail.txt):

StudyName|SiteId|SubjectName|FolderName|FormId|FormName|FieldName|AuditAction|UserID|AuditActionType|AuditTime
PRJ-999|2|999-002-001|Screening|9190|Demographics|SEX|Record created.|204|Created|10/30/2018 12:40:02 PM
PRJ-999|2|999-002-001|Day 1|9130|Vitals|SYSBP| User entered '120' |204|Entered|11/29/2018 12:40:02 PM
PRJ-999|2|999-002-001|Week 4|9130|GynExam|NATSUR| User entered '1' |204|Entered|12/27/2018 12:40:02 PM
PRJ-999|2|999-002-001|Week 12|9179|GynExam|NATSUR| User entered '1' |204|Entered|21/02/2019 1:42:20 PM
PRJ-999|2|999-002-001|Week 24|9182|GynExam|NATSUR| User entered '2' |204|Entered|16/05/2019 1:42:20 PM
PRJ-999|2|999-002-002|Screening|9167|Screen Failure|VETHICK| User entered '0' reason for change: Data Entry Error|152|EnteredWithChangeCode|11/16/2018 9:17:31 PM
PRJ-999|2|999-002-002|Day 1|9167|FSDS|FSDDIAG| User entered '1' reason for change: Data Entry Error|152|EnteredWithChangeCode|12/13/2018 9:17:31 PM
PRJ-999|3|999-003-001|Screening|9167|Demographics|RACE| User entered '0' |152|Entered|11/16/2018 9:17:43 PM
PRJ-999|3|999-003-001|Screening|9134|Medical History|STROKENY| User entered '001'|238|Entered|11/17/2018 1:20:30 PM
PRJ-999|3|999-003-001|Early Termination|9151|Disposition|ENDDATC|7|192055| User entered 'un UNK 2002' |152|Entered|12/16/2018 11:01:46 PM

R code:

#Import the AuditTrail.txt into R
audittrail<-read.delim('C:/Stream-AuditTrail.txt', header=TRUE, sep="|", dec=".", fill=TRUE, quote="" )
#Keep only the Variables needed for further computing
audittrail<-audittrail[c("SubjectName","AuditAction","AuditActionType","RoleName","AuditTime","FolderName","FormName","FieldName")]
#Filter the audit trail to keep only  records corresponding to CRC's data entries
audittrail_entry<-subset(audittrail, RoleName=="Clinical Research Coordinator" & AuditActionType=="Entered")
#Rename variables for standardization purpose and for merging.
audittrail_entry<-audittrail_entry %>% rename (SUBJID = SubjectName)
#Derive the variable VISIT from FolderName
audittrail_entry<-transform(audittrail_entry,
                      VISIT=ifelse(FolderName=="Screening 1", "SCREEN",
                                          ifelse(FolderName=="Day 1 (1)","DAY1",
                                                 ifelse(FolderName=="Week 12 (1)","W12",
                                                        ifelse(FolderName=="Week 24 (1)","W24",
                                                               ifelse(FolderName=="Follow-Up (1)","FU",
                                                                      ifelse(FolderName=="Study Completion / Early Termination (1)","SCET",
                                                                             ifelse(FolderName=="Study Completion","ET",""))))))))
#Keep only the record corresponding to the first datapoint entered per form per visit
audittrail_entry<-as.data.table(audittrail_entry)
audittrail_entry<-audittrail_entry[, ID := sequence(.N), by = c("SUBJID","FolderName","FormName")]
audittrail_entry<-subset(audittrail_entry, ID=="1")
#merge audit trail with VISIT dataset
audittrail_entry<-merge(audittrail_entry,visit)


#Standardize date format so they can be subtracted from one another
time<-transform(audittrail_entry,
                AuditTime=as.Date(AuditTime, format="%m/%d/%Y"),
                VISITDT=as.Date(VISITDT))
#Compute Time to Data Entry 
time<-transform(time, 
                TIMETODATAENTRY=AuditTime-VISITDT)
#Rename AuditTime to EntryDate for clarification purpose
time<-time %>% rename(EntryDate=AuditTime)
#Assign character format to date variable before import into the RI Calculator
time<-transform(time,
                 EntryDate=as.character.Date(EntryDate))
#Convert to data.frame for further processing
time<-as.data.frame(time)
#Keep only the variables to be imported in the RI Calculator
time<-time[c("SUBJID","FolderName","FormName","EntryDate","TIMETODATAENTRY")]
#Sort by SUBJID
attach(time)
time<-time[order(SUBJID),]
detach(time)

Output:

SUBJID	VISIT	FormName	EntryDate	TIMETODATAENTRY
999-002-001	SCREEN	Demographics	2018-10-30	2
999-002-001	DAY1	Vitals	2018-11-29	1
999-002-001	W4	GynExam	2018-12-27	0
999-002-001	W12	GynExam	2019-02-21	0
999-002-001	W24	GynExam	2019-05-16	1
999-002-002	SCREEN	ScreenFailure	2018-11-16	2
999-002-002	DAY1	FSDS	2018-12-13	0
999-003-001	SCREEN	Demographics	2019-11-16	1
999-003-001	SCREEN	Medical History	2019-11-17	14
999-003-001	ET	Disposition	2018-12-16	5
…	…	…	…	…

Create a PROGRESS dataset containing subject-specific Number of forms completed and Number of days under study which are used to compute different rates in the RI Calculator

Input: audittrail R dataset created above

R code:

#Filter the audit trail to keep only records corresponding to CRC's data entries
audittrail_entry<-subset(audittrail, RoleName=="Clinical Research Coordinator" & AuditActionType=="Entered")
#Rename variables for standardization purpose
forms<-forms %>% rename (SUBJID=SubjectName)
#Filter out unique forms (per  Subject, per visit, per form) from all data points records.
forms<-as.data.table(forms)
forms<-forms[, ID := sequence(.N), by = c("SUBJID","FolderName","FormName")]
forms<-subset(forms, ID=="1")
#Sum the number of unique form per subject 
forms<-forms %>% group_by(SUBJID) %>% mutate(count = sum(ID))
forms<-forms %>% distinct(SUBJID, .keep_all = TRUE)
#Rename variables for standardization purpose
forms<-forms %>% rename (TOTALFORMS=count)
#Keep only the variables to be imported in the RI Calculator
forms<-forms[c("SUBJID","TOTALFORMS")]
#Merge with the datset "subjecdays" previous created in the "VISIT" snippet. 
progress<-merge(forms,subjectdays)

Output:

SUBJID	TOTALFORMS	SUBJECTDAYS
999-002-001	9	3
999-002-002	57	168
999-002-003	55	168
999-002-004	9	30
999-002-005	9	6
999-002-006	24	112
999-002-007	9	1
...	...	...

Create an ERROR dataset to compute the Error Rate KRI in the RI Calculator

Input: audittrail R dataset created above

R code:

#Extract information from the audittrail.txt previously imported
error<-audittrail
error<-subset(error, RoleName=="Clinical Research Coordinator" & 
                (FormName!="Run-in Study Drug Accountability"|FormName!="Run-in Study Drug Accountability"))
error<-filter(error, grepl("Data Entry Error", AuditAction))
error<-error %>% rename (SUBJID=SubjectName, VISIT=FolderName)
error<-error[c("SUBJID","VISIT","FormName","FieldName")]

#sort by SUBJID
attach(error)
error<-error[order(SUBJID),]
detach(error)

Output:

SUBJID	VISIT	FormName	FieldName
999-002-002	Screening	ScreenFailure	VETHICK
999-002-002	Day 1	FSDS	FSDDIAG
999-004-007	Screening	Medical History	STARTDTC
999-004-007	Day 1	Vital Signs	SYSBP
999-004-007	Day 1	Vital Signs	DIABP
999-004-007	Day 1	Vital Signs	DIABP
999-005-002	Week 4	ScreenFailure	VETHICK
999-005-003	Week 12	FSDS	FSDDIAG
999-005-003	Week 12	FSDS	FSDDIAG
999-005-003	ET	Disposition	ENDDATC
…	…	…	…

Create a QUERY dataset to compute Query Rate and Time to Query Resolution KRIs

Input (QueryDetails.txt):

StudyName|SiteId|SubjectName|Folder|Form|Field|QryOpenDate|QryResponseDate|QryClosedDate								
PRJ-999|2|999-002-001|SCREEN|Demographics|VETHICK|2018-05-07|2018-05-16|2018-06-27
PRJ-999|2|999-002-001|DAY1|Vitals|FSDDIAG|2018-06-27|2018-07-11|2018-09-19
PRJ-999|2|999-002-001|W4|GynExam|STARTDTC|2018-11-21|2018-11-21|2018-11-28	
PRJ-999|2|999-002-001|W12|GynExam|SYSBP|2018-09-19|2018-09-19|2019-02-12	
PRJ-999|2|999-002-001|W24|GynExam|DIABP|2018-06-01|2018-06-05|2018-06-05
PRJ-999|2|999-002-002|SCREEN|ScreenFailure|DIABP|2018-06-01|2018-06-05|2018-06-05
PRJ-999|2|999-002-002|DAY1|FSDS|VETHICK|2018-06-13|2018-06-19|2018-06-21
PRJ-999|2|999-003-001|SCREEN|Demographics|FSDDIAG|2018-06-13|2018-06-19|2018-06-21
PRJ-999|2|999-003-001|SCREEN|Medical History|FSDDIAG|2018-05-04|2018-05-04|2018-05-05	
PRJ-999|2|999-003-001|ET|Disposition|ENDDATC|2018-04-26|2018-05-03|2018-05-04

R code:

#Import QueryDetails.txt file into R
query<-read.delim("F:/Studies/ERC-242/1 - DATA CLEANING/RI Calculator/alldata/Stream-QueryDetail.txt",header=TRUE, sep="|", dec=".",fill=TRUE, quote="" )
query<-as.data.frame(query)
#Keep onlt the variable needed for further computing
query<-query[c("SubjectName","Folder","Form","Field","QryOpenDate","QryResponseDate","QryClosedDate")]
#Rename variables for standardization purpose
query<-query %>% rename (SUBJID=SubjectName, VISIT=Folder)
#Assign date format to date variables so they can be used to perform calculation
query<-transform(query,
                 OpenDT=as.Date(QryOpenDate, format="%m/%d/%Y"),
                 ResponseDT=as.Date(QryResponseDate, format="%m/%d/%Y"),
                 ClosedDT=as.Date(QryClosedDate, format="%m/%d/%Y"),
                 #Add a variable corresponding to the current date
                 CurrentDT=as.Date(Sys.Date(),format="%m/%d/%Y")
                 )
#Calculate the number of days each query has been opened for with the following conditions:
query<-transform(query,
                 DAYSOPEN=ifelse(is.na(ResponseDT) & is.na(ClosedDT), OpenDT-CurrentDT,
                          ifelse(is.na(ResponseDT) & !is.na(ClosedDT), 0,
                          ResponseDT-OpenDT
                          )))
#Assign character formats to date variables before import into the RI Calculator
query<-transform(query,
                 OpenDT=as.character.Date(OpenDT),
                 ResponseDT=as.character.Date(ResponseDT),
                 ClosedDT=as.character.Date(ClosedDT)
                 )
#Keep only the variables to be imported in the RI Calculator
query<-query[c("SUBJID","VISIT","Form","OpenDT","ResponseDT","ClosedDT","DAYSOPEN")]
#sort by SUBJID
attach(query)
query<-query[order(SUBJID),]
detach(query)

Output:

SUBJID	VISIT	Form	Field	OpenDT	ResponseDT	ClosedDT	DAYSOPEN
999-002-001	SCREEN	Demographics	VETHICK	2018-05-07	2018-05-16	2018-06-27	9
999-002-001	DAY1	Vitals	FSDDIAG	2018-06-27	2018-07-11	2018-09-19	14
999-002-001	W4	GynExam	STARTDTC	2018-11-21	2018-11-21	2018-11-28	0
999-002-001	W12	GynExam	SYSBP	2018-09-19	2018-09-19	2019-02-12	0
999-002-001	W24	GynExam	DIABP	2018-06-01	2018-06-05	2018-06-05	4
999-002-002	SCREEN	ScreenFailure	DIABP	2018-06-01	2018-06-05	2018-06-05	4
999-002-002	DAY1	FSDS	VETHICK	2018-06-13	2018-06-19	2018-06-21	6
999-003-001	SCREEN	Demographics	FSDDIAG	2018-06-13	2018-06-19	2018-06-21	6
999-003-001	SCREEN	Medical History	FSDDIAG	2018-05-04	2018-05-04	2018-05-05	0
999-003-001	ET	Disposition	ENDDATC	2018-04-26	2018-05-03	2018-05-04	7
...	...	...	...	...	...	...	....

Create a DEV dataset to compute the Deviation Rate KRI

Input (Protocol Deviation Log.xlsx):

SubjectNumber	Deviation Date	Visit Number	Deviation Category	Description	Issue Level
242-007-001	2018-06-28	Week 24	Study_Procedures	Wrong arm used for BP	1
242-011-004	2018-02-20	Unscheduled	Study_Procedures	Subject was not fasting	1
242-026-002	2018-07-19	Day 1	Eligibility_Enrolment	Subject stopped taking IP	2
242-118-027	2018-05-10	Screening	Unnecessary_Procedures	sample collected but not required	2
242-121-022	2018-08-29	Day 1	Eligibility_Enrolment	Ineligle subject randomized	3
242-082-027	2018-11-08	Week 24	Study_Drug_Administration	Box 5 was discarded	1
…	…	…	…	…	…

#Load packages containing functions used in this snippet 
library(openxlsx)

dev<-read.xlsx('C:/PRJ-999 Protocol Deviation log.xlsx')
#Convert excel dates to readable format (if you need them) 
dev<-transform(dev,
               Deviation.Date=convertToDate(Deviation.Date, origin = "1900-01-01"))
#Rename variables for standardization purpose
dev<-dev %>% rename (SUBJID=SubjectNumber, ISSUELEVEL=Issue.Level)
#Keep only the variables to be imported in the RI Calculator
dev<-dev[c("SUBJID","ISSUELEVEL")]
#sort by SUBJID
attach(dev)
dev<-dev[order(SUBJID),]
detach(dev)

Output:

SUBJID	ISSUE_LEVEL
242-007-001	1
242-011-004	1
242-026-002	2
242-118-027	2
242-121-022	3
242-082-027	1
...	...

Create an AE dataset to compute the AE Rate KRI

Input (ae.sas7bdat):

project	Subject	Folder	AETERM	AETERMPT	AESTDAT	AEENDAT
PRJ-999	999-002-001	AE	PLANNED CARPAL TUNNEL SURGERY	Carpal tunnel decompression	31-Dec-18	31-Dec-18
PRJ-999	999-002-005	AE	COLD SYMPTOMS	Viral upper respiratory tract infection	01-Jan-19	05-Jan-19
PRJ-999	999-003-001	AE	TORN RIGHT MENISCUS	Meniscus injury	04-Jan-19
PRJ-999	999-003-001	AE	RIGHT KNEE PAIN	Arthralgia	08-Sep-19	10-Sep-19
PRJ-999	999-003-001	AE	ELEVATED LIVER ENZYMES	Hepatic enzyme increased	09-Jun-18
PRJ-999	999-003-004	AE	BROKEN DENTAL CROWN	Device breakage	01-Jul-19	03-Jul-19
PRJ-999	999-003-005	AE	INFLUENZA	Influenza	12-Nov-18	17-Nov-18
…	…	…	…	…	…	…

#Import AE dataset into R 
ae<-read_sas("C:/ae.sas7bdat")
#Keep only the Variables needed for further computing
ae<-ae[c("Subject","AETERM","AESTDAT")]
#Rename variables for standardization purpose
ae<-ae %>% rename (SUBJID=Subject)
#Assign character format to date variable before import into the RI Calculator
ae<-transform(ae,
              AESTDAT=as.character(as.Date(AESTDAT, format="%Y-%m-%d")))
#Sort by SUBJID
attach(ae)
ae<-ae[order(SUBJID),]
detach(ae)

Output:

Subject	AETERM	AESTDAT
999-002-001	PLANNED CARPAL TUNNEL SURGERY	31-Dec-18
999-002-005	COLD SYMPTOMS	01-Jan-19
999-003-001	TORN RIGHT MENISCUS	04-Jan-19
999-003-001	RIGHT KNEE PAIN	08-Sep-19
999-003-001	ELEVATED LIVER ENZYMES	09-Jun-18
999-003-004	BROKEN DENTAL CROWN	01-Jul-19
999-003-005	INFLUENZA	12-Nov-18
…	…	…

Transfer all datasets created above into the RI Calculator

Input: all datasets created above

R code

#Load the package required to import and manipulate the RI_Calculator
library(XLConnect)
#Import the RI Calculator into R
wb<-loadWorkbook("C:/RICalculator.xlsm", create = TRUE)

#Write datasets created above into the RI Calculator
setStyleAction(wb, XLC$"STYLE_ACTION.NONE")
writeWorksheet(wb, visit, sheet = "VISIT", startRow = 1, startCol = 1)
writeWorksheet(wb, time, sheet = "TIME", startRow = 1, startCol = 1)
writeWorksheet(wb, progress, sheet = "PROG", startRow = 1, startCol = 1)
writeWorksheet(wb, error, sheet = "ERROR", startRow = 1, startCol = 1)
writeWorksheet(wb, query, sheet = "QUERY", startRow = 1, startCol = 1)
writeWorksheet(wb, dev, sheet = "DEV", startRow = 1, startCol = 1)
writeWorksheet(wb, ae, sheet = "AE", startRow = 1, startCol = 1)

#Save the RI calculator as a new updated workbook
saveWorkbook(wb,"C:/RICalculatorUpdated.xlsm")