R Codes
R is an open source programming language that can be used to extract the important information out of datasets and transfer it into the RI Calculator where it can be further analyzed and visualized. RStudio is an open-source integrated development environment for R.
Like the RI Calculator, R codes must be customized to meet the centralized monitoring needs of different projects. Below are examples of R codes that can be used to extract information out of datasets in different formats (e.g.: sas7bdat, txt, xlsx) and transfer this information into the RI Calculator.
Create a VISIT dataset to compute the Enrolment Rate, Screen Failure Rate and Withdrawal Rate KRIs.
Input (visit.sas7bdat):
projectid | Subject | Site | Folder | FolderName | VISITDAT |
PRJ-999 | 999-002-001 | 2 | SCREEN | Screening | 2018-03-21 |
PRJ-999 | 999-002-001 | 2 | DAY1 | Day 1 | 2018-03-29 |
PRJ-999 | 999-002-001 | 2 | W4 | Week 4 | 2018-04-12 |
PRJ-999 | 999-002-001 | 2 | W12 | Week 12 | 2018-07-06 |
PRJ-999 | 999-002-002 | 2 | W24 | Week 24 | 2018-09-13 |
PRJ-999 | 999-002-002 | 2 | SCREEN | Screening | 2018-02-15 |
PRJ-999 | 999-002-002 | 2 | DAY1 | Day 1 | 2018-12-11 |
PRJ-999 | 999-003-001 | 3 | SCREEN | Screening | 2018-08-31 |
PRJ-999 | 999-003-001 | 3 | DAY1 | Day 1 | 2018-10-17 |
PRJ-999 | 999-003-001 | 3 | ET | Early Termination | 2018-01-19 |
… | … | … | … | … | … |
R code:
#Load all packages containing functions used in the different codes snippets
library(haven)
library(dplyr)
library(chron)
library(lubridate)
library(data.table)
#Import visit dataset into R
visit<-read_sas("C:/visit.sas7bdat")
#Keep only the Variables needed for further computing
visit<-visit[c("Subject", "Folder", "VISITDAT")]
#Rename variables for standardization purpose
visit<-visit %>% rename (SUBJID = Subject,
VISIT = Folder,
VISITDT = VISITDAT)
#Import follow-up dataset into R to extract follow-up visits dates
fu<-read_sas("C:/ds_fu.sas7bdat")
#Keep only the Variables needed for further computing
fu<-fu[c("Subject","Folder","DSSTDAT_FU")]
#Rename variables for standardization purpose
fu<-fu %>% rename (SUBJID = Subject,
VISIT = Folder,
VISITDT = DSSTDAT_FU)
#Combine visit and follow-up datasets (stack them)
visit<-rbind(visit,fu)
#Import the enrolment dataset to obtain the subject's enrolment statuses.
status<-read_sas("C:/enroll.sas7bdat")
#Keep only the Variables needed for further computing
status<-status[c("Subject","RUNINYN_STD")]
#Rename variables for standardization purpose and for merging.
status<-status %>% rename (SUBJID = Subject,
SCREENSTATUS = RUNINYN_STD)
#Merge visit with subject status
visit<-merge(visit, status)
#Assign character format to date variable
visit<-transform(visit,
VISITDT=as.character(VISITDT, format=""))
#created new dataset from visit dataset to perform calculations on
delay<-visit
#transpose dataset
SCR<-subset(delay, VISIT=="SCREEN")
SCR<-SCR[c("SUBJID","VISITDT","SCREENSTATUS")]
SCR<-SCR %>% rename (VISITSCR = VISITDT)
DAY1<-subset(delay, VISIT=="DAY1")
DAY1<-DAY1[c("SUBJID","VISITDT")]
DAY1<-DAY1 %>% rename (VISITDAY1 = VISITDT)
W4<-subset(delay, VISIT=="W4")
W4<-W4[c("SUBJID","VISITDT")]
W4<-W4 %>% rename (VISITW4 = VISITDT)
W12<-subset(delay, VISIT=="W12")
W12<-W12[c("SUBJID","VISITDT")]
W12<-W12 %>% rename (VISITW12 = VISITDT)
W24<-subset(delay, VISIT=="W24")
W24<-W24[c("SUBJID","VISITDT")]
W24<-W24 %>% rename (VISITW24 = VISITDT)
ET<-subset(delay, VISIT=="ET")
ET<-ET[c("SUBJID","VISITDT")]
ET<-ET %>% rename (VISITET = VISITDT)
FU<-subset(delay, VISIT=="FU")
FU<-FU[c("SUBJID","VISITDT")]
FU<-FU %>% rename (VISITFU = VISITDT)
#Get Termination date from the DS dataset
DS<-read_sas("F:/Studies/ERC-242/1 - DATA CLEANING/RI Calculator/alldata/ds.sas7bdat")
DS<-DS[c("Subject","DSDECOD","DSSTDAT")]
DS<-DS %>% rename (SUBJID = Subject,
VISITTERM = DSSTDAT)
#Merge datasets to get an horizontal table of subjects visits
delay<-Reduce(function(...) merge(..., all = TRUE, by = "SUBJID"),
list(SCR, DAY1, W4, W12, W24, ET, FU, DS))
#Assign date format to date variables to perform calculations
delay<-transform(delay,
VISITSCR=as.Date(VISITSCR, format="%Y-%m-%d"),
VISITDAY1=as.Date(VISITDAY1, format="%Y-%m-%d"),
VISITW4=as.Date(VISITW4, format="%Y-%m-%d"),
VISITW12=as.Date(VISITW12, format="%Y-%m-%d"),
VISITW24=as.Date(VISITW24, format="%Y-%m-%d"),
VISITET=as.Date(VISITET, format="%Y-%m-%d"),
VISITFU=as.Date(VISITFU, format="%Y-%m-%d"),
VISITTERM=as.Date(VISITTERM, format="%Y-%m-%d"),
CurrentDate=as.Date(Sys.Date(),format="%Y-%m-%d"))
#Create a separate dataset that indicates the number of days since screening as "subject-days" for each subject
subjectdays<-transform(delay,
SUBJECTDAYS=ifelse(SCREENSTATUS=="N",0,
#If an Early Termination date is available, use it to calculate subject days
ifelse(!is.na(VISITET), VISITET-VISITSCR,
#If a last visit (W24) date is available, use it to calculate subject days. Otherwise use the current date.
ifelse(!is.na(VISITW24), VISITW24-VISITSCR, CurrentDate-VISITSCR))))
#Keep only the needed variables
subjectdays<-subjectdays[c("SUBJID","SUBJECTDAYS")]
#Back to the calculation of delays...Determine the target dates for each visit based on the screening date
delay<-transform(delay,
DAY1TARGET=VISITSCR+28,
W4TARGET=VISITDAY1+28,
W12TARGET=VISITDAY1+84,
W24TARGET=VISITDAY1+168,
#Calculate the targetdate for folllow-up based on ET date of last visit date
FUTARGET=if_else(!is.na(VISITET), VISITET+30,
if_else(!is.na(VISITW24), VISITW24+30,
as.Date(NA))))
#Add variable to indicate which visits are overdue or occured out of range (7 days from target)
delay<-transform(delay,
DAY1OUT=ifelse(VISITDAY1<DAY1TARGET-7 | VISITDAY1>DAY1TARGET+7,1,""),
DAY1OVER=ifelse(CurrentDate>DAY1TARGET+7 & is.na(VISITSCR2) & is.na(VISITET) & is.na(VISITTERM),1,""),
W4OUT=ifelse(VISITW4<W4TARGET-7 | VISITW4>W4TARGET+7,1,""),
W4OVER=ifelse(CurrentDate>W4TARGET+7 & is.na(VISITW4) & is.na(VISITET) & is.na(VISITTERM),1,""),
W12OUT=ifelse(VISITW12<W12TARGET-7 | VISITW12>W12TARGET+7,1,""),
W12OVER=ifelse(CurrentDate>W12TARGET+7 & is.na(VISITW12) & is.na(VISITET) & is.na(VISITTERM),1,""),
W24OUT=ifelse(VISITW24>W24TARGET+14,1,""),
W24OVER=ifelse(CurrentDate>W24TARGET+14 & is.na(VISITW24) & is.na(VISITET) & is.na(VISITTERM),1,""),
FUOUT=ifelse(VISITFU>FUTARGET,1,""),
FUOVER=ifelse(CurrentDate>FUTARGET+7 & is.na(VISITFU),1,"")
)
#convert to dataframe for further manipulation
delay<-as.data.frame(delay)
#Assign character format to date variable before import into the RI Calculator
delay<-transform(delay,
VISITSCR=as.character.Date(VISITSCR),
SCR2TARGET=as.character.Date(SCR2TARGET),
VISITDAY1=as.character.Date(VISITDAY1),
DAY1TARGET=as.character.Date(DAY1TARGET),
VISITW4=as.character.Date(VISITW4),
W4TARGET=as.character.Date(W4TARGET),
VISITW12=as.character.Date(VISITW12),
W12TARGET=as.character.Date(W12TARGET),
VISITW24=as.character.Date(VISITW24),
W24TARGET=as.character.Date(W24TARGET),
VISITET=as.character.Date(VISITET),
VISITFU=as.character.Date(VISITFU),
FUTARGET=as.character.Date(FUTARGET),
VISITTERM=as.character.Date(VISITTERM))
#Choose and order final variable into a nice table
delay<-delay[c("SUBJID",
"SCREENSTATUS",
"VISITSCR",
"VISITDAY1",
"DAY1TARGET",
"DAY1OUT",
"DAY1OVER",
"VISITW4",
"W4TARGET",
"W4OUT",
"W4OVER",
"VISITW12",
"W12TARGET",
"W12OUT",
"W12OVER",
"VISITW24",
"W24TARGET",
"W24OUT",
"W24OVER",
"VISITET",
"VISITFU",
"FUTARGET",
"FUOUT",
"FUOVER",
"VISITTERM"
)]
#transpose table back to vertical
SCR<-delay[c("SUBJID","VISITSCR")]
SCR<-transform(SCR,
VISIT = "SCREEN",
TARGET = as.Date(NA),
OUT = as.factor(NA),
OVERDUE = as.factor(NA))
SCR<-SCR %>% rename (VISITDT = VISITSCR)
DAY1<-delay[c("SUBJID","VISITDAY1","DAY1TARGET","DAY1OUT","DAY1OVER")]
DAY1<-transform(DAY1,
VISIT = "DAY1")
DAY1<-DAY1 %>% rename (VISITDT = VISITDAY1,
TARGET = DAY1TARGET,
OUT = DAY1OUT,
OVERDUE = DAY1OVER)
W4<-delay[c("SUBJID","VISITW4","W4TARGET","W4OUT","W4OVER")]
W4<-transform(W4,
VISIT = "W4")
W4<-W4 %>% rename (VISITDT = VISITW4,
TARGET = W4TARGET,
OUT = W4OUT,
OVERDUE = W4OVER)
W12<-delay[c("SUBJID","VISITW12","W12TARGET","W12OUT","W12OVER")]
W12<-transform(W12,
VISIT = "W12")
W12<-W12 %>% rename (VISITDT = VISITW12,
TARGET = W12TARGET,
OUT = W12OUT,
OVERDUE = W12OVER)
W24<-delay[c("SUBJID","VISITW24","W24TARGET","W24OUT","W24OVER")]
W24<-transform(W24,
VISIT = "W24")
W24<-W24 %>% rename (VISITDT = VISITW24,
TARGET = W24TARGET,
OUT = W24OUT,
OVERDUE = W24OVER)
ET<-delay[c("SUBJID","VISITET")]
ET<-transform(ET,
VISIT = "ET",
TARGET = as.Date(NA),
OUT = as.factor(NA),
OVERDUE = as.factor(NA))
ET<-ET %>% rename (VISITDT = VISITET)
#Keep only ET records if an ET visit actually occured
ET<-subset(ET,(!is.na(VISITDT)))
FU<-delay[c("SUBJID","VISITFU","FUTARGET","FUOUT","FUOVER")]
FU<-transform(FU,
VISIT="FU")
FU<-FU %>% rename (VISITDT = VISITFU,
TARGET = FUTARGET,
OUT = FUOUT,
OVERDUE = FUOVER)
TERM<-delay[c("SUBJID","VISITTERM")]
TERM<-transform(TERM,
VISIT="TERMINATION",
TARGET=as.Date(NA),
OUT=as.factor(NA),
OVERDUE=as.factor(NA))
TERM<-TERM %>% rename (VISITDT = VISITTERM)
#stack visit datasets into a single dataset
delay2<-rbind(SCR,DAY1,W4,W12,W24,ET,FU,TERM)
#order final variables
delay2<-delay2[c("SUBJID","VISIT","VISITDT","TARGET","OUT","OVERDUE")]
#Rename the dataset visit
visit<-delay2
#Sort by SUBJID
attach(visit)
visit<-visit[order(SUBJID),]
detach(visit)
Output:
SUBJID | VISIT | VISITDT | TARGET | OUT | OVERDUE |
999-002-001 | SCREEN | 2018-03-21 | |||
999-002-001 | DAY1 | 2018-03-29 | 2018-04-20 | ||
999-002-001 | W4 | 2018-04-12 | 2018-04-26 | ||
999-002-001 | W12 | 2018-07-06 | 2018-06-21 | ||
999-002-001 | W24 | 2018-09-30 | 2018-09-13 | 1 | |
999-002-001 | FU | 2018-10-14 | 2018-10-13 | ||
999-002-002 | SCREEN | 2018-02-15 | |||
999-002-002 | DAY1 | 2018-12-11 | 2018-03-17 | ||
999-002-002 | W4 | 2019-01-08 | 1 | ||
999-003-001 | SCREEN | 2018-08-31 | NA | ||
999-003-001 | DAY1 | 2018-10-17 | 2018-09-30 | ||
999-003-001 | ET | 2018-01-19 | NA | ||
… | … | … | … | … | … |
Create a TIME dataset to compute Time to Data Entry KRI
Input (AuditTrail.txt):
StudyName|SiteId|SubjectName|FolderName|FormId|FormName|FieldName|AuditAction|UserID|AuditActionType|AuditTime
PRJ-999|2|999-002-001|Screening|9190|Demographics|SEX|Record created.|204|Created|10/30/2018 12:40:02 PM
PRJ-999|2|999-002-001|Day 1|9130|Vitals|SYSBP| User entered '120' |204|Entered|11/29/2018 12:40:02 PM
PRJ-999|2|999-002-001|Week 4|9130|GynExam|NATSUR| User entered '1' |204|Entered|12/27/2018 12:40:02 PM
PRJ-999|2|999-002-001|Week 12|9179|GynExam|NATSUR| User entered '1' |204|Entered|21/02/2019 1:42:20 PM
PRJ-999|2|999-002-001|Week 24|9182|GynExam|NATSUR| User entered '2' |204|Entered|16/05/2019 1:42:20 PM
PRJ-999|2|999-002-002|Screening|9167|Screen Failure|VETHICK| User entered '0' reason for change: Data Entry Error|152|EnteredWithChangeCode|11/16/2018 9:17:31 PM
PRJ-999|2|999-002-002|Day 1|9167|FSDS|FSDDIAG| User entered '1' reason for change: Data Entry Error|152|EnteredWithChangeCode|12/13/2018 9:17:31 PM
PRJ-999|3|999-003-001|Screening|9167|Demographics|RACE| User entered '0' |152|Entered|11/16/2018 9:17:43 PM
PRJ-999|3|999-003-001|Screening|9134|Medical History|STROKENY| User entered '001'|238|Entered|11/17/2018 1:20:30 PM
PRJ-999|3|999-003-001|Early Termination|9151|Disposition|ENDDATC|7|192055| User entered 'un UNK 2002' |152|Entered|12/16/2018 11:01:46 PM
R code:
#Import the AuditTrail.txt into R
audittrail<-read.delim('C:/Stream-AuditTrail.txt', header=TRUE, sep="|", dec=".", fill=TRUE, quote="" )
#Keep only the Variables needed for further computing
audittrail<-audittrail[c("SubjectName","AuditAction","AuditActionType","RoleName","AuditTime","FolderName","FormName","FieldName")]
#Filter the audit trail to keep only records corresponding to CRC's data entries
audittrail_entry<-subset(audittrail, RoleName=="Clinical Research Coordinator" & AuditActionType=="Entered")
#Rename variables for standardization purpose and for merging.
audittrail_entry<-audittrail_entry %>% rename (SUBJID = SubjectName)
#Derive the variable VISIT from FolderName
audittrail_entry<-transform(audittrail_entry,
VISIT=ifelse(FolderName=="Screening 1", "SCREEN",
ifelse(FolderName=="Day 1 (1)","DAY1",
ifelse(FolderName=="Week 12 (1)","W12",
ifelse(FolderName=="Week 24 (1)","W24",
ifelse(FolderName=="Follow-Up (1)","FU",
ifelse(FolderName=="Study Completion / Early Termination (1)","SCET",
ifelse(FolderName=="Study Completion","ET",""))))))))
#Keep only the record corresponding to the first datapoint entered per form per visit
audittrail_entry<-as.data.table(audittrail_entry)
audittrail_entry<-audittrail_entry[, ID := sequence(.N), by = c("SUBJID","FolderName","FormName")]
audittrail_entry<-subset(audittrail_entry, ID=="1")
#merge audit trail with VISIT dataset
audittrail_entry<-merge(audittrail_entry,visit)
#Standardize date format so they can be subtracted from one another
time<-transform(audittrail_entry,
AuditTime=as.Date(AuditTime, format="%m/%d/%Y"),
VISITDT=as.Date(VISITDT))
#Compute Time to Data Entry
time<-transform(time,
TIMETODATAENTRY=AuditTime-VISITDT)
#Rename AuditTime to EntryDate for clarification purpose
time<-time %>% rename(EntryDate=AuditTime)
#Assign character format to date variable before import into the RI Calculator
time<-transform(time,
EntryDate=as.character.Date(EntryDate))
#Convert to data.frame for further processing
time<-as.data.frame(time)
#Keep only the variables to be imported in the RI Calculator
time<-time[c("SUBJID","FolderName","FormName","EntryDate","TIMETODATAENTRY")]
#Sort by SUBJID
attach(time)
time<-time[order(SUBJID),]
detach(time)
Output:
SUBJID | VISIT | FormName | EntryDate | TIMETODATAENTRY |
999-002-001 | SCREEN | Demographics | 2018-10-30 | 2 |
999-002-001 | DAY1 | Vitals | 2018-11-29 | 1 |
999-002-001 | W4 | GynExam | 2018-12-27 | 0 |
999-002-001 | W12 | GynExam | 2019-02-21 | 0 |
999-002-001 | W24 | GynExam | 2019-05-16 | 1 |
999-002-002 | SCREEN | ScreenFailure | 2018-11-16 | 2 |
999-002-002 | DAY1 | FSDS | 2018-12-13 | 0 |
999-003-001 | SCREEN | Demographics | 2019-11-16 | 1 |
999-003-001 | SCREEN | Medical History | 2019-11-17 | 14 |
999-003-001 | ET | Disposition | 2018-12-16 | 5 |
… | … | … | … | … |
Create a PROGRESS dataset containing subject-specific Number of forms completed and Number of days under study which are used to compute different rates in the RI Calculator
Input: audittrail R dataset created above
R code:
#Filter the audit trail to keep only records corresponding to CRC's data entries
audittrail_entry<-subset(audittrail, RoleName=="Clinical Research Coordinator" & AuditActionType=="Entered")
#Rename variables for standardization purpose
forms<-forms %>% rename (SUBJID=SubjectName)
#Filter out unique forms (per Subject, per visit, per form) from all data points records.
forms<-as.data.table(forms)
forms<-forms[, ID := sequence(.N), by = c("SUBJID","FolderName","FormName")]
forms<-subset(forms, ID=="1")
#Sum the number of unique form per subject
forms<-forms %>% group_by(SUBJID) %>% mutate(count = sum(ID))
forms<-forms %>% distinct(SUBJID, .keep_all = TRUE)
#Rename variables for standardization purpose
forms<-forms %>% rename (TOTALFORMS=count)
#Keep only the variables to be imported in the RI Calculator
forms<-forms[c("SUBJID","TOTALFORMS")]
#Merge with the datset "subjecdays" previous created in the "VISIT" snippet.
progress<-merge(forms,subjectdays)
Output:
SUBJID | TOTALFORMS | SUBJECTDAYS |
999-002-001 | 9 | 3 |
999-002-002 | 57 | 168 |
999-002-003 | 55 | 168 |
999-002-004 | 9 | 30 |
999-002-005 | 9 | 6 |
999-002-006 | 24 | 112 |
999-002-007 | 9 | 1 |
... | ... | ... |
Create an ERROR dataset to compute the Error Rate KRI in the RI Calculator
Input: audittrail R dataset created above
R code:
#Extract information from the audittrail.txt previously imported
error<-audittrail
error<-subset(error, RoleName=="Clinical Research Coordinator" &
(FormName!="Run-in Study Drug Accountability"|FormName!="Run-in Study Drug Accountability"))
error<-filter(error, grepl("Data Entry Error", AuditAction))
error<-error %>% rename (SUBJID=SubjectName, VISIT=FolderName)
error<-error[c("SUBJID","VISIT","FormName","FieldName")]
#sort by SUBJID
attach(error)
error<-error[order(SUBJID),]
detach(error)
Output:
SUBJID | VISIT | FormName | FieldName |
999-002-002 | Screening | ScreenFailure | VETHICK |
999-002-002 | Day 1 | FSDS | FSDDIAG |
999-004-007 | Screening | Medical History | STARTDTC |
999-004-007 | Day 1 | Vital Signs | SYSBP |
999-004-007 | Day 1 | Vital Signs | DIABP |
999-004-007 | Day 1 | Vital Signs | DIABP |
999-005-002 | Week 4 | ScreenFailure | VETHICK |
999-005-003 | Week 12 | FSDS | FSDDIAG |
999-005-003 | Week 12 | FSDS | FSDDIAG |
999-005-003 | ET | Disposition | ENDDATC |
… | … | … | … |
Create a QUERY dataset to compute Query Rate and Time to Query Resolution KRIs
Input (QueryDetails.txt):
StudyName|SiteId|SubjectName|Folder|Form|Field|QryOpenDate|QryResponseDate|QryClosedDate
PRJ-999|2|999-002-001|SCREEN|Demographics|VETHICK|2018-05-07|2018-05-16|2018-06-27
PRJ-999|2|999-002-001|DAY1|Vitals|FSDDIAG|2018-06-27|2018-07-11|2018-09-19
PRJ-999|2|999-002-001|W4|GynExam|STARTDTC|2018-11-21|2018-11-21|2018-11-28
PRJ-999|2|999-002-001|W12|GynExam|SYSBP|2018-09-19|2018-09-19|2019-02-12
PRJ-999|2|999-002-001|W24|GynExam|DIABP|2018-06-01|2018-06-05|2018-06-05
PRJ-999|2|999-002-002|SCREEN|ScreenFailure|DIABP|2018-06-01|2018-06-05|2018-06-05
PRJ-999|2|999-002-002|DAY1|FSDS|VETHICK|2018-06-13|2018-06-19|2018-06-21
PRJ-999|2|999-003-001|SCREEN|Demographics|FSDDIAG|2018-06-13|2018-06-19|2018-06-21
PRJ-999|2|999-003-001|SCREEN|Medical History|FSDDIAG|2018-05-04|2018-05-04|2018-05-05
PRJ-999|2|999-003-001|ET|Disposition|ENDDATC|2018-04-26|2018-05-03|2018-05-04
R code:
#Import QueryDetails.txt file into R
query<-read.delim("F:/Studies/ERC-242/1 - DATA CLEANING/RI Calculator/alldata/Stream-QueryDetail.txt",header=TRUE, sep="|", dec=".",fill=TRUE, quote="" )
query<-as.data.frame(query)
#Keep onlt the variable needed for further computing
query<-query[c("SubjectName","Folder","Form","Field","QryOpenDate","QryResponseDate","QryClosedDate")]
#Rename variables for standardization purpose
query<-query %>% rename (SUBJID=SubjectName, VISIT=Folder)
#Assign date format to date variables so they can be used to perform calculation
query<-transform(query,
OpenDT=as.Date(QryOpenDate, format="%m/%d/%Y"),
ResponseDT=as.Date(QryResponseDate, format="%m/%d/%Y"),
ClosedDT=as.Date(QryClosedDate, format="%m/%d/%Y"),
#Add a variable corresponding to the current date
CurrentDT=as.Date(Sys.Date(),format="%m/%d/%Y")
)
#Calculate the number of days each query has been opened for with the following conditions:
query<-transform(query,
DAYSOPEN=ifelse(is.na(ResponseDT) & is.na(ClosedDT), OpenDT-CurrentDT,
ifelse(is.na(ResponseDT) & !is.na(ClosedDT), 0,
ResponseDT-OpenDT
)))
#Assign character formats to date variables before import into the RI Calculator
query<-transform(query,
OpenDT=as.character.Date(OpenDT),
ResponseDT=as.character.Date(ResponseDT),
ClosedDT=as.character.Date(ClosedDT)
)
#Keep only the variables to be imported in the RI Calculator
query<-query[c("SUBJID","VISIT","Form","OpenDT","ResponseDT","ClosedDT","DAYSOPEN")]
#sort by SUBJID
attach(query)
query<-query[order(SUBJID),]
detach(query)
Output:
SUBJID | VISIT | Form | Field | OpenDT | ResponseDT | ClosedDT | DAYSOPEN |
999-002-001 | SCREEN | Demographics | VETHICK | 2018-05-07 | 2018-05-16 | 2018-06-27 | 9 |
999-002-001 | DAY1 | Vitals | FSDDIAG | 2018-06-27 | 2018-07-11 | 2018-09-19 | 14 |
999-002-001 | W4 | GynExam | STARTDTC | 2018-11-21 | 2018-11-21 | 2018-11-28 | 0 |
999-002-001 | W12 | GynExam | SYSBP | 2018-09-19 | 2018-09-19 | 2019-02-12 | 0 |
999-002-001 | W24 | GynExam | DIABP | 2018-06-01 | 2018-06-05 | 2018-06-05 | 4 |
999-002-002 | SCREEN | ScreenFailure | DIABP | 2018-06-01 | 2018-06-05 | 2018-06-05 | 4 |
999-002-002 | DAY1 | FSDS | VETHICK | 2018-06-13 | 2018-06-19 | 2018-06-21 | 6 |
999-003-001 | SCREEN | Demographics | FSDDIAG | 2018-06-13 | 2018-06-19 | 2018-06-21 | 6 |
999-003-001 | SCREEN | Medical History | FSDDIAG | 2018-05-04 | 2018-05-04 | 2018-05-05 | 0 |
999-003-001 | ET | Disposition | ENDDATC | 2018-04-26 | 2018-05-03 | 2018-05-04 | 7 |
... | ... | ... | ... | ... | ... | ... | .... |
Create a DEV dataset to compute the Deviation Rate KRI
Input (Protocol Deviation Log.xlsx):
SubjectNumber | Deviation Date | Visit Number | Deviation Category | Description | Issue Level |
242-007-001 | 2018-06-28 | Week 24 | Study_Procedures | Wrong arm used for BP | 1 |
242-011-004 | 2018-02-20 | Unscheduled | Study_Procedures | Subject was not fasting | 1 |
242-026-002 | 2018-07-19 | Day 1 | Eligibility_Enrolment | Subject stopped taking IP | 2 |
242-118-027 | 2018-05-10 | Screening | Unnecessary_Procedures | sample collected but not required | 2 |
242-121-022 | 2018-08-29 | Day 1 | Eligibility_Enrolment | Ineligle subject randomized | 3 |
242-082-027 | 2018-11-08 | Week 24 | Study_Drug_Administration | Box 5 was discarded | 1 |
… | … | … | … | … | … |
#Load packages containing functions used in this snippet
library(openxlsx)
dev<-read.xlsx('C:/PRJ-999 Protocol Deviation log.xlsx')
#Convert excel dates to readable format (if you need them)
dev<-transform(dev,
Deviation.Date=convertToDate(Deviation.Date, origin = "1900-01-01"))
#Rename variables for standardization purpose
dev<-dev %>% rename (SUBJID=SubjectNumber, ISSUELEVEL=Issue.Level)
#Keep only the variables to be imported in the RI Calculator
dev<-dev[c("SUBJID","ISSUELEVEL")]
#sort by SUBJID
attach(dev)
dev<-dev[order(SUBJID),]
detach(dev)
Output:
SUBJID | ISSUE_LEVEL |
242-007-001 | 1 |
242-011-004 | 1 |
242-026-002 | 2 |
242-118-027 | 2 |
242-121-022 | 3 |
242-082-027 | 1 |
... | ... |
Create an AE dataset to compute the AE Rate KRI
Input (ae.sas7bdat):
project | Subject | Folder | AETERM | AETERMPT | AESTDAT | AEENDAT |
PRJ-999 | 999-002-001 | AE | PLANNED CARPAL TUNNEL SURGERY | Carpal tunnel decompression | 31-Dec-18 | 31-Dec-18 |
PRJ-999 | 999-002-005 | AE | COLD SYMPTOMS | Viral upper respiratory tract infection | 01-Jan-19 | 05-Jan-19 |
PRJ-999 | 999-003-001 | AE | TORN RIGHT MENISCUS | Meniscus injury | 04-Jan-19 | |
PRJ-999 | 999-003-001 | AE | RIGHT KNEE PAIN | Arthralgia | 08-Sep-19 | 10-Sep-19 |
PRJ-999 | 999-003-001 | AE | ELEVATED LIVER ENZYMES | Hepatic enzyme increased | 09-Jun-18 | |
PRJ-999 | 999-003-004 | AE | BROKEN DENTAL CROWN | Device breakage | 01-Jul-19 | 03-Jul-19 |
PRJ-999 | 999-003-005 | AE | INFLUENZA | Influenza | 12-Nov-18 | 17-Nov-18 |
… | … | … | … | … | … | … |
#Import AE dataset into R
ae<-read_sas("C:/ae.sas7bdat")
#Keep only the Variables needed for further computing
ae<-ae[c("Subject","AETERM","AESTDAT")]
#Rename variables for standardization purpose
ae<-ae %>% rename (SUBJID=Subject)
#Assign character format to date variable before import into the RI Calculator
ae<-transform(ae,
AESTDAT=as.character(as.Date(AESTDAT, format="%Y-%m-%d")))
#Sort by SUBJID
attach(ae)
ae<-ae[order(SUBJID),]
detach(ae)
Output:
Subject | AETERM | AESTDAT |
999-002-001 | PLANNED CARPAL TUNNEL SURGERY | 31-Dec-18 |
999-002-005 | COLD SYMPTOMS | 01-Jan-19 |
999-003-001 | TORN RIGHT MENISCUS | 04-Jan-19 |
999-003-001 | RIGHT KNEE PAIN | 08-Sep-19 |
999-003-001 | ELEVATED LIVER ENZYMES | 09-Jun-18 |
999-003-004 | BROKEN DENTAL CROWN | 01-Jul-19 |
999-003-005 | INFLUENZA | 12-Nov-18 |
… | … | … |
Transfer all datasets created above into the RI Calculator
Input: all datasets created above
R code
#Load the package required to import and manipulate the RI_Calculator
library(XLConnect)
#Import the RI Calculator into R
wb<-loadWorkbook("C:/RICalculator.xlsm", create = TRUE)
#Write datasets created above into the RI Calculator
setStyleAction(wb, XLC$"STYLE_ACTION.NONE")
writeWorksheet(wb, visit, sheet = "VISIT", startRow = 1, startCol = 1)
writeWorksheet(wb, time, sheet = "TIME", startRow = 1, startCol = 1)
writeWorksheet(wb, progress, sheet = "PROG", startRow = 1, startCol = 1)
writeWorksheet(wb, error, sheet = "ERROR", startRow = 1, startCol = 1)
writeWorksheet(wb, query, sheet = "QUERY", startRow = 1, startCol = 1)
writeWorksheet(wb, dev, sheet = "DEV", startRow = 1, startCol = 1)
writeWorksheet(wb, ae, sheet = "AE", startRow = 1, startCol = 1)
#Save the RI calculator as a new updated workbook
saveWorkbook(wb,"C:/RICalculatorUpdated.xlsm")