library(ggplot2) # Load all packages library(openxlsx) library(ggpubr) library(shiny) library(shinydashboard) library(shinycssloaders) library(shinyjs) library(janitor) library(markdown) library(outliers) library(DT) rval = function(y){ stresid = abs(y - mean(y, na.rm=T))/sd(y, na.rm=T) df = data.frame(y, stresid) r = max(df$stresid, na.rm=T) list(r, df) } # Define UI for application that draws a histogram ui <- dashboardPage(skin=c("blue"), dashboardHeader(title=h4("Grubb's and gESD statistical outlier tests"), titleWidth = 350), # Control panel; dashboardSidebar( sidebarMenu(width=450, actionButton("calc", "Calculate", class = "btn-success"), fileInput(inputId="iFile", label="upload EXCEL", accept = "ms-excel"), uiOutput(outputId="sheet"), uiOutput(outputId = "columns"), helpText("Grubbs type can be 10, 11, 20"), uiOutput(outputId = "Grubbstype") ) ), dashboardBody( tabsetPanel(id= "tsp", type = "tabs", tabPanel("data", DTOutput("dataTable")), tabPanel("Grubb's", uiOutput("Grubbs_allcolm") ), tabPanel("gESD", uiOutput("ESD_allcolm") ) ) ) ) # Define server logic required to draw a histogram server <- function(input, output) { Dat <- reactiveValues() v <- reactiveValues(doPlot = FALSE) res <- reactiveValues() resESD <- reactiveValues() observeEvent(input$calc, { # 0 will be coerced to FALSE # 1+ will be coerced to TRUE v$doPlot <- input$calc }) #browser() # read in the EXCEL file observe({ if (!is.null(input$iFile)) { inFile <- input$iFile #dataread<-read.table(datafile,sep=';',dec=',',header=TRUE) #browser() ext <- tools::file_ext(inFile$name) file.rename(inFile$datapath,paste(inFile$datapath, ".xlsx", sep="")) t.fileLocation<-gsub('\\\\','/',paste(inFile$datapath, ext, sep=".")) sheets <- openxlsx::getSheetNames(t.fileLocation) SheetList <- lapply(sheets,openxlsx::read.xlsx,xlsxFile=t.fileLocation) names(SheetList) <- sheets Dat$wb <- SheetList #print(Dat$wb) names(Dat$wb) <- sheets Dat$sheets <- sheets } }) # select sheet output$sheet <- renderUI({ if (!is.null(Dat$wb)) { print(Dat$sheets) selectInput(inputId = "sheet", label = "Select a sheet:", choices = Dat$sheets) } }) # # get x-columns observe({ if (!is.null(Dat$wb)) { if (!is.null(input$sheet)){ print(input$sheet) dat <- Dat$wb[input$sheet] #sheet=input$sheet print(colnames(dat[[1]])) #print(dat) output$columns <- renderUI({ checkboxGroupInput("columns", "Choose predictor columns (x-variables)", choices = colnames(Dat$wb[input$sheet][[1]])) }) } } }) # Grubb's type observe({ if (!is.null(Dat$wb)) { if (!is.null(input$sheet)){ output$Grubbstype <- renderUI({ selectInput(inputId = "Grubbstype", "Choose Grubb's type", choices = c("10","11", "20"), selected=NULL) }) } } }) observe({ if (!is.null(Dat$wb)) { if (!is.null(input$sheet)){ x=1 df_all <- Dat$wb[input$sheet] dtable <- data.frame(df_all[[1]]) output$dataTable <- DT::renderDataTable({ #Pull data and give it a scrub. dtable }, extensions = 'Buttons', options = list( "dom" = 'T<"clear">lBfrtip', buttons = list('copy','excel',list(extend = 'pdf',pageSize = 'A4', orientation = 'landscape', filename = 'tt'),'print'), pageLength = 100, scrollX = TRUE ) ) } } }) #Grubb's - all columns output$Grubbs_allcolm <- renderUI({ if (v$doPlot == FALSE) return() Dat$datacols <- Dat$wb[input$sheet][[1]][input$columns] # if(is.null(DSI$filenam)) return (NULL) nparam<-ncol(Dat$datacols) columnnames <- colnames(Dat$datacols) #TI_resall<-TI_allparam1(DSI) myTabs <- list() for (iparam in seq(1,nparam)) { param <- columnnames[iparam] df_all <- Dat$wb[input$sheet][[1]] ###render output objects for model choice local({ #variables need to be initialized since otherwise values of last loop run will be used for all plots my_i <- iparam # PI<-param_inputs # PR<-param_results if (!is.null(input$columns)) { output[[paste(sprintf("param%i",my_i),"Grubbs",sep="_")]] <- renderPrint({ if (v$doPlot == FALSE) return() Grubbstype <- input$Grubbstype dat_set <- Dat$datacols colm <- dat_set[,my_i] attribute <- as.numeric(colm) dat_set[,my_i] <- attribute N1 <- length(attribute) res$stnd <- sd(attribute, na.rm=TRUE) res$av <- mean(attribute, na.rm=TRUE) nom <- qt(0.05/(2*N1),N1-2, lower.tail=FALSE)^2 denom <- N1-2+nom res$Gcrit <- ((N1-1)/sqrt(N1))*sqrt(nom/denom) res$Gcrit_unsc <- res$Gcrit*res$stnd colmGrubb <- abs(attribute - res$av)/res$stnd flag <- which(colmGrubb > res$Gcrit) cat("\n Here the results of the NIST code of the Grubb's test:"," \n") cat(" H0: There are no outliers in the data set \n") cat(" Ha: There is exactly one outlier in the data set \n") cat(" sd:", round(res$stnd,4), "mean:", round(res$av,4) , " \n") cat(" Upper Gcrit:", round(res$Gcrit_unsc+res$av,4 ),"Lower Gcrit:", round(-1*(res$Gcrit_unsc - res$av),4 )) cat("\n Grubb's test:", colnames(attribute)," \n") cat(" which suspected outlier:", attribute[flag], "\n ") cat("____________________________________________________________ \n\n\n") cat(" R test for outliers as implemented in the 'outlier' package \n") cat(" Hypothesis situation based on Grubb's type \n") cat("************************************************************\n") print(grubbs.test(attribute,type=Grubbstype)) cat("************************************************************ \n") }) output[[paste(sprintf("param%i",my_i),"plot",sep="_")]] <- renderPlot({ # output$grubbsplot <- renderPlot({ #colm2 <- data.frame(batch=reg, var=colm) dat_set <- Dat$datacols colm <- dat_set[,my_i] attribute <- as.numeric(colm) dat_set[,my_i] <- attribute p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) + geom_point() + geom_hline(yintercept=res$av+res$Gcrit_unsc, colour="red") + geom_text(data=data.frame(x=0,y=res$av+res$Gcrit_unsc), aes(x, y), label="GCrit", vjust=0) + geom_hline(yintercept=res$av-res$Gcrit_unsc, colour="red") + geom_hline(yintercept=res$av+3*res$stnd, colour="darkorange") + geom_text(data=data.frame(x=0,y=res$av+3*res$stnd), aes(x, y), label="+3s", vjust=0)+ geom_hline(yintercept=res$av-3*res$stnd, colour="darkorange") + geom_hline(yintercept=res$av+2*res$stnd, colour="orange") + geom_text(data=data.frame(x=0,y=res$av+2*res$stnd), aes(x, y), label="+2s", vjust=0)+ geom_hline(yintercept=res$av-2*res$stnd, colour="orange") + geom_hline(yintercept=res$av, colour="blue") + labs(x="index", y=columnnames[my_i]) + theme_minimal() print(p) }) } # if }) # loacl myTabs[[iparam]] <- tabPanel(param, value=paste(sprintf("param%i",iparam),"_Grubbs",sep=""), tabsetPanel( tabPanel( #textOutput('txttest'), plotOutput(paste(sprintf("param%i",iparam),"plot",sep="_"),width="500px", height="250px"), verbatimTextOutput(paste(sprintf("param%i",iparam),"Grubbs",sep="_")), tags$head(tags$style(HTML(" #printout { font-size: 8px; } "))), tags$br() #, # title="model choice", #value=paste(sprintf("param%i",iparam),"_modelchoice",sep="") )#, ) ) } # iparam do.call(tabsetPanel, myTabs) }) # render UI #gESD - all columns separate output$ESD_allcolm <- renderUI({ if (v$doPlot == FALSE) return() Dat$datacols <- Dat$wb[input$sheet][[1]][input$columns] dat_set <- Dat$datacols nparam<-ncol(dat_set) columnnames <- colnames(dat_set) df_all <- Dat$wb[input$sheet][[1]] myTabs <- list() for (iparam in seq(1,nparam)) { param <- columnnames[iparam] ###render output objects for model choice local({ #variables need to be initialized since otherwise values of last loop run will be used for all plots my_i <- iparam if (!is.null(input$columns)) { output[[paste(sprintf("param%i",my_i),"ESD",sep="_")]] <- renderPrint({ if (v$doPlot == FALSE) return() ## Define values and vectors. colm <- dat_set[,my_i] attribute <- as.numeric(colm) n = length(attribute) alpha = 0.05 n_max <- 10 # max Anzahl der Werte die abgezogen werden lambdaCrit = c(1:n_max) R = c(1:n_max) ## Compute test statistic until r=10 values have been ## removed from the sample. # rval = function(y){ # stresid = abs(y - mean(y))/sd(y) # df = data.frame(y, stresid) # r = max(df$stresid) # list(r, df) # } for (i in 1:n_max){ if(i==1){ rt = rval(attribute) R[i] = unlist(rt[1]) df = data.frame(rt[2]) newdf = df[df$stresid!=max(df$stresid, na.rm=T),]} else if(i!=1){ rt = rval(newdf$y) R[i] = unlist(rt[1]) df = data.frame(rt[2]) newdf = df[df$stresid!=max(df$stresid, na.rm=T),]} ## Compute critical value. p = 1 - alpha/(2*(n-i+1)) # p-value corrected for multiple tests t = qt(p,(n-i-1)) # critical value of students-t-distribution lambdaCrit[i] = t*(n-i) / sqrt((n-i-1+t**2)*(n-i+1)) # } ## Print results. resid_y <- data.frame(rval(attribute)[2]) sortresid_y <- resid_y[order(resid_y$stresid, decreasing=T),] res <- which(lambdaCrit < R) if (length(res)!=0) { test <- sortresid_y[1:res,1] resESD$stnd <- sd(subset(attribute, !attribute %in% c(test)) , na.rm=TRUE) resESD$av <- mean(subset(attribute, !attribute %in% c(test)), na.rm = TRUE) resESD$which <- res resESD$Crit <- sortresid_y[resESD$which,2]*resESD$stnd } else { resESD$stnd <- sd(attribute, na.rm=TRUE) resESD$av <- mean(attribute, na.rm = TRUE) resESD$which <- NA resESD$Crit <- NA } newdf = data.frame(c(1:n_max), sortresid_y$y[1:n_max] ,R,lambdaCrit) if (!is.na(resESD$Crit)) { print(paste("sd:", resESD$stnd, "mean:", resESD$av )) } else { print(paste("sd:", resESD$stnd, "mean:", resESD$av , " no oulier")) } cat(colnames(dat_set)[my_i], "\n Last Test Stat. > Critical Val. indicates number of suspected outliers\n") names(newdf)=c("No. Outliers","Resid.sorted_y" ,"Test Stat.", "Critical Val.") print(newdf) if (length(res) == 0) { cat(colnames(dat_set)[my_i],"\n number of suspected outliers: 0 \n") } else { cat(colnames(dat_set)[my_i],"\n number of suspected outliers:", max(res), "\n ") sort_result <- sortresid_y[1:max(res),] colnames(sort_result) <- c("value", "z-score") print(sort_result) cat("\n\n\n ") } }) # render print output[[paste(sprintf("param%i",my_i),"ESDplot",sep="_")]] <- renderPlot({ dat_set <- Dat$datacols colm <- dat_set[,my_i] attribute <- as.numeric(colm) dat_set[,my_i] <- attribute if (!is.na(resESD$Crit)) { p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) + geom_point() + # geom_hline(yintercept=resESD$av+resESD$Crit, colour="red") + # geom_text(data=data.frame(x=0,y=resESD$av+resESD$Crit), aes(x, y), label="Gcrit", vjust=0)+ # geom_hline(yintercept=resESD$av-resESD$Crit, colour="red") + geom_hline(yintercept=resESD$av+3*resESD$stnd, colour="darkorange") + geom_text(data=data.frame(x=0,y=resESD$av+3*resESD$stnd), aes(x, y), label="+3s", vjust=0)+ geom_hline(yintercept=resESD$av-3*resESD$stnd, colour="darkorange") + geom_hline(yintercept=resESD$av+2*resESD$stnd, colour="orange") + geom_text(data=data.frame(x=0,y=resESD$av+2*resESD$stnd), aes(x, y), label="+2s", vjust=0)+ geom_hline(yintercept=resESD$av-2*resESD$stnd, colour="orange") + geom_hline(yintercept=resESD$av, colour="blue") + labs(x="index", y=columnnames[my_i]) + theme_minimal() print(p) } else { p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) + geom_point() + geom_hline(yintercept=resESD$av, colour="blue") + geom_hline(yintercept=resESD$av+3*resESD$stnd, colour="darkorange") + geom_text(data=data.frame(x=0,y=resESD$av+3*resESD$stnd), aes(x, y), label="+3s", vjust=0)+ geom_hline(yintercept=resESD$av-3*resESD$stnd, colour="darkorange") + geom_hline(yintercept=resESD$av+2*resESD$stnd, colour="orange") + geom_text(data=data.frame(x=0,y=resESD$av+2*resESD$stnd), aes(x, y), label="+2s", vjust=0)+ geom_hline(yintercept=resESD$av-2*resESD$stnd, colour="orange") + labs(x="index", y=columnnames[my_i]) + theme_minimal() print(p) } }) # render plot } # if input != 0 }) # local myTabs[[iparam]] <- tabPanel(param, value=paste(sprintf("param%i",iparam),"_ESD",sep=""), tabsetPanel( tabPanel( #textOutput('txttest'), plotOutput(paste(sprintf("param%i",iparam),"ESDplot",sep="_"),width="500px", height="250px"), verbatimTextOutput(paste(sprintf("param%i",iparam),"ESD",sep="_")), tags$head(tags$style(HTML(" #printout { font-size: 6px; } "))), tags$br() #, )#, ) ) } # for iparam loop do.call(tabsetPanel, myTabs) }) # renderUI } # Run the application shinyApp(ui = ui, server = server)