From 92b69e698c2ef485a146cabb76c9972c5722dfb8 Mon Sep 17 00:00:00 2001 From: Franz Innerbichler Date: Fri, 7 Nov 2025 20:02:19 +0100 Subject: [PATCH] test app hochladen --- app.R | 437 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 437 insertions(+) create mode 100644 app.R diff --git a/app.R b/app.R new file mode 100644 index 0000000..98a084f --- /dev/null +++ b/app.R @@ -0,0 +1,437 @@ +library(ggplot2) # Load all packages +library(openxlsx) +library(ggpubr) +library(shiny) +library(shinydashboard) +library(shinycssloaders) +library(shinyjs) +library(janitor) +library(markdown) +library(outliers) +library(DT) + +rval = function(y){ + stresid = abs(y - mean(y, na.rm=T))/sd(y, na.rm=T) + df = data.frame(y, stresid) + r = max(df$stresid, na.rm=T) + list(r, df) +} + +# Define UI for application that draws a histogram +ui <- dashboardPage(skin=c("blue"), + + dashboardHeader(title=h4("Grubb's and gESD statistical outlier tests"), titleWidth = 350), + + # Control panel; + dashboardSidebar( + sidebarMenu(width=450, + actionButton("calc", "Calculate", class = "btn-success"), + fileInput(inputId="iFile", label="upload EXCEL", accept = "ms-excel"), + uiOutput(outputId="sheet"), + uiOutput(outputId = "columns"), + helpText("Grubbs type can be 10, 11, 20"), + uiOutput(outputId = "Grubbstype") + + ) + ), + dashboardBody( + tabsetPanel(id= "tsp", + type = "tabs", + tabPanel("data", + DTOutput("dataTable")), + tabPanel("Grubb's", + + uiOutput("Grubbs_allcolm") + + ), + tabPanel("gESD", + + uiOutput("ESD_allcolm") + + + ) + ) + ) + + ) + + +# Define server logic required to draw a histogram +server <- function(input, output) { + + Dat <- reactiveValues() + v <- reactiveValues(doPlot = FALSE) + res <- reactiveValues() + resESD <- reactiveValues() + + observeEvent(input$calc, { + # 0 will be coerced to FALSE + # 1+ will be coerced to TRUE + v$doPlot <- input$calc + }) + +#browser() + # read in the EXCEL file + observe({ + if (!is.null(input$iFile)) { + inFile <- input$iFile + #dataread<-read.table(datafile,sep=';',dec=',',header=TRUE) + #browser() + ext <- tools::file_ext(inFile$name) + file.rename(inFile$datapath,paste(inFile$datapath, ".xlsx", sep="")) + + t.fileLocation<-gsub('\\\\','/',paste(inFile$datapath, ext, sep=".")) + + sheets <- openxlsx::getSheetNames(t.fileLocation) + SheetList <- lapply(sheets,openxlsx::read.xlsx,xlsxFile=t.fileLocation) + names(SheetList) <- sheets + + Dat$wb <- SheetList + #print(Dat$wb) + names(Dat$wb) <- sheets + Dat$sheets <- sheets + } + }) + + + # select sheet + output$sheet <- renderUI({ + if (!is.null(Dat$wb)) { + print(Dat$sheets) + selectInput(inputId = "sheet", label = "Select a sheet:", choices = Dat$sheets) + } + }) + # + # get x-columns + observe({ + if (!is.null(Dat$wb)) { + if (!is.null(input$sheet)){ + print(input$sheet) + dat <- Dat$wb[input$sheet] #sheet=input$sheet + print(colnames(dat[[1]])) + #print(dat) + output$columns <- renderUI({ + checkboxGroupInput("columns", "Choose predictor columns (x-variables)", + choices = colnames(Dat$wb[input$sheet][[1]])) + }) + } + } + }) + + # Grubb's type + observe({ + if (!is.null(Dat$wb)) { + if (!is.null(input$sheet)){ + + output$Grubbstype <- renderUI({ + selectInput(inputId = "Grubbstype", "Choose Grubb's type", + choices = c("10","11", "20"), selected=NULL) + + }) + } + } + }) + + observe({ + if (!is.null(Dat$wb)) { + if (!is.null(input$sheet)){ + x=1 + df_all <- Dat$wb[input$sheet] + dtable <- data.frame(df_all[[1]]) + output$dataTable <- DT::renderDataTable({ + #Pull data and give it a scrub. + dtable + }, + extensions = 'Buttons', + options = list( + "dom" = 'T<"clear">lBfrtip', + buttons = list('copy','excel',list(extend = 'pdf',pageSize = 'A4', + orientation = 'landscape', filename = 'tt'),'print'), + pageLength = 100, + scrollX = TRUE + ) + ) + } + } + }) + + #Grubb's - all columns + output$Grubbs_allcolm <- renderUI({ + if (v$doPlot == FALSE) return() + Dat$datacols <- Dat$wb[input$sheet][[1]][input$columns] + + # if(is.null(DSI$filenam)) return (NULL) + + nparam<-ncol(Dat$datacols) + columnnames <- colnames(Dat$datacols) + #TI_resall<-TI_allparam1(DSI) + + myTabs <- list() + for (iparam in seq(1,nparam)) { + param <- columnnames[iparam] + df_all <- Dat$wb[input$sheet][[1]] + + ###render output objects for model choice + local({ + #variables need to be initialized since otherwise values of last loop run will be used for all plots + my_i <- iparam + # PI<-param_inputs + # PR<-param_results + if (!is.null(input$columns)) { + + output[[paste(sprintf("param%i",my_i),"Grubbs",sep="_")]] <- renderPrint({ + if (v$doPlot == FALSE) return() + Grubbstype <- input$Grubbstype + dat_set <- Dat$datacols + colm <- dat_set[,my_i] + attribute <- as.numeric(colm) + dat_set[,my_i] <- attribute + N1 <- length(attribute) + res$stnd <- sd(attribute, na.rm=TRUE) + res$av <- mean(attribute, na.rm=TRUE) + nom <- qt(0.05/(2*N1),N1-2, lower.tail=FALSE)^2 + denom <- N1-2+nom + res$Gcrit <- ((N1-1)/sqrt(N1))*sqrt(nom/denom) + res$Gcrit_unsc <- res$Gcrit*res$stnd + colmGrubb <- abs(attribute - res$av)/res$stnd + flag <- which(colmGrubb > res$Gcrit) + cat("\n Here the results of the NIST code of the Grubb's test:"," \n") + cat(" H0: There are no outliers in the data set \n") + cat(" Ha: There is exactly one outlier in the data set \n") + cat(" sd:", round(res$stnd,4), "mean:", round(res$av,4) , " \n") + cat(" Upper Gcrit:", round(res$Gcrit_unsc+res$av,4 ),"Lower Gcrit:", round(-1*(res$Gcrit_unsc - res$av),4 )) + cat("\n Grubb's test:", colnames(attribute)," \n") + cat(" which suspected outlier:", attribute[flag], "\n ") + cat("____________________________________________________________ \n\n\n") + cat(" R test for outliers as implemented in the 'outlier' package \n") + cat(" Hypothesis situation based on Grubb's type \n") + cat("************************************************************\n") + print(grubbs.test(attribute,type=Grubbstype)) + cat("************************************************************ \n") + + + }) + output[[paste(sprintf("param%i",my_i),"plot",sep="_")]] <- renderPlot({ # output$grubbsplot <- renderPlot({ + #colm2 <- data.frame(batch=reg, var=colm) + dat_set <- Dat$datacols + colm <- dat_set[,my_i] + attribute <- as.numeric(colm) + dat_set[,my_i] <- attribute + p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) + + geom_point() + + geom_hline(yintercept=res$av+res$Gcrit_unsc, colour="red") + + geom_text(data=data.frame(x=0,y=res$av+res$Gcrit_unsc), aes(x, y), label="GCrit", vjust=0) + + geom_hline(yintercept=res$av-res$Gcrit_unsc, colour="red") + + geom_hline(yintercept=res$av+3*res$stnd, colour="darkorange") + + geom_text(data=data.frame(x=0,y=res$av+3*res$stnd), aes(x, y), label="+3s", vjust=0)+ + geom_hline(yintercept=res$av-3*res$stnd, colour="darkorange") + + geom_hline(yintercept=res$av+2*res$stnd, colour="orange") + + geom_text(data=data.frame(x=0,y=res$av+2*res$stnd), aes(x, y), label="+2s", vjust=0)+ + geom_hline(yintercept=res$av-2*res$stnd, colour="orange") + + geom_hline(yintercept=res$av, colour="blue") + + labs(x="index", y=columnnames[my_i]) + + theme_minimal() + print(p) + + }) + } # if + }) # loacl + + + myTabs[[iparam]] <- tabPanel(param, + value=paste(sprintf("param%i",iparam),"_Grubbs",sep=""), + + tabsetPanel( + tabPanel( + #textOutput('txttest'), + plotOutput(paste(sprintf("param%i",iparam),"plot",sep="_"),width="500px", height="250px"), + verbatimTextOutput(paste(sprintf("param%i",iparam),"Grubbs",sep="_")), + tags$head(tags$style(HTML(" + #printout { + font-size: 8px; + } + "))), + tags$br() #, + + # title="model choice", + #value=paste(sprintf("param%i",iparam),"_modelchoice",sep="") + )#, + + ) + ) + } # iparam + do.call(tabsetPanel, myTabs) + }) # render UI + + + #gESD - all columns separate + output$ESD_allcolm <- renderUI({ + if (v$doPlot == FALSE) return() + Dat$datacols <- Dat$wb[input$sheet][[1]][input$columns] + dat_set <- Dat$datacols + nparam<-ncol(dat_set) + columnnames <- colnames(dat_set) + df_all <- Dat$wb[input$sheet][[1]] + + myTabs <- list() + for (iparam in seq(1,nparam)) { + param <- columnnames[iparam] + + + ###render output objects for model choice + local({ + #variables need to be initialized since otherwise values of last loop run will be used for all plots + my_i <- iparam + + if (!is.null(input$columns)) { + + output[[paste(sprintf("param%i",my_i),"ESD",sep="_")]] <- renderPrint({ + + if (v$doPlot == FALSE) return() + ## Define values and vectors. + colm <- dat_set[,my_i] + attribute <- as.numeric(colm) + n = length(attribute) + alpha = 0.05 + n_max <- 10 # max Anzahl der Werte die abgezogen werden + lambdaCrit = c(1:n_max) + R = c(1:n_max) + + ## Compute test statistic until r=10 values have been + ## removed from the sample. + # rval = function(y){ + # stresid = abs(y - mean(y))/sd(y) + # df = data.frame(y, stresid) + # r = max(df$stresid) + # list(r, df) + # } + for (i in 1:n_max){ + + if(i==1){ + rt = rval(attribute) + R[i] = unlist(rt[1]) + df = data.frame(rt[2]) + newdf = df[df$stresid!=max(df$stresid, na.rm=T),]} + + else if(i!=1){ + rt = rval(newdf$y) + R[i] = unlist(rt[1]) + df = data.frame(rt[2]) + newdf = df[df$stresid!=max(df$stresid, na.rm=T),]} + + ## Compute critical value. + p = 1 - alpha/(2*(n-i+1)) # p-value corrected for multiple tests + t = qt(p,(n-i-1)) # critical value of students-t-distribution + lambdaCrit[i] = t*(n-i) / sqrt((n-i-1+t**2)*(n-i+1)) # + + } + ## Print results. + resid_y <- data.frame(rval(attribute)[2]) + sortresid_y <- resid_y[order(resid_y$stresid, decreasing=T),] + + res <- which(lambdaCrit < R) + if (length(res)!=0) { + test <- sortresid_y[1:res,1] + resESD$stnd <- sd(subset(attribute, !attribute %in% c(test)) , na.rm=TRUE) + resESD$av <- mean(subset(attribute, !attribute %in% c(test)), na.rm = TRUE) + resESD$which <- res + resESD$Crit <- sortresid_y[resESD$which,2]*resESD$stnd + + } else { + resESD$stnd <- sd(attribute, na.rm=TRUE) + resESD$av <- mean(attribute, na.rm = TRUE) + resESD$which <- NA + resESD$Crit <- NA + } + newdf = data.frame(c(1:n_max), sortresid_y$y[1:n_max] ,R,lambdaCrit) + if (!is.na(resESD$Crit)) { + print(paste("sd:", resESD$stnd, "mean:", resESD$av )) + } else { + print(paste("sd:", resESD$stnd, "mean:", resESD$av , " no oulier")) + } + cat(colnames(dat_set)[my_i], "\n Last Test Stat. > Critical Val. indicates number of suspected outliers\n") + names(newdf)=c("No. Outliers","Resid.sorted_y" ,"Test Stat.", "Critical Val.") + print(newdf) + if (length(res) == 0) { cat(colnames(dat_set)[my_i],"\n number of suspected outliers: 0 \n") + } else { + cat(colnames(dat_set)[my_i],"\n number of suspected outliers:", max(res), "\n ") + sort_result <- sortresid_y[1:max(res),] + colnames(sort_result) <- c("value", "z-score") + print(sort_result) + cat("\n\n\n ") + } + + + }) # render print + output[[paste(sprintf("param%i",my_i),"ESDplot",sep="_")]] <- renderPlot({ + + dat_set <- Dat$datacols + colm <- dat_set[,my_i] + attribute <- as.numeric(colm) + dat_set[,my_i] <- attribute + if (!is.na(resESD$Crit)) { + + p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) + + geom_point() + + # geom_hline(yintercept=resESD$av+resESD$Crit, colour="red") + + # geom_text(data=data.frame(x=0,y=resESD$av+resESD$Crit), aes(x, y), label="Gcrit", vjust=0)+ + # geom_hline(yintercept=resESD$av-resESD$Crit, colour="red") + + geom_hline(yintercept=resESD$av+3*resESD$stnd, colour="darkorange") + + geom_text(data=data.frame(x=0,y=resESD$av+3*resESD$stnd), aes(x, y), label="+3s", vjust=0)+ + geom_hline(yintercept=resESD$av-3*resESD$stnd, colour="darkorange") + + geom_hline(yintercept=resESD$av+2*resESD$stnd, colour="orange") + + geom_text(data=data.frame(x=0,y=resESD$av+2*resESD$stnd), aes(x, y), label="+2s", vjust=0)+ + geom_hline(yintercept=resESD$av-2*resESD$stnd, colour="orange") + + geom_hline(yintercept=resESD$av, colour="blue") + + labs(x="index", y=columnnames[my_i]) + + theme_minimal() + print(p) + } else { + p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) + + geom_point() + + geom_hline(yintercept=resESD$av, colour="blue") + + geom_hline(yintercept=resESD$av+3*resESD$stnd, colour="darkorange") + + geom_text(data=data.frame(x=0,y=resESD$av+3*resESD$stnd), aes(x, y), label="+3s", vjust=0)+ + geom_hline(yintercept=resESD$av-3*resESD$stnd, colour="darkorange") + + geom_hline(yintercept=resESD$av+2*resESD$stnd, colour="orange") + + geom_text(data=data.frame(x=0,y=resESD$av+2*resESD$stnd), aes(x, y), label="+2s", vjust=0)+ + geom_hline(yintercept=resESD$av-2*resESD$stnd, colour="orange") + + labs(x="index", y=columnnames[my_i]) + + theme_minimal() + print(p) + } + + }) # render plot + + } # if input != 0 + + }) # local + + + myTabs[[iparam]] <- tabPanel(param, + value=paste(sprintf("param%i",iparam),"_ESD",sep=""), + + tabsetPanel( + tabPanel( + #textOutput('txttest'), + plotOutput(paste(sprintf("param%i",iparam),"ESDplot",sep="_"),width="500px", height="250px"), + verbatimTextOutput(paste(sprintf("param%i",iparam),"ESD",sep="_")), + tags$head(tags$style(HTML(" + #printout { + font-size: 6px; + } + "))), + tags$br() #, + + )#, + + ) + ) + } # for iparam loop + do.call(tabsetPanel, myTabs) + }) # renderUI + +} + +# Run the application +shinyApp(ui = ui, server = server)