test app hochladen

2025-11-07 20:02:19 +01:00
parent 90531a1f38
commit 92b69e698c
1 changed files with 437 additions and 0 deletions
--- a/app.R
+++ b/app.R
@@ -0,0 +1,437 @@
+library(ggplot2)                     # Load all packages
+library(openxlsx)
+library(ggpubr)
+library(shiny)
+library(shinydashboard)
+library(shinycssloaders)
+library(shinyjs)
+library(janitor)
+library(markdown)
+library(outliers)
+library(DT)
+
+rval = function(y){
+  stresid = abs(y - mean(y, na.rm=T))/sd(y, na.rm=T)
+  df = data.frame(y, stresid)
+  r = max(df$stresid, na.rm=T)
+  list(r, df)
+}
+
+# Define UI for application that draws a histogram
+ui <- dashboardPage(skin=c("blue"),
+                          
+      dashboardHeader(title=h4("Grubb's and gESD statistical outlier tests"), titleWidth = 350), 
+ 
+          # Control panel;
+          dashboardSidebar(
+          sidebarMenu(width=450,
+                      actionButton("calc", "Calculate", class = "btn-success"),
+             fileInput(inputId="iFile", label="upload EXCEL", accept = "ms-excel"),
+             uiOutput(outputId="sheet"),
+             uiOutput(outputId = "columns"),
+             helpText("Grubbs type can be 10, 11, 20"),
+             uiOutput(outputId = "Grubbstype")
+             
+           )
+         ),
+         dashboardBody(
+           tabsetPanel(id= "tsp",
+                       type = "tabs",
+                       tabPanel("data",
+                                DTOutput("dataTable")),
+                       tabPanel("Grubb's", 
+                                
+                                uiOutput("Grubbs_allcolm")
+
+                       ),
+                       tabPanel("gESD",
+                                
+                                uiOutput("ESD_allcolm")
+                                
+                                
+               )
+           )
+         )
+                       
+ )
+
+
+# Define server logic required to draw a histogram
+server <- function(input, output) {
+
+  Dat <- reactiveValues()
+  v <- reactiveValues(doPlot = FALSE) 
+  res <- reactiveValues()
+  resESD <- reactiveValues()
+  
+  observeEvent(input$calc, {
+    # 0 will be coerced to FALSE
+    # 1+ will be coerced to TRUE
+    v$doPlot <- input$calc
+  })
+  
+#browser()  
+  # read in the EXCEL file
+  observe({
+    if (!is.null(input$iFile)) {
+      inFile <- input$iFile
+      #dataread<-read.table(datafile,sep=';',dec=',',header=TRUE)
+ #browser()
+      ext <- tools::file_ext(inFile$name)
+      file.rename(inFile$datapath,paste(inFile$datapath, ".xlsx", sep=""))
+
+      t.fileLocation<-gsub('\\\\','/',paste(inFile$datapath, ext, sep="."))
+
+      sheets <- openxlsx::getSheetNames(t.fileLocation)
+      SheetList <- lapply(sheets,openxlsx::read.xlsx,xlsxFile=t.fileLocation)
+      names(SheetList) <- sheets
+
+      Dat$wb <- SheetList
+      #print(Dat$wb)
+      names(Dat$wb) <- sheets
+      Dat$sheets <- sheets
+    }
+  })
+
+
+  # select sheet
+  output$sheet <- renderUI({
+    if (!is.null(Dat$wb)) {
+      print(Dat$sheets)
+      selectInput(inputId = "sheet", label = "Select a sheet:", choices = Dat$sheets)
+    }
+  })
+ #  
+  # get x-columns
+  observe({
+    if (!is.null(Dat$wb)) {
+      if (!is.null(input$sheet)){
+        print(input$sheet)
+        dat <- Dat$wb[input$sheet] #sheet=input$sheet
+        print(colnames(dat[[1]]))
+        #print(dat)
+        output$columns <- renderUI({
+          checkboxGroupInput("columns", "Choose predictor columns (x-variables)",
+                             choices  = colnames(Dat$wb[input$sheet][[1]]))
+        })
+      }
+    }
+  })
+
+  # Grubb's type
+  observe({
+    if (!is.null(Dat$wb)) {
+      if (!is.null(input$sheet)){
+
+        output$Grubbstype <- renderUI({
+          selectInput(inputId = "Grubbstype", "Choose Grubb's type",
+                      choices  = c("10","11", "20"), selected=NULL)
+
+        })
+      }
+    }
+  })
+
+  observe({
+    if (!is.null(Dat$wb)) {
+      if (!is.null(input$sheet)){
+        x=1
+        df_all <- Dat$wb[input$sheet]
+        dtable <- data.frame(df_all[[1]])
+        output$dataTable <- DT::renderDataTable({
+          #Pull data and give it a scrub.
+          dtable
+        },
+        extensions = 'Buttons',
+        options = list(
+          "dom" = 'T<"clear">lBfrtip',
+          buttons = list('copy','excel',list(extend = 'pdf',pageSize = 'A4',
+                                             orientation = 'landscape', filename = 'tt'),'print'),
+          pageLength = 100,
+          scrollX = TRUE
+        )
+        )
+      }
+    }
+  })
+
+  #Grubb's - all columns
+  output$Grubbs_allcolm <- renderUI({
+    if (v$doPlot == FALSE) return()
+    Dat$datacols <- Dat$wb[input$sheet][[1]][input$columns]
+    
+    # if(is.null(DSI$filenam)) return (NULL)
+    
+    nparam<-ncol(Dat$datacols)
+    columnnames <- colnames(Dat$datacols)
+    #TI_resall<-TI_allparam1(DSI)
+    
+    myTabs <- list()
+    for (iparam in seq(1,nparam)) {
+      param <- columnnames[iparam]
+      df_all <- Dat$wb[input$sheet][[1]]
+      
+      ###render output objects for model choice
+      local({
+        #variables need to be initialized since otherwise values of last loop run will be used for all plots
+        my_i <- iparam
+        # PI<-param_inputs
+        # PR<-param_results
+        if (!is.null(input$columns)) {
+          
+          output[[paste(sprintf("param%i",my_i),"Grubbs",sep="_")]] <- renderPrint({
+            if (v$doPlot == FALSE) return() 
+            Grubbstype <- input$Grubbstype
+            dat_set <- Dat$datacols
+            colm <- dat_set[,my_i]
+            attribute <- as.numeric(colm)
+            dat_set[,my_i] <- attribute
+            N1 <- length(attribute)
+            res$stnd <- sd(attribute, na.rm=TRUE)
+            res$av <- mean(attribute, na.rm=TRUE)
+            nom <- qt(0.05/(2*N1),N1-2, lower.tail=FALSE)^2
+            denom <- N1-2+nom
+            res$Gcrit <- ((N1-1)/sqrt(N1))*sqrt(nom/denom)
+            res$Gcrit_unsc <- res$Gcrit*res$stnd
+            colmGrubb <- abs(attribute - res$av)/res$stnd
+            flag <- which(colmGrubb > res$Gcrit)
+            cat("\n Here the results of the NIST code of the Grubb's test:"," \n")
+            cat("  H0:	There are no outliers in the data set \n")
+            cat("  Ha:	There is exactly one outlier in the data set \n")
+            cat(" sd:", round(res$stnd,4), "mean:", round(res$av,4) , " \n")
+            cat(" Upper Gcrit:", round(res$Gcrit_unsc+res$av,4 ),"Lower Gcrit:", round(-1*(res$Gcrit_unsc - res$av),4 ))
+            cat("\n Grubb's test:", colnames(attribute)," \n")
+            cat(" which suspected outlier:", attribute[flag], "\n ") 
+            cat("____________________________________________________________ \n\n\n")
+            cat(" R test for outliers as implemented in the 'outlier' package \n")
+            cat(" Hypothesis situation based on Grubb's type \n")
+            cat("************************************************************\n")
+            print(grubbs.test(attribute,type=Grubbstype))
+            cat("************************************************************ \n")
+            
+            
+          })
+          output[[paste(sprintf("param%i",my_i),"plot",sep="_")]] <- renderPlot({   # output$grubbsplot <- renderPlot({
+            #colm2 <- data.frame(batch=reg, var=colm)
+            dat_set <- Dat$datacols
+            colm <- dat_set[,my_i]
+            attribute <- as.numeric(colm)
+            dat_set[,my_i] <- attribute
+            p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) +  
+              geom_point() + 
+              geom_hline(yintercept=res$av+res$Gcrit_unsc, colour="red") +
+              geom_text(data=data.frame(x=0,y=res$av+res$Gcrit_unsc), aes(x, y), label="GCrit", vjust=0) +
+              geom_hline(yintercept=res$av-res$Gcrit_unsc, colour="red") +
+              geom_hline(yintercept=res$av+3*res$stnd, colour="darkorange") +
+              geom_text(data=data.frame(x=0,y=res$av+3*res$stnd), aes(x, y), label="+3s", vjust=0)+
+              geom_hline(yintercept=res$av-3*res$stnd, colour="darkorange") + 
+              geom_hline(yintercept=res$av+2*res$stnd, colour="orange") +
+              geom_text(data=data.frame(x=0,y=res$av+2*res$stnd), aes(x, y), label="+2s", vjust=0)+
+              geom_hline(yintercept=res$av-2*res$stnd, colour="orange") +
+              geom_hline(yintercept=res$av, colour="blue") +
+              labs(x="index", y=columnnames[my_i]) +
+              theme_minimal()
+            print(p)
+            
+          })
+        } # if
+      }) # loacl
+      
+      
+      myTabs[[iparam]] <- tabPanel(param,
+                                   value=paste(sprintf("param%i",iparam),"_Grubbs",sep=""),
+                                   
+                                   tabsetPanel(
+                                     tabPanel(
+                                       #textOutput('txttest'),
+                                       plotOutput(paste(sprintf("param%i",iparam),"plot",sep="_"),width="500px", height="250px"),
+                                       verbatimTextOutput(paste(sprintf("param%i",iparam),"Grubbs",sep="_")),
+                                       tags$head(tags$style(HTML("
+                                                            #printout {
+                                                              font-size: 8px;
+                                                            }
+                                                            "))),
+                                       tags$br() #,
+                                       
+                                       # title="model choice",
+                                       #value=paste(sprintf("param%i",iparam),"_modelchoice",sep="")
+                                     )#,
+                                     
+                                   )
+      )
+    } # iparam
+    do.call(tabsetPanel, myTabs)
+  }) # render UI
+
+  
+  #gESD - all columns separate
+  output$ESD_allcolm <- renderUI({
+    if (v$doPlot == FALSE) return()
+    Dat$datacols <- Dat$wb[input$sheet][[1]][input$columns]
+    dat_set <- Dat$datacols
+    nparam<-ncol(dat_set)
+    columnnames <- colnames(dat_set)
+    df_all <- Dat$wb[input$sheet][[1]]
+    
+    myTabs <- list()
+    for (iparam in seq(1,nparam)) {
+      param <- columnnames[iparam]
+      
+      
+      ###render output objects for model choice
+      local({
+        #variables need to be initialized since otherwise values of last loop run will be used for all plots
+        my_i <- iparam
+        
+        if (!is.null(input$columns)) {
+          
+          output[[paste(sprintf("param%i",my_i),"ESD",sep="_")]] <- renderPrint({
+            
+            if (v$doPlot == FALSE) return()
+            ## Define values and vectors.
+            colm <- dat_set[,my_i]
+            attribute <- as.numeric(colm)
+            n = length(attribute)
+            alpha = 0.05
+            n_max <- 10 # max Anzahl der Werte die abgezogen werden
+            lambdaCrit = c(1:n_max)
+            R = c(1:n_max)
+            
+            ## Compute test statistic until r=10 values have been
+            ## removed from the sample.
+            # rval = function(y){
+            #   stresid = abs(y - mean(y))/sd(y)
+            #   df = data.frame(y, stresid)
+            #   r = max(df$stresid)
+            #   list(r, df)
+            # }
+            for (i in 1:n_max){
+              
+              if(i==1){
+                rt = rval(attribute)
+                R[i] = unlist(rt[1])
+                df = data.frame(rt[2])
+                newdf = df[df$stresid!=max(df$stresid, na.rm=T),]}
+              
+              else if(i!=1){
+                rt = rval(newdf$y)
+                R[i] = unlist(rt[1])
+                df = data.frame(rt[2])
+                newdf = df[df$stresid!=max(df$stresid, na.rm=T),]}
+              
+              ## Compute critical value.
+              p = 1 - alpha/(2*(n-i+1))  # p-value corrected for multiple tests
+              t = qt(p,(n-i-1)) # critical value of students-t-distribution
+              lambdaCrit[i] = t*(n-i) / sqrt((n-i-1+t**2)*(n-i+1)) # 
+              
+            }
+            ## Print results.
+            resid_y <- data.frame(rval(attribute)[2])
+            sortresid_y <- resid_y[order(resid_y$stresid, decreasing=T),]
+            
+            res <- which(lambdaCrit < R)
+            if (length(res)!=0) {
+              test <- sortresid_y[1:res,1]
+              resESD$stnd <- sd(subset(attribute, !attribute %in%  c(test)) , na.rm=TRUE)
+              resESD$av <- mean(subset(attribute, !attribute %in%  c(test)), na.rm = TRUE)
+              resESD$which <- res
+              resESD$Crit <- sortresid_y[resESD$which,2]*resESD$stnd
+              
+            } else {
+              resESD$stnd <- sd(attribute, na.rm=TRUE)
+              resESD$av <- mean(attribute, na.rm = TRUE)
+              resESD$which <- NA
+              resESD$Crit <- NA
+            }
+            newdf = data.frame(c(1:n_max), sortresid_y$y[1:n_max] ,R,lambdaCrit)
+            if (!is.na(resESD$Crit)) {
+              print(paste("sd:", resESD$stnd, "mean:", resESD$av )) 
+            } else {
+              print(paste("sd:", resESD$stnd, "mean:", resESD$av , " no oulier"))
+            }
+            cat(colnames(dat_set)[my_i], "\n Last Test Stat. > Critical Val. indicates number of suspected outliers\n") 
+            names(newdf)=c("No. Outliers","Resid.sorted_y" ,"Test Stat.", "Critical Val.")
+            print(newdf)
+            if (length(res) == 0) { cat(colnames(dat_set)[my_i],"\n number of suspected outliers: 0 \n")
+            } else {
+              cat(colnames(dat_set)[my_i],"\n number of suspected outliers:", max(res), "\n ") 
+              sort_result <- sortresid_y[1:max(res),]
+              colnames(sort_result) <- c("value", "z-score")
+              print(sort_result) 
+              cat("\n\n\n ")
+            }
+            
+            
+          }) # render print
+          output[[paste(sprintf("param%i",my_i),"ESDplot",sep="_")]] <- renderPlot({   
+            
+            dat_set <- Dat$datacols
+            colm <- dat_set[,my_i]
+            attribute <- as.numeric(colm)
+            dat_set[,my_i] <- attribute
+            if (!is.na(resESD$Crit)) {
+              
+              p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) +
+                geom_point() +
+                # geom_hline(yintercept=resESD$av+resESD$Crit, colour="red") +
+                # geom_text(data=data.frame(x=0,y=resESD$av+resESD$Crit), aes(x, y), label="Gcrit", vjust=0)+
+                # geom_hline(yintercept=resESD$av-resESD$Crit, colour="red") + 
+                geom_hline(yintercept=resESD$av+3*resESD$stnd, colour="darkorange") +
+                geom_text(data=data.frame(x=0,y=resESD$av+3*resESD$stnd), aes(x, y), label="+3s", vjust=0)+
+                geom_hline(yintercept=resESD$av-3*resESD$stnd, colour="darkorange") + 
+                geom_hline(yintercept=resESD$av+2*resESD$stnd, colour="orange") +
+                geom_text(data=data.frame(x=0,y=resESD$av+2*resESD$stnd), aes(x, y), label="+2s", vjust=0)+
+                geom_hline(yintercept=resESD$av-2*resESD$stnd, colour="orange") +
+                geom_hline(yintercept=resESD$av, colour="blue") +
+                labs(x="index", y=columnnames[my_i]) +
+                theme_minimal() 
+              print(p)
+            } else {
+              p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) +
+                geom_point() +
+                geom_hline(yintercept=resESD$av, colour="blue") +
+                geom_hline(yintercept=resESD$av+3*resESD$stnd, colour="darkorange") +
+                geom_text(data=data.frame(x=0,y=resESD$av+3*resESD$stnd), aes(x, y), label="+3s", vjust=0)+
+                geom_hline(yintercept=resESD$av-3*resESD$stnd, colour="darkorange") + 
+                geom_hline(yintercept=resESD$av+2*resESD$stnd, colour="orange") +
+                geom_text(data=data.frame(x=0,y=resESD$av+2*resESD$stnd), aes(x, y), label="+2s", vjust=0)+
+                geom_hline(yintercept=resESD$av-2*resESD$stnd, colour="orange") +
+                labs(x="index", y=columnnames[my_i]) +
+                theme_minimal() 
+              print(p)
+            }
+            
+          }) # render plot
+          
+        } # if input != 0   
+        
+      }) # local
+      
+      
+      myTabs[[iparam]] <- tabPanel(param,
+                                   value=paste(sprintf("param%i",iparam),"_ESD",sep=""),
+                                   
+                                   tabsetPanel(
+                                     tabPanel(
+                                       #textOutput('txttest'),
+                                       plotOutput(paste(sprintf("param%i",iparam),"ESDplot",sep="_"),width="500px", height="250px"),
+                                       verbatimTextOutput(paste(sprintf("param%i",iparam),"ESD",sep="_")),
+                                       tags$head(tags$style(HTML("
+                                                                 #printout {
+                                                                 font-size: 6px;
+                                                                  }
+                                                                 "))),
+                                       tags$br() #,
+                                       
+                                     )#,
+                                     
+                                   )
+      )
+    } # for iparam loop
+    do.call(tabsetPanel, myTabs)
+  }) # renderUI
+  
+}
+
+# Run the application 
+shinyApp(ui = ui, server = server)