Files
plateflow/app.R

438 lines
17 KiB
R
Raw Normal View History

2025-11-07 20:02:19 +01:00
library(ggplot2) # Load all packages
library(openxlsx)
library(ggpubr)
library(shiny)
library(shinydashboard)
library(shinycssloaders)
library(shinyjs)
library(janitor)
library(markdown)
library(outliers)
library(DT)
rval = function(y){
stresid = abs(y - mean(y, na.rm=T))/sd(y, na.rm=T)
df = data.frame(y, stresid)
r = max(df$stresid, na.rm=T)
list(r, df)
}
# Define UI for application that draws a histogram
ui <- dashboardPage(skin=c("blue"),
dashboardHeader(title=h4("Grubb's and gESD statistical outlier tests"), titleWidth = 350),
# Control panel;
dashboardSidebar(
sidebarMenu(width=450,
actionButton("calc", "Calculate", class = "btn-success"),
fileInput(inputId="iFile", label="upload EXCEL", accept = "ms-excel"),
uiOutput(outputId="sheet"),
uiOutput(outputId = "columns"),
helpText("Grubbs type can be 10, 11, 20"),
uiOutput(outputId = "Grubbstype")
)
),
dashboardBody(
tabsetPanel(id= "tsp",
type = "tabs",
tabPanel("data",
DTOutput("dataTable")),
tabPanel("Grubb's",
uiOutput("Grubbs_allcolm")
),
tabPanel("gESD",
uiOutput("ESD_allcolm")
)
)
)
)
# Define server logic required to draw a histogram
server <- function(input, output) {
Dat <- reactiveValues()
v <- reactiveValues(doPlot = FALSE)
res <- reactiveValues()
resESD <- reactiveValues()
observeEvent(input$calc, {
# 0 will be coerced to FALSE
# 1+ will be coerced to TRUE
v$doPlot <- input$calc
})
#browser()
# read in the EXCEL file
observe({
if (!is.null(input$iFile)) {
inFile <- input$iFile
#dataread<-read.table(datafile,sep=';',dec=',',header=TRUE)
#browser()
ext <- tools::file_ext(inFile$name)
file.rename(inFile$datapath,paste(inFile$datapath, ".xlsx", sep=""))
t.fileLocation<-gsub('\\\\','/',paste(inFile$datapath, ext, sep="."))
sheets <- openxlsx::getSheetNames(t.fileLocation)
SheetList <- lapply(sheets,openxlsx::read.xlsx,xlsxFile=t.fileLocation)
names(SheetList) <- sheets
Dat$wb <- SheetList
#print(Dat$wb)
names(Dat$wb) <- sheets
Dat$sheets <- sheets
}
})
# select sheet
output$sheet <- renderUI({
if (!is.null(Dat$wb)) {
print(Dat$sheets)
selectInput(inputId = "sheet", label = "Select a sheet:", choices = Dat$sheets)
}
})
#
# get x-columns
observe({
if (!is.null(Dat$wb)) {
if (!is.null(input$sheet)){
print(input$sheet)
dat <- Dat$wb[input$sheet] #sheet=input$sheet
print(colnames(dat[[1]]))
#print(dat)
output$columns <- renderUI({
checkboxGroupInput("columns", "Choose predictor columns (x-variables)",
choices = colnames(Dat$wb[input$sheet][[1]]))
})
}
}
})
# Grubb's type
observe({
if (!is.null(Dat$wb)) {
if (!is.null(input$sheet)){
output$Grubbstype <- renderUI({
selectInput(inputId = "Grubbstype", "Choose Grubb's type",
choices = c("10","11", "20"), selected=NULL)
})
}
}
})
observe({
if (!is.null(Dat$wb)) {
if (!is.null(input$sheet)){
x=1
df_all <- Dat$wb[input$sheet]
dtable <- data.frame(df_all[[1]])
output$dataTable <- DT::renderDataTable({
#Pull data and give it a scrub.
dtable
},
extensions = 'Buttons',
options = list(
"dom" = 'T<"clear">lBfrtip',
buttons = list('copy','excel',list(extend = 'pdf',pageSize = 'A4',
orientation = 'landscape', filename = 'tt'),'print'),
pageLength = 100,
scrollX = TRUE
)
)
}
}
})
#Grubb's - all columns
output$Grubbs_allcolm <- renderUI({
if (v$doPlot == FALSE) return()
Dat$datacols <- Dat$wb[input$sheet][[1]][input$columns]
# if(is.null(DSI$filenam)) return (NULL)
nparam<-ncol(Dat$datacols)
columnnames <- colnames(Dat$datacols)
#TI_resall<-TI_allparam1(DSI)
myTabs <- list()
for (iparam in seq(1,nparam)) {
param <- columnnames[iparam]
df_all <- Dat$wb[input$sheet][[1]]
###render output objects for model choice
local({
#variables need to be initialized since otherwise values of last loop run will be used for all plots
my_i <- iparam
# PI<-param_inputs
# PR<-param_results
if (!is.null(input$columns)) {
output[[paste(sprintf("param%i",my_i),"Grubbs",sep="_")]] <- renderPrint({
if (v$doPlot == FALSE) return()
Grubbstype <- input$Grubbstype
dat_set <- Dat$datacols
colm <- dat_set[,my_i]
attribute <- as.numeric(colm)
dat_set[,my_i] <- attribute
N1 <- length(attribute)
res$stnd <- sd(attribute, na.rm=TRUE)
res$av <- mean(attribute, na.rm=TRUE)
nom <- qt(0.05/(2*N1),N1-2, lower.tail=FALSE)^2
denom <- N1-2+nom
res$Gcrit <- ((N1-1)/sqrt(N1))*sqrt(nom/denom)
res$Gcrit_unsc <- res$Gcrit*res$stnd
colmGrubb <- abs(attribute - res$av)/res$stnd
flag <- which(colmGrubb > res$Gcrit)
cat("\n Here the results of the NIST code of the Grubb's test:"," \n")
cat(" H0: There are no outliers in the data set \n")
cat(" Ha: There is exactly one outlier in the data set \n")
cat(" sd:", round(res$stnd,4), "mean:", round(res$av,4) , " \n")
cat(" Upper Gcrit:", round(res$Gcrit_unsc+res$av,4 ),"Lower Gcrit:", round(-1*(res$Gcrit_unsc - res$av),4 ))
cat("\n Grubb's test:", colnames(attribute)," \n")
cat(" which suspected outlier:", attribute[flag], "\n ")
cat("____________________________________________________________ \n\n\n")
cat(" R test for outliers as implemented in the 'outlier' package \n")
cat(" Hypothesis situation based on Grubb's type \n")
cat("************************************************************\n")
print(grubbs.test(attribute,type=Grubbstype))
cat("************************************************************ \n")
})
output[[paste(sprintf("param%i",my_i),"plot",sep="_")]] <- renderPlot({ # output$grubbsplot <- renderPlot({
#colm2 <- data.frame(batch=reg, var=colm)
dat_set <- Dat$datacols
colm <- dat_set[,my_i]
attribute <- as.numeric(colm)
dat_set[,my_i] <- attribute
p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) +
geom_point() +
geom_hline(yintercept=res$av+res$Gcrit_unsc, colour="red") +
geom_text(data=data.frame(x=0,y=res$av+res$Gcrit_unsc), aes(x, y), label="GCrit", vjust=0) +
geom_hline(yintercept=res$av-res$Gcrit_unsc, colour="red") +
geom_hline(yintercept=res$av+3*res$stnd, colour="darkorange") +
geom_text(data=data.frame(x=0,y=res$av+3*res$stnd), aes(x, y), label="+3s", vjust=0)+
geom_hline(yintercept=res$av-3*res$stnd, colour="darkorange") +
geom_hline(yintercept=res$av+2*res$stnd, colour="orange") +
geom_text(data=data.frame(x=0,y=res$av+2*res$stnd), aes(x, y), label="+2s", vjust=0)+
geom_hline(yintercept=res$av-2*res$stnd, colour="orange") +
geom_hline(yintercept=res$av, colour="blue") +
labs(x="index", y=columnnames[my_i]) +
theme_minimal()
print(p)
})
} # if
}) # loacl
myTabs[[iparam]] <- tabPanel(param,
value=paste(sprintf("param%i",iparam),"_Grubbs",sep=""),
tabsetPanel(
tabPanel(
#textOutput('txttest'),
plotOutput(paste(sprintf("param%i",iparam),"plot",sep="_"),width="500px", height="250px"),
verbatimTextOutput(paste(sprintf("param%i",iparam),"Grubbs",sep="_")),
tags$head(tags$style(HTML("
#printout {
font-size: 8px;
}
"))),
tags$br() #,
# title="model choice",
#value=paste(sprintf("param%i",iparam),"_modelchoice",sep="")
)#,
)
)
} # iparam
do.call(tabsetPanel, myTabs)
}) # render UI
#gESD - all columns separate
output$ESD_allcolm <- renderUI({
if (v$doPlot == FALSE) return()
Dat$datacols <- Dat$wb[input$sheet][[1]][input$columns]
dat_set <- Dat$datacols
nparam<-ncol(dat_set)
columnnames <- colnames(dat_set)
df_all <- Dat$wb[input$sheet][[1]]
myTabs <- list()
for (iparam in seq(1,nparam)) {
param <- columnnames[iparam]
###render output objects for model choice
local({
#variables need to be initialized since otherwise values of last loop run will be used for all plots
my_i <- iparam
if (!is.null(input$columns)) {
output[[paste(sprintf("param%i",my_i),"ESD",sep="_")]] <- renderPrint({
if (v$doPlot == FALSE) return()
## Define values and vectors.
colm <- dat_set[,my_i]
attribute <- as.numeric(colm)
n = length(attribute)
alpha = 0.05
n_max <- 10 # max Anzahl der Werte die abgezogen werden
lambdaCrit = c(1:n_max)
R = c(1:n_max)
## Compute test statistic until r=10 values have been
## removed from the sample.
# rval = function(y){
# stresid = abs(y - mean(y))/sd(y)
# df = data.frame(y, stresid)
# r = max(df$stresid)
# list(r, df)
# }
for (i in 1:n_max){
if(i==1){
rt = rval(attribute)
R[i] = unlist(rt[1])
df = data.frame(rt[2])
newdf = df[df$stresid!=max(df$stresid, na.rm=T),]}
else if(i!=1){
rt = rval(newdf$y)
R[i] = unlist(rt[1])
df = data.frame(rt[2])
newdf = df[df$stresid!=max(df$stresid, na.rm=T),]}
## Compute critical value.
p = 1 - alpha/(2*(n-i+1)) # p-value corrected for multiple tests
t = qt(p,(n-i-1)) # critical value of students-t-distribution
lambdaCrit[i] = t*(n-i) / sqrt((n-i-1+t**2)*(n-i+1)) #
}
## Print results.
resid_y <- data.frame(rval(attribute)[2])
sortresid_y <- resid_y[order(resid_y$stresid, decreasing=T),]
res <- which(lambdaCrit < R)
if (length(res)!=0) {
test <- sortresid_y[1:res,1]
resESD$stnd <- sd(subset(attribute, !attribute %in% c(test)) , na.rm=TRUE)
resESD$av <- mean(subset(attribute, !attribute %in% c(test)), na.rm = TRUE)
resESD$which <- res
resESD$Crit <- sortresid_y[resESD$which,2]*resESD$stnd
} else {
resESD$stnd <- sd(attribute, na.rm=TRUE)
resESD$av <- mean(attribute, na.rm = TRUE)
resESD$which <- NA
resESD$Crit <- NA
}
newdf = data.frame(c(1:n_max), sortresid_y$y[1:n_max] ,R,lambdaCrit)
if (!is.na(resESD$Crit)) {
print(paste("sd:", resESD$stnd, "mean:", resESD$av ))
} else {
print(paste("sd:", resESD$stnd, "mean:", resESD$av , " no oulier"))
}
cat(colnames(dat_set)[my_i], "\n Last Test Stat. > Critical Val. indicates number of suspected outliers\n")
names(newdf)=c("No. Outliers","Resid.sorted_y" ,"Test Stat.", "Critical Val.")
print(newdf)
if (length(res) == 0) { cat(colnames(dat_set)[my_i],"\n number of suspected outliers: 0 \n")
} else {
cat(colnames(dat_set)[my_i],"\n number of suspected outliers:", max(res), "\n ")
sort_result <- sortresid_y[1:max(res),]
colnames(sort_result) <- c("value", "z-score")
print(sort_result)
cat("\n\n\n ")
}
}) # render print
output[[paste(sprintf("param%i",my_i),"ESDplot",sep="_")]] <- renderPlot({
dat_set <- Dat$datacols
colm <- dat_set[,my_i]
attribute <- as.numeric(colm)
dat_set[,my_i] <- attribute
if (!is.na(resESD$Crit)) {
p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) +
geom_point() +
# geom_hline(yintercept=resESD$av+resESD$Crit, colour="red") +
# geom_text(data=data.frame(x=0,y=resESD$av+resESD$Crit), aes(x, y), label="Gcrit", vjust=0)+
# geom_hline(yintercept=resESD$av-resESD$Crit, colour="red") +
geom_hline(yintercept=resESD$av+3*resESD$stnd, colour="darkorange") +
geom_text(data=data.frame(x=0,y=resESD$av+3*resESD$stnd), aes(x, y), label="+3s", vjust=0)+
geom_hline(yintercept=resESD$av-3*resESD$stnd, colour="darkorange") +
geom_hline(yintercept=resESD$av+2*resESD$stnd, colour="orange") +
geom_text(data=data.frame(x=0,y=resESD$av+2*resESD$stnd), aes(x, y), label="+2s", vjust=0)+
geom_hline(yintercept=resESD$av-2*resESD$stnd, colour="orange") +
geom_hline(yintercept=resESD$av, colour="blue") +
labs(x="index", y=columnnames[my_i]) +
theme_minimal()
print(p)
} else {
p <- ggplot(dat_set, aes(x=seq(1, nrow(dat_set)), y=dat_set[,my_i])) +
geom_point() +
geom_hline(yintercept=resESD$av, colour="blue") +
geom_hline(yintercept=resESD$av+3*resESD$stnd, colour="darkorange") +
geom_text(data=data.frame(x=0,y=resESD$av+3*resESD$stnd), aes(x, y), label="+3s", vjust=0)+
geom_hline(yintercept=resESD$av-3*resESD$stnd, colour="darkorange") +
geom_hline(yintercept=resESD$av+2*resESD$stnd, colour="orange") +
geom_text(data=data.frame(x=0,y=resESD$av+2*resESD$stnd), aes(x, y), label="+2s", vjust=0)+
geom_hline(yintercept=resESD$av-2*resESD$stnd, colour="orange") +
labs(x="index", y=columnnames[my_i]) +
theme_minimal()
print(p)
}
}) # render plot
} # if input != 0
}) # local
myTabs[[iparam]] <- tabPanel(param,
value=paste(sprintf("param%i",iparam),"_ESD",sep=""),
tabsetPanel(
tabPanel(
#textOutput('txttest'),
plotOutput(paste(sprintf("param%i",iparam),"ESDplot",sep="_"),width="500px", height="250px"),
verbatimTextOutput(paste(sprintf("param%i",iparam),"ESD",sep="_")),
tags$head(tags$style(HTML("
#printout {
font-size: 6px;
}
"))),
tags$br() #,
)#,
)
)
} # for iparam loop
do.call(tabsetPanel, myTabs)
}) # renderUI
}
# Run the application
shinyApp(ui = ui, server = server)