From 41772c904bc56bb7d5a58bf291eced3cb17e64a8 Mon Sep 17 00:00:00 2001 From: Franz Innerbichler Date: Mon, 26 Jan 2026 19:51:40 +0100 Subject: [PATCH] CI file and PseudoRepFile added PLAscraper updated --- ConfidenceIntervals.R | 313 ++++++++++++++++++++++++++++++ PseudoreplicatesTrueRepsIdeal.R | 332 ++++++++++++++++++++++++++++++++ appPLAscraper.R | 76 +++++--- 3 files changed, 694 insertions(+), 27 deletions(-) create mode 100644 ConfidenceIntervals.R create mode 100644 PseudoreplicatesTrueRepsIdeal.R diff --git a/ConfidenceIntervals.R b/ConfidenceIntervals.R new file mode 100644 index 0000000..0149667 --- /dev/null +++ b/ConfidenceIntervals.R @@ -0,0 +1,313 @@ +#_______________________________________________________________________________________________________________________ +library(tidyverse) +library(gslnls) +library(ggplot2) +library(reshape2) +library(drc) + +all_data <- data.frame(Conc= c( 80000.000, 10666.667, 1422.2222, 189.62963, 25.28395, 3.37119, 0.44949, 0.05993), + ST_1 = c(414356, 402828, 405696, 510624 ,844864, 995120, 984952, 1076052), + ST_2 = c(407280, 398916, 410140, 530372, 851828, 997708, 1001036, 1069060), + ST_3 = c(371408, 424764, 397564, 498328, 808188, 910116, 995972, 974076), + REF_70_1 = c( 402432, 413852, 422292, 579012, 896164, 1029552, 1048636 ,994528), + REF_70_2 = c(401548, 406580, 429472, 560256, 895536, 976660, 945068, 1025772), + REF_70_3 = c(390632, 386176, 418596, 536832, 865956, 888640, 985340, 1053968)) + +# PLA GHZ250925nM2.docx +all_data <- data.frame(Conc= c( 80000.000, 10666.667, 1422.2222, 189.62963, 25.28395, 3.37119, 0.44949, 0.05993), + ST_1 = c(138743, 138450, 156121, 262387, 561683, 652307, 701813, 730525), + ST_2 = c(130197, 137202, 145935, 271407, 580252, 645236, 700294, 727128), + ST_3 = c(127476, 130984, 150794, 252283, 559926, 648084, 701467, 683004), + REF_70_1 = c(141259, 136659, 157568, 255747, 557256, 693976, 683018, 687692), + REF_70_2 = c(132591, 135163, 151050, 258229, 535896, 615267, 672638, 697947), + REF_70_3 = c(129988, 124818, 142684, 237172, 514545, 622616, 668689, 652239)) + +# PLA GHZ339_20251022_1MAK +all_data <- data.frame(Conc= c( 80000.000, 10666.667, 1422.2222, 189.62963, 25.28395, 3.37119, 0.44949, 0.05993), + GHZ339.01_REF_1 = c( 141179, 137438, 156331, 220605, 457358, 561383, 583467, 599980), + GHZ339.01_REF_2= c( 142449, 145681, 151674, 242837, 458059, 575359, 596014, 612299), + GHZ339.01_REF_3= c( 142608, 137862, 149732, 238484, 467783, 564427, 620924, 634909), + GHZ339DS_1 = c( 137179, 156595, 155272, 227545, 460416, 569905, 610124, 592936), + GHZ339DS_2= c( 139982, 144452, 161241, 243495, 458981, 564540, 613269, 605241), + GHZ339DS_3 = c( 153642, 145111, 158542, 229435, 458629, 556951, 612850, 614633)) + + + + +# for information: 4-pl-model evaluated with drc-package +all_l <- melt(all_data, id.vars= "Conc", variable.name="replname", value.name="readout") +all_l$logreadout <- log(all_l$readout) + +# add sample ID +all_l$sample <- c(rep(0,nrow(all_l)/2), rep(1,nrow(all_l)/2)) +all_l$isRef <- c(rep(1,nrow(all_l)/2), rep(0,nrow(all_l)/2)) +#all_l$sample <- c(rep("A",24), rep("B",24)) +all_l$log_dose <- log(all_l$Conc) +# ' Achtung: logreadout' +pot <- drm(readout ~ Conc, sample, data = all_l, fct = LL.4(names = c("b", "d", "a", "c")), + pmodels = data.frame(1, 1, 1, sample)) +potcomp <- drc::compParm(pot, "c", display = T) +confint(pot, display = T) +summary(pot) +supot <- summary(pot)$coefficients +var_log_s <- (supot[4,2]/supot[4,1])^2 # (se_b0 / b0)^2 # Approx variance of log(b) +var_log_t <- (supot[5,2]/supot[5,1])^2 #(se_b1 / b1)^2 # Approx variance of log(b) +#CoVarlog <- (CoSE/mean(c(b0,b1)))^2 +# Simplified CI for log(b1/b2) (ignoring covariance for simplicity) +se_log_ratio <- sqrt(var_log_s + var_log_t) #-2*CoVarlog) + +# log-pot +supot[4,1]/supot[5,1] +loguCI <- log(supot[4,1]) -log(supot[5,1])+qt(0.975,32)*se_log_ratio +loglCI <- log(supot[4,1]) -log(supot[5,1])-qt(0.975,32)*se_log_ratio +exp(c(loglCI,loguCI)) + +################################################################################ +# calculation from GHZ PLA printout +# 1.00903 (0.90187 1.12894) +################################################################################ + +EDcomp(pot, percVec=c(50,50), interval="delta", display=T) +EDcomp(pot, percVec=c(50,50), interval="fieller", display=T) +#Estimate Lower Upper +#0/1:50/50 1.13150 0.99098 1.27201 +#SE_ <- -(0.804-0.902186)/qt(0.975,44) +exp(potcomp[1]+qt(0.975, 43)*potcomp[2]) # = delta methode +#[1] 1.272015 +# Delta method for ratios +deltaS <- 1/supot[5,1]^2*(vcov(pot)[4,4]-2*potcomp[1,1]*vcov(pot)[4,5]+potcomp[1,1]^2*vcov(pot)[5,5]) +potcomp[1,1]+qt(0.975,48-5)*sqrt(deltaS) +potcomp[1,1]-qt(0.975,48-5)*sqrt(deltaS) +# Fieller method +EDcomp(pot, percVec=c(50,50), interval="fieller", display=T) +#Estimate Lower Upper +#0/1:50/50 1.13150 0.99848 1.28281 + +potcomp[1]+qnorm(0.975)*potcomp[2] +modelFit(pot) +# Lack-of-fit test +# +# ModelDf RSS Df F value p value +# ANOVA 32 0.050984 +# DRC model 43 0.197391 11 8.3538 0.0000 + +startlist <- list(a = min(all_l$readout), b = -1.1, cs=mean(all_l$log_dose), + d = max(all_l$readout), r = 0) +mr <- gsl_nls(fn = readout ~ a + (d - a)/(1 + exp(b*(log_dose - (cs-r*sample)))), + data = all_l, + start = startlist, algorithm = "lmaccel", + control = gsl_nls_control(xtol = 1e-6, ftol = 1e-6, gtol = 1e-6)) +s_mr <- summary(mr)$coefficients +(s_mr) +#sum(pot$predres[,2]^2) +# [1] 0.197391 + +RelPot_mr <- exp(s_mr[5,1]) # r exponentiated +print(paste("Potency restricted",RelPot_mr)) + +(POTr_CI <- exp(confint(mr, "r", method = "asymptotic"))) +# mit profile Likelihood: (POTr_CI <- exp(confint(mr, "r", method = "profile"))) +potcomp +potcomp[1]+qt(0.975,43)*potcomp[2] +potcomp[1]-qt(0.975,43)*potcomp[2] +print(POTr_CI) +# 2.5 % 97.5 % +# r 0.9994816 1.280957 # exact wie Stegmann +s_mr[5,1]+qt(0.975,43)*s_mr[5,2] +#[1] 0.2476073 +exp(s_mr[5,1]+qt(0.975,43)*s_mr[5,2]) +#[1] 1.280957 # exact wie Stegmann +exp(s_mr[5,1]-qt(0.975,43)*s_mr[5,2]) +#[1] 0.9994816 # exact wie Stegmann + +plot(all_l$log_dose, all_l$readout) + +# weighting + +w <- aggregate(all_l$readout, by=list(all_l$log_dose), FUN=mean) + +splt <- split(all_l, f=c(all_l$log_dose)) + +Res <- sapply(1:8, function(x) splt[[x]]['readout']-w[,2][x]) +SD <- aggregate(all_l$readout, by=list(all_l$log_dose), FUN=sd) + +ResW <- sapply(1:8, function(x) Res[[x]] / SD[,2][x]) + + +# PLA does not do weighting +# mr <- gsl_nls(fn = readout ~ a + (d - a)/(1 + exp(b*(log_dose - (cs-r*sample)))), +# data = all_l, +# start = startlist, weights=c(rep(SD$x/100,6)), +# control = gsl_nls_control(xtol = 1e-6, ftol = 1e-6, gtol = 1e-6)) +# s_mr <- summary(mr)$coefficients +# (s_mr) +# exp(s_mr[5,1]) + + +library(calculus) +jac <- calculus::jacobian(f="a+(d-a)/(1+exp(b*(c-r*sample-x)))" , var=c("d","b","c","a","r")) + +resjac <- matrix(NA,nrow=48, ncol=5) +for (n in 1:nrow(all_l)) { + + Vec <- c(a=s_mr[4,1], d=s_mr[1,1], c=s_mr[3,1], b=s_mr[2,1], r=s_mr[5,1], sample=all_l$sample[n], x=all_l$log_dose[n]) # + resjac[n,] <- evaluate(jac, var=Vec) + +} +identical(resjac, mr$m$gradient()) # eigentlich gleich, aber col3 und 4 vertauscht; +resjac- mr$m$gradient() + +#_______________________________________________________________________________ +colnames(resjac) <- c("dd", "db", "ddc", "da", "dr") + +V_V <- solve(t(resjac)%*%resjac) +RMSE <- sqrt(sum(mr$m$resid()^2)/43) +VCOV <- V_V*RMSE^2 + +SEs <- sqrt(diag(V_V)*RMSE^2) # stimmen mit summary(mr)$coefficients ueberein!! +SEs2 <- sqrt(diag(vcov(mr))) +#_______________________________________________________________________________ + +FitAnova <- anova(lm(readout ~ factor(Conc)*sample, all_l)) +# Pure error +pureSS <- FitAnova[4,2] +pureSS_df <- FitAnova[4,1] +meanPureErr <- FitAnova[4,3] +SEsPure <- sqrt(diag(V_V)*meanPureErr) + +# PLA software: 1.0252 - 1.4242 +exp(s_mr[5,1]+qt(0.975,43)*SEsPure[5]) +# dr +# 1.422001 +exp(s_mr[5,1]-qt(0.975,43)*SEsPure[5]) +# dr +# 1.027014 + +# Was PLA macht: 16 df wegen 8+8 Mittelwerte +exp(s_mr[5,1]+qt(0.975,48-16)*SEsPure[5]) +exp(s_mr[5,1]-qt(0.975,48-16)*SEsPure[5]) + +# rel CIs +exp(s_mr[5,1]+qt(0.975,48-16)*SEsPure[5])/exp(s_mr[5,1]) +exp(s_mr[5,1]-qt(0.975,48-16)*SEsPure[5])/exp(s_mr[5,1]) + + +CORro <- cor(all_l$log_dose[1:8], all_l$readout[1:8]) +if (CORro<0) B <- -1 else B <- 1 +startlstmu <- list(as = min(all_l$readout), bs = B, cs = mean(all_l$log_dose), ds = max(all_l$readout), + at = min(all_l$readout), bt = B, ct = mean(all_l$log_dose), dt = max(all_l$readout)) +mu <- tryCatch({ + gsl_nls(readout ~ as*isRef + at*sample + (ds*isRef + dt*sample - as*isRef - at*sample)/ + (1 + isRef*exp(bs*(cs -log_dose)) + sample*exp(bt*(ct -log_dose))), + data = all_l, + start = startlstmu, + control = gsl_nls_control(xtol = 1e-10, ftol = 1e-10, gtol = 1e-10)) +}, error = function(msg){ + return(0) +} ) +if (length(mu)>1) { + smu_coef <- tryCatch({ + summary(mu)$coefficients + }, error = function(msg){ + return(rep(NA, 8)) + } ) +} +su <- summary(mu) +# Folgende CIs sind symmetrisch, also nicht im log berechnet +gslnls::confintd(mu, quote(dt/ds)) +gslnls::confintd(mu, quote(bt/bs), level = 0.95) +# fit lwr upr +# bt/bs 1.009691 0.8629895 1.156392 + +potU <- drm(readout ~ Conc, sample, data = all_l, fct = LL.4(names = c("b", "d", "a", "c")), + pmodels = data.frame(sample, sample, sample, sample)) + +b0 <- coef(potU)[1] +b1 <- coef(potU)[2] +vcov(potU) +se_b0 <- sqrt(vcov(potU)[1, 1]) +se_b1 <- sqrt(vcov(potU)[2, 2]) +CoSE <- sqrt(abs(vcov(potU)[1, 2])) + +# Calculate log(b) and its variance +log_b0 <- log(b0) +log_b1 <- log(b1) +var_log_b0 <- (se_b0 / b0)^2 # Approx variance of log(b) +var_log_b1 <- (se_b1 / b1)^2 # Approx variance of log(b) +CoVarlog <- (CoSE/mean(c(b0,b1)))^2 +# Simplified CI for log(b1/b2) (ignoring covariance for simplicity) +se_log_ratio <- sqrt(var_log_b0 + var_log_b1 -2*CoVarlog) +z_score <- 1.96 # For 95% CI +lower_log_ratio <- log_b1 - log_b0 - z_score * se_log_ratio +upper_log_ratio <- log_b1 - log_b0 + z_score * se_log_ratio + +# CI for the ratio itself +ci_ratio <- exp(c(lower_log_ratio, upper_log_ratio)) +print(ci_ratio) +# b:1 b:1 +# 0.8708895 1.1705629 + +# gsl_nls package +b0 <- coef(mu)[2] +b1 <- coef(mu)[6] +vcov(mu) +se_b0 <- sqrt(vcov(mu)[2, 2]) +se_b1 <- sqrt(vcov(mu)[6, 6]) +#CoSE <- sqrt(abs(vcov(mu)[6, 2])) + +# Calculate log(b) and its variance +log_b0 <- log(abs(b0)) +log_b1 <- log(abs(b1)) +var_log_b0 <- (se_b0 / b0)^2 # Approx variance of log(b) +var_log_b1 <- (se_b1 / b1)^2 # Approx variance of log(b) +#CoVarlog <- (CoSE/mean(c(b0,b1)))^2 +# Simplified CI for log(b1/b2) (ignoring covariance for simplicity) +se_log_ratio <- sqrt(var_log_b0 + var_log_b1) #-2*CoVarlog) +z_score <- 1.96 # For 95% CI +lower_log_ratio <- log_b1 - log_b0 - z_score * se_log_ratio +upper_log_ratio <- log_b1 - log_b0 + z_score * se_log_ratio + +# CI for the ratio itself +ci_ratio <- exp(c(lower_log_ratio, upper_log_ratio)) +print(ci_ratio) +# bt bt +# 0.876991 1.162469 + + +# Pure error +FitAnova <- anova(lm(readout ~ factor(Conc)*sample, all_l)) +# Pure error +pureSS <- FitAnova[4,2] +pureSS_df <- FitAnova[4,1] +meanPureErr <- FitAnova[4,3] +V_V <- vcov(mu)/su$sigma^2 +SEsPure <- sqrt(diag(V_V)*meanPureErr) +VCOVpure <- V_V*meanPureErr + +se_b0 <- sqrt(VCOVpure[2, 2]) +se_b1 <- sqrt(VCOVpure[6, 6]) +#CoSE <- sqrt(abs(vcov(mu)[6, 2])) + +# Calculate log(b) and its variance +log_b0 <- log(abs(b0)) +log_b1 <- log(abs(b1)) +var_log_b0 <- (se_b0 / b0)^2 # Approx variance of log(b) +var_log_b1 <- (se_b1 / b1)^2 # Approx variance of log(b) +#CoVarlog <- (CoSE/mean(c(b0,b1)))^2 +# Simplified CI for log(b1/b2) (ignoring covariance for simplicity) +se_log_ratio <- sqrt(var_log_b0 + var_log_b1) #-2*CoVarlog) +z_score <- 1.96 # For 95% CI + +lower_log_ratio <- log_b1 - log_b0 - qt(0.975,32)* se_log_ratio +upper_log_ratio <- log_b1 - log_b0 + qt(0.975,32) * se_log_ratio + +# ------------------------------------------------------------------------------ +ci_ratio <- exp(c(lower_log_ratio, upper_log_ratio)) +print(ci_ratio) +# BINGOOOO +# 1.009691 +# bt bt +# 0.8937963 1.1406122 +# im Vergleich: PLA printout +# 0.89354 - 1.14077, aber point est. 1.00977 + diff --git a/PseudoreplicatesTrueRepsIdeal.R b/PseudoreplicatesTrueRepsIdeal.R new file mode 100644 index 0000000..3930704 --- /dev/null +++ b/PseudoreplicatesTrueRepsIdeal.R @@ -0,0 +1,332 @@ +library(tidyverse) +library(gslnls) +library(ggplot2) +library(reshape2) +library(drc) +library(ggridges) + +library(car) + + + +# Pseudoreplication +# simulation of replication strategies + +Calc_DilRes <- function(as = 10, bs = 1, cs = -6, ds = 110, at = 10, bt = 1, r=0.01, + ct = cs, dt = 110, sd_fac=0.1, gt=1, gs=1, log_concREF, log_concTEST, # -r + THEOconc , + heteroNoise=FALSE, noDilSeries, noDils) { + + # browser() + yAxfac <- (as-ds) + log_cREF_l <- melt(log_concREF) + log_doseREF <- log_cREF_l$value + log_cTEST_l <- melt(log_concTEST) + log_doseTEST <- log_cTEST_l$value + isRef <- rep(c(1,0),1,each=length(log_concREF)) + isSample <- rep(c(0,1),1,each=length(log_concTEST)) + #um with equal sample without + ro_jit <- as*isRef + at*isSample + (ds*isRef + dt*isSample - as*isRef - at*isSample)/ + (1 + isRef*exp(bs*(cs - log_doseREF)) + isSample*exp(bt*(ct -log_doseTEST))) + + ro_jit_ <- abs(ro_jit) + all_var <- rbind(log_cREF_l, log_cTEST_l) + + ro_jit2 <- cbind(all_var, ro_jit_, rep(THEOconc, 6), isRef, isSample) + # if (noDilSeries==3) { + # colnames(ro_jit2) <- c("R_dil1","R_dil2","R_dil3","T_dil1","T_dil2","T_dil3", "log_dose") + # } else if (noDilSeries==2) { + # colnames(ro_jit2) <- c("R_dil1","R_dil2","T_dil1","T_dil2", "log_dose") + # } + + colnames(ro_jit2) <- c("dil_no","repl","trueLogConc" ,"readout","log_dose","isRef","isSample") + return(ro_jit2) +} + + +RestrMF <- function(data, M) { + + all_l <- data + #plot(all_l$log_dose, all_l$readout, col=all_l$sample) + CORro <- cor(all_l$log_dose[1:8], all_l$readout[1:8]) + if (CORro<0) B <- -1 else B <- 1 + + # browser() + startlist <- list(a = min(all_l$readout), b = B, cs=mean(all_l$log_dose), + r=0.0001, d = max(all_l$readout)) + mr <- tryCatch({ + gsl_nls(fn = readout ~ a + (d - a)/(1 + exp(b*((cs-r*isSample)-log_dose))), + data = all_l, + start = startlist, #trace=T, + control = gsl_nls_control(xtol = 1e-6, ftol = 1e-6, gtol = 1e-6)) + }, error = function(msg){ + return(0) + } ) + + if (length(mr)>1) { + s_mr_coef <- summary(mr)$coefficients + s_mr <- summary(mr) + } else { s_mr_coef <- rep(0,4); s_mr <- 0 } + + if (M == "R") { + return(s_mr_coef) + } else if (M =="CI") { + if (length(mr)==1) return(rep(0,4)) + (RelPot_mr <- exp(s_mr_coef[4,1])) # r exponentiated + + POTr_CI <- exp(confint(mr, "r", method = "asymptotic", level=0.95)) + # Pure error + #browser() + suppressWarnings(FitAnova <- anova(lm(readout ~ factor(log_dose)*isSample, all_l))) + meanPureErr <- FitAnova[4,3] + SEsPure <- sqrt(diag(vcov(mr)/s_mr$sigma^2)*meanPureErr) + + # PLA software: 1.0252 - 1.4242 + lCIpure <- exp(s_mr_coef[4,1]-qt(0.975,nrow(all_l)-16)*SEsPure[4]) + uCIpure <- exp(s_mr_coef[4,1]+qt(0.975,nrow(all_l)-16)*SEsPure[4]) + if (is.infinite(uCIpure)) uCIpure <- NA + if (is.infinite(POTr_CI[2])) POTr_CI[2] <- NA + relCIpure <- c(lCIpure/RelPot_mr, uCIpure/RelPot_mr) + CIsDF <- c(RelPot_mr,POTr_CI, lCIpure, uCIpure) + names(CIsDF) <- c("potency","lCI","uCI","lpureCI","upureCI") + return(CIsDF) + } else if (M=="U") { + if (length(mr)==1) return(rep(0,4)) + + startlstmu <- list(as = min(all_l$readout), bs = B, cs = mean(all_l$log_dose), ds = max(all_l$readout), + at = min(all_l$readout), bt = B, ct = mean(all_l$log_dose), dt = max(all_l$readout)) + mu <- tryCatch({ + gsl_nls(readout ~ as*isRef + at*isSample + (ds*isRef + dt*isSample - as*isRef - at*isSample)/ + (1 + isRef*exp(bs*(cs -log_dose)) + isSample*exp(bt*(ct -log_dose))), + data = all_l, + start = startlstmu, trace=T, + control = gsl_nls_control(xtol = 1e-10, ftol = 1e-10, gtol = 1e-10)) + }, error = function(msg){ + return(0) + } ) + if (length(mu)>1) { + smu_coef <- tryCatch({ + summary(mu)$coefficients + }, error = function(msg){ + return(rep(NA, 8)) + } ) + + #s_mu <- summary(mu) + return(smu_coef) + } else return(rep(0,8)) + + } else return() +} + + + + +Dil8F <- function(Cn1) { + CSer <- matrix(NA, nrow=8, ncol=3) + for (i in 1:8) { + Cn1 <- Cn1/dilfactor+rnorm(3,0,SD*Cn1) + CSer[i,] <- Cn1 + } + return(CSer) +} + +DilDirF <- function(OrigMenge) { + CDir <- matrix(NA, nrow=10, ncol=3) + for (DilExp in 1:10) { + C_d <- OrigMenge/dilfactor^DilExp+rnorm(3,0,SD*OrigMenge/dilfactor^DilExp) + CDir[DilExp,] <- C_d + } + return(CDir) +} + +homhetCIF <- function(AssD_) { + # Confidence level + alpha <- 0.05 + z <- qnorm(1 - alpha/2) # 1.96 for 95% CI + + ### 1. Homogeneously Weighted CI ### + mean_logRP <- mean(AssD_$logRP) + var_mean <- mean(AssD_$SE^2) / nrow(AssD_) + SE_mean <- sqrt(var_mean) + + CI_homo <- c(mean_logRP - z * SE_mean, mean_logRP + z * SE_mean) + CI_homo_exp <- exp(CI_homo) + # cat("Homogeneous weighting:\n") + # cat(round(exp(mean_logRP),3), "95% CI =", round(CI_homo_exp[1], 3), "-", round(CI_homo_exp[2], 3), "\n\n") + + ### 2. Heterogeneously Weighted CI ### + weights <- 1 / (AssD_$SE^2) + weighted_mean <- sum(weights * AssD_$logRP) / sum(weights) + var_weighted <- 1 / sum(weights) + SE_weighted <- sqrt(var_weighted) + + CI_hetero <- c(weighted_mean - z * SE_weighted, weighted_mean + z * SE_weighted) + CI_hetero_exp <- exp(CI_hetero) + res <- c(exp(mean_logRP) , CI_homo_exp, exp(weighted_mean), CI_hetero_exp ) + return(res) + + # cat("Heterogeneous weighting:\n") + # cat("Combined RP =", round(exp(weighted_mean), 3), "\n") + # cat("95% CI =", round(CI_hetero_exp[1], 6), "-", round(CI_hetero_exp[2], 6), "\n") +} + +Conc <- c(1) +for (x in 1:10) Conc <- c(Conc, (1/3^x)) + + +N <- 10000 +OrigMenge <- 1 +dilfactor <- 3 +SD <- 0.02 + +rm(CIsM_all, DevTargC3_) +for (x in 1:3) { + if (x==1) { Repl1 <- x; Repl2 <- x } # Anzahl Replikate für C2 und C3 + if (x==2) { Repl1 <- 1; Repl2 <- 3 } + if (x==3) { Repl1 <- x; Repl2 <- x } + + Flags <- vector() + SDs1 <- SDs2 <- POT <- DevTargC3 <- hhCIV <- c() + for (nTrials in 1:N) { + C2 <- OrigMenge/dilfactor+rnorm(Repl1,0,SD*OrigMenge) + C3 <- C2/dilfactor+rnorm(Repl1,0,C2*SD) + CSer <- Dil8F(C3) + + # "true" replication + C2_1 <- OrigMenge/dilfactor+rnorm(Repl2,0,SD*OrigMenge) + C3_1 <- C2_1/dilfactor+rnorm(Repl2,0,SD*C2_1) + CSer_3 <-Dil8F(C3_1) + + + # Flag <- sd(CSer)0) /N*100)) + print(paste("pure CI coverage", sum(log(te$lpureCI)<0 & log(te$upureCI)>0) /N*100)) + print(cor(te$potency, (te$C3_DIFF))) + te$logpot <- log(te$potency) + # plot(te$C3_DIFF,te$potency, pch=19, xlab="Difference at C3") + # abline(te$potency ~ te$C3_DIFF, col = "blue") + # lines(lowess(te$potency ~ te$C3_DIFF), col = "green") + scatterplot(logpot ~ C3_DIFF, data=te) +} + +plot(te$C3_DIFF,te$logpot, pch=19, xlab="Difference at C3") +abline(te$logpot ~ te$C3_DIFF, col = "blue") +lines(lowess(te$logpot ~ te$C3_DIFF), col = "green") +scatterplot(logpot ~ C3_DIFF, data=te) + +colnames(hhCIDF_) <- c("homomean", "lhomoCI", "uhomoCI","heteromean","lheteroCI","uheteroCI") +te <- cbind(CIsM_allDF, hhCIDF_) + + +p_histCI <- ggplot(CIsM_allDF, aes(x=widthCI, y=as.factor(scheme), fill=as.factor(scheme))) + + geom_density_ridges() + + theme_ridges() +p_histCI + +p_histCI2 <- ggplot(CIsM_allDF, aes(x=widthCI, y=as.factor(scheme), fill=as.factor(scheme))) + + geom_density_ridges() + + theme_ridges() +p_histCI2 + + +sum(Flags)/N*100 +mean(SDs1) +mean(SDs2) + +ro_new <- Calc_DilRes(sd_fac=0.1, log_concREF=log_concREF, + log_concTEST=log_concTEST,THEOconc = log(Conc)[4:11]) + + diff --git a/appPLAscraper.R b/appPLAscraper.R index 19db31e..dedca81 100644 --- a/appPLAscraper.R +++ b/appPLAscraper.R @@ -11,7 +11,7 @@ library(pdftools) library(stringr) library(openxlsx) library(evd) -# library(fitdistrplus) +library(fitdistrplus) RestrMF <- function(data, M) { all_l <- melt(data, id.vars="uniDoses", variable.name="replname", value.name="readout") @@ -152,7 +152,7 @@ server <- function(input, output, session) { filenames <- Dat$FNames POTlist <- FINALresponses <- SampleNamesL <- list() DilSerVec <- vector() - hide_spinner() + #hide_spinner() for (Nfile in 1:length(PDFs)) { tt = PDFs[[Nfile]] PageI <- strg <- POT <- vector(); resIND <- matrix(NA, ncol=2,nrow=3) @@ -160,6 +160,7 @@ server <- function(input, output, session) { for (Seite in 1:length(tt)) { Zeilen <- strsplit(tt[Seite], "\n") + #Zeilen <- Zeilen[Zeilen != ""] REGEXloc <- grep(pattern='Dose .alue', Zeilen[[1]]) if (length(REGEXloc)>0) { #### get Standard ---- @@ -227,16 +228,27 @@ server <- function(input, output, session) { for (x in ProdsinPDF) { ZeilenR <- strsplit(tt[resIND[x,1]], "\n")[[1]] ZeilenREF <- ZeilenR[resIND[x,2]:length(ZeilenR)] + #ZeilenREF <- ZeilenREF[ZeilenREF != ""] grepDoses <- grep("Dose .alue", ZeilenREF) Doses <- unlist(regmatches(c(ZeilenREF[grepDoses[1]], ZeilenREF[grepDoses[2]]), gregexpr("([0-9]+)\\.?([0-9]+)", c(ZeilenREF[grepDoses[1]], ZeilenREF[grepDoses[2]])))) uniDoses <- as.numeric(unique(Doses)) Ndoses <- length(uniDoses) + grepResponses <- grep(".esponse", ZeilenREF) grepMean <- grep(".ean", ZeilenREF) NoResp <- grepMean[1]-grepResponses[1] - + # test for cancelled outlier + grepAUSS <- grep("T",ZeilenREF[grepMean[1]:grepResponses[1]]) + if (length(grepAUSS)>0) { + AnzResp <- grepMean-grepResponses + AnzResp <- AnzResp[AnzResp > 0] + AnzResp2 <- c(AnzResp, DilSerVec) + AnzResp3 <- sort(table(AnzResp2),decreasing = T)[1] + NoResp <- as.numeric(names(AnzResp3)) + } + IND <- c(grepDoses) StepIX <- IND+1 INDresp1 <- grepResponses @@ -250,7 +262,7 @@ server <- function(input, output, session) { } Step <- unlist(regmatches(ZeilenREF[StepIX[1]], gregexpr("([0-9]+)", ZeilenREF[StepIX[1]]))) maxStep <- max(as.numeric(Step)) - + browser() resStep <- Ndoses-maxStep if (maxStep== (Ndoses-1)) { ref_data <- data.frame(c(Responses[[1]][1:maxStep], Responses[[2]][1]), @@ -269,7 +281,7 @@ server <- function(input, output, session) { if (x==1) colnames(ref_data) <- c(paste0(REF, "_1"), paste0(REF, "_2"), paste0(REF, "_3")) if (x==2) colnames(ref_data) <- c(paste0(TEST1, "_1"), paste0(TEST1, "_2"), paste0(TEST1, "_3")) if (x==3) colnames(ref_data) <- c(paste0(TEST2, "_1"), paste0(TEST2, "_2"), paste0(TEST2, "_3")) - + if (!exists("ALLE3")) { ALLE3 <- ref_data } else { ALLE3 <- cbind(ALLE3, ref_data) } @@ -326,7 +338,7 @@ server <- function(input, output, session) { PotM1_ <- as.matrix(PotM1) colnames(PotM1_) <- names(PotM1) - PotM1 <- cbind(rep(filenames[pdfInd], nrow(PotM1_)), PotM1_) + PotM1_ <- cbind(rep(filenames[pdfInd], nrow(PotM1_)), PotM1_) CalcPot[[pdfInd]] <- PotM1_ if (!exists("CalcPotDF")) CalcPotDF <- PotM1_ else CalcPotDF <- rbind(CalcPotDF, PotM1_) @@ -367,7 +379,7 @@ server <- function(input, output, session) { if (!exists("SIGtestDF")) { if (exists("dfPlotsigTest2")) SIGtestDF <- rbind(dfPlotsigTest1, dfPlotsigTest2) else SIGtestDF <- dfPlotsigTest1 } else { - if (exists("dfPlotsigTest2")) SIGtestDF <- rbind(SIGtestDF,dfPlotsigTest1, dfPlotsigTest2) else SIGtestDF <- rbind(dfPlotsigTest1, dfPlotsigTest2) + if (exists("dfPlotsigTest2")) SIGtestDF <- rbind(SIGtestDF,dfPlotsigTest1, dfPlotsigTest2) else SIGtestDF <- rbind(SIGtestDF, dfPlotsigTest1) } } # pdfInd @@ -391,7 +403,7 @@ server <- function(input, output, session) { x_UA <- max(X); x_LA <- min(X) } else { x_UA <- min(X); x_LA <- max(X) } - p1 <- ggplot(SIGrefDF, aes(x_X, y=sigRef, col=as.factor(Prod))) + + p1 <- ggplot(SIGrefDF, aes(x=X, y=sigRef, col=as.factor(Prod))) + geom_line() + annotate("text", label="x", x=x_UA, y=UasREF, alpha=0.2) + annotate("text", label="o", x=x_LA, y=LasREF, alpha=0.2) + @@ -406,7 +418,7 @@ server <- function(input, output, session) { PLOTS$sigPlotREF <- p1 - p2 <- ggplot(SIGtestDF, aes(x_X, y=sigTest, col=as.factor(Prod))) + + p2 <- ggplot(SIGtestDF, aes(x=X, y=sigTest, col=as.factor(Prod))) + geom_line() + #annotate("text", label="x", x=x_UA, y=UasREF, alpha=0.2) + #annotate("text", label="o", x=x_LA, y=LasREF, alpha=0.2) + @@ -438,7 +450,7 @@ server <- function(input, output, session) { #### Calculated Potencies table ---- - colnames(CalcPotDF) <- c("pdf","rel. potency","lowerCI","upperCI","lower CI puerErr","upper CI pureErr") + colnames(CalcPotDF) <- c("pdf","rel. potency","lowerCI","upperCI","lowerCI pureErr","upperCI pureErr") SampleNamesL <- Dat$SampleNamesL SampleNames <- unlist(SampleNamesL) if (length(SampleNames)/length(SampleNamesL) == 3) { @@ -459,14 +471,14 @@ server <- function(input, output, session) { output$CalcPotDF <- renderTable({ CalcPotDF2 }) #### CI plots ---- CalcPotDF_ <- CalcPotDF[,c(1,7,8)] - all_lPot <- melt(as.data.frame(CalcPotDF_, id.vars="pdf",variable.name="var", value.name="Climit")) + all_lPot <- melt(as.data.frame(CalcPotDF_), id.vars="pdf",variable.name="var", value.name="Climit") all_lPot[,3] <- as.numeric(all_lPot[,3]) all_lPot[,3][all_lPot[,3] > 5] <- NA all_lPot[,3][all_lPot[,3] < 0.1] <- NA P_histCI <- ggplot(all_lPot, aes(x=Climit, fill=var)) + geom_histogram(color="#e9ecef", alpha=0.6, position = "identity") + - scale-fill-manual(values=c("darkgreen","darkblue","salmon2","tomato3")) + + scale_fill_manual(values=c("darkgreen","darkblue","salmon2","tomato3")) + ggtitle("Histogram of relative potencies, standard RMSEs") + scale_x_continuous( breaks=seq(trunc(min(all_lPot$Climit, na.rm=T)*10)/10, max(all_lPot$Climit, na.rm=T)*1.1, by=0.2), @@ -480,14 +492,14 @@ server <- function(input, output, session) { CalcPotDF_Pure <- CalcPotDF[,c(1,9,10)] - all_lPotPure <- melt(as.data.frame(CalcPotDF_Pure, id.vars="pdf",variable.name="var", value.name="Climit")) + all_lPotPure <- melt(as.data.frame(CalcPotDF_Pure), id.vars="pdf",variable.name="var", value.name="Climit") all_lPotPure[,3][all_lPotPure[,3] == 0] <- NA all_lPotPure[,3][all_lPotPure[,3] > 5] <- NA all_lPotPure[,3][all_lPotPure[,3] < 0.1] <- NA P_histCIPure <- ggplot(all_lPotPure, aes(x=Climit, fill=var)) + geom_histogram(color="#e9ecef", alpha=0.6, position = "identity") + - scale-fill-manual(values=c("darkgreen","darkblue","salmon2","tomato3")) + + scale_fill_manual(values=c("darkgreen","darkblue","salmon2","tomato3")) + ggtitle("Histogram of relative potencies, pure error") + scale_x_continuous( breaks=seq(trunc(min(all_lPotPure$Climit, na.rm=T)*10)/10, max(all_lPotPure$Climit, na.rm=T)*1.1, by=0.2), @@ -514,23 +526,33 @@ server <- function(input, output, session) { if (inherits(r, "try-error")) return(NA) return(r) } - uCIGumbel <- fitdistr(logCPDF_[,3], dg,start=list(loc=1,scale=1)) - lCIGumbel <- fitdistr(logCPDF_[,2], dg,start=list(loc=1,scale=1)) - Gumbel <- c(rev(dgumbel(seq(0,0.5,0.01), loc=uCIGumbel$extimate[1], scale=uCIGumbel$extimate[2])), dgumbel(seq(0,0.5,0.01), loc=uCIGumbel$extimate[1], scale=uCIGumbel$extimate[2])) - - GumbelDF <- as.data.frame(cbind(X=seq(-0.5,0.5,0.01), Gumbel)) - all_lPotPure <- melt(as.data.frame(logCPDF_), id.vars="pdf",variable.name = "var",value.name="Climit") - all_lPotPure[,3][all_lPotPure[,3] == 0] <- NA - - uEAC099 <- exp(qgumbel(0.99, uCIGumbel$estimate[1],uCIGumbel$estimate[2]))*100 - lEAC001 <- exp(-qgumbel(0.99, uCIGumbel$estimate[1],uCIGumbel$estimate[2]))*100 - uEAC095 <- exp(qgumbel(0.95, uCIGumbel$estimate[1],uCIGumbel$estimate[2]))*100 - lEAC005 <- exp(-qgumbel(0.95, uCIGumbel$estimate[1],uCIGumbel$estimate[2]))*100 + uCIGumbel <- tryCatch({MASS::fitdistr(logCPDF_[,3], dg,start=list(loc=1,scale=1)) }, + error = function(msg) { + return(-1) + }) + if (length(uCIGumbel)>1) { + #lCIGumbel <- fitdistr(logCPDF_[,2], dg,start=list(loc=1,scale=1)) + Gumbel <- c(rev(dgumbel(seq(0,0.5,0.01), loc=uCIGumbel$extimate[1], scale=uCIGumbel$extimate[2])), dgumbel(seq(0,0.5,0.01), loc=uCIGumbel$extimate[1], scale=uCIGumbel$extimate[2])) + + GumbelDF <- as.data.frame(cbind(X=seq(-0.5,0.5,0.01), Gumbel)) + all_lPotPure <- melt(as.data.frame(logCPDF_), id.vars="pdf",variable.name = "var",value.name="Climit") + all_lPotPure[,3][all_lPotPure[,3] == 0] <- NA + + uEAC099 <- exp(qgumbel(0.99, uCIGumbel$estimate[1],uCIGumbel$estimate[2]))*100 + lEAC001 <- exp(-qgumbel(0.99, uCIGumbel$estimate[1],uCIGumbel$estimate[2]))*100 + uEAC095 <- exp(qgumbel(0.95, uCIGumbel$estimate[1],uCIGumbel$estimate[2]))*100 + lEAC005 <- exp(-qgumbel(0.95, uCIGumbel$estimate[1],uCIGumbel$estimate[2]))*100 + } else { + uEAC099 <- exp(quantile(0.99, logCPDF_[,3]))*100 + lEAC001 <- exp(-quantile(0.99, logCPDF_[,3]))*100 + uEAC095 <- exp(quantile(0.95, logCPDF_[,3]))*100 + uEAC099 <- exp(-quantile(0.99, logCPDF_[,3]))*100 + } output$GumbelPlot <- renderPlot({ hist(all_lPotPure$Climit, breaks = 50, xlab="log(relative CLs)", main=paste("calculated EACs 99%:",round(lEAC001,0)), "-", round(uEAC099,0), " 95%", round(lEAC005,0), "-",round(uEAC095,0)) - lines(GumbelDF$X, GumbelDF$Gumbel) + if (length(uCIGumbel)>1) { lines(GumbelDF$X, GumbelDF$Gumbel) } })