options(echo = FALSE) ########################################################################## ## ## Program to simulate bias from inverse-variance weighted averaging ## of Poisson distributed random deviates. ## ## F. J. Masci, 2 / 4 / 2008 ## ## Execute: /usr/bin/R -q --no-restore --no-save < PoissonBias.r ## ## N.B., Poisson density is derived using Loader's algorithm: ## http://www.herine.net/stat/papers/dbinom.pdf ## Also: Ahrens, J. H. and Dieter, U. (1982). Computer generation of Poisson ## deviates from modified normal distributions. ACM Transactions on ## Mathematical Software, 8, 163-179. ## ########################################################################## # Vector of true expectation values (means): expcounts <- c(10,30,50,100,300,500,1000,3000,5000,10000,15000,20000,30000); # Number of random values to draw from each Poisson distribution defined # by mean values in vector "expcounts": N <- 10000000; # Only retain random values within +/- T*sigma of the true expectation value # for inclusion in the inverse-variance weighted averaging: T <- 3; # Remember to change name and title of plot below too. #------------------------------------------------------------------------ # In following construction, the confidence interval (total probability) # is kept asymmetric about the mean. This is due to the non-zero skewness # in the Poisson distribution. randnew <- 0; invmean <- 0; invmeanunc <- 0; pbias_asym <- 0; write(paste("\n======== Prob. Asymmetric ========"), file=""); for( i in 1:length(expcounts) ) { rand <- rpois(N, expcounts[[i]]); randnew <- rand[(rand >= (expcounts[[i]] - T*sqrt(expcounts[[i]]))) & (rand <= (expcounts[[i]] + T*sqrt(expcounts[[i]])))]; # Inverse-Poisson-Variance weighted mean of all values within +/- T*sigma # of the expected mean - N.B: not really optimal in the Poisson regime, # but let's assume it's blindly applied no matter how small expcounts. invmean[[i]] <- length(randnew) / sum(1/randnew); # Uncertainty in invmean: invmeanunc[[i]] <- sqrt(1 / sum(1/randnew)); # Bias relative to (true) expected mean in %: pbias_asym[[i]] = 100 * (invmean[[i]] - expcounts[[i]]) / expcounts[[i]]; formatstring <- sprintf(" %d %12.6f %10.6f %5.3f %5.3f", expcounts[[i]], invmean[[i]], pbias_asym[[i]], T, T); write(formatstring, file=""); } #------------------------------------------------------------------------ # In the following construction, the confidence interval (total probability) # is made symmetric about the mean. This is done by first selecting an # upper threshold T (as defined above), then finding the lower # threshold S such that: Pr[mu-S*sigma --> mu] = Pr[mu --> mu+T*sigma] # Only random draws within the new interval mu-S*sigma --> mu+T*sigma # are considered for weighted averaging. S <- 0; randnew <- 0; invmean <- 0; invmeanunc <- 0; pbias_sym <- 0; write(paste("\n======== Prob. Symmetric ========"), file=""); for( i in 1:length(expcounts) ) { rand <- rpois(N, expcounts[[i]]); # Lower threshold such that total probability balanced about mean: S[[i]] <- (expcounts[[i]] - qpois((2*ppois(expcounts[[i]],expcounts[[i]])- ppois((expcounts[[i]]+T*sqrt(expcounts[[i]])),expcounts[[i]])), expcounts[[i]]))/sqrt(expcounts[[i]]); randnew <- rand[(rand >= (expcounts[[i]] - S[[i]]*sqrt(expcounts[[i]]))) & (rand <= (expcounts[[i]] + T*sqrt(expcounts[[i]])))]; # Inverse-Poisson-Variance weighted mean of all values within # mean - S*sigma to mean + T*sigma. N.B: not really optimal in # the Poisson regime, but let's assume it's blindly applied # no matter how small expcounts. invmean[[i]] <- length(randnew) / sum(1/randnew); # Uncertainty in invmean: invmeanunc[[i]] <- sqrt(1 / sum(1/randnew)); # Bias relative to (true) expected mean in %: pbias_sym[[i]] = 100 * (invmean[[i]] - expcounts[[i]]) / expcounts[[i]]; formatstring <- sprintf(" %d %12.6f %10.6f %5.3f %5.3f", expcounts[[i]], invmean[[i]], pbias_sym[[i]], S[[i]], T, invmeanunc[[i]]); write(formatstring, file=""); } #------------------------------------------------------------------------ # Plot Bias vs. true expectation value for asymmetric and symmetric # probability cases: pdf(file="3sigma.pdf", pointsize=12); plot(log10(expcounts), pbias_asym, type="l", main=" +/- 3sigma fluctuations", xlab="Log10[ ]", ylab="Bias (%)", col="blue", lty=1, lwd=1, xlim=c(1,4.5), ylim=c(-5,1)); lines(log10(expcounts), pbias_sym, col="red", lty=1, lwd=1); abline(0,0); legend(2.0, -3.0, c("Asymmetric Confidence Int.", "Symmetric Confidence Int."), col = c("blue", "red"), text.col="black", lty = c(1, 1), lwd = c(1, 1), pch = c(" ", " "), merge=FALSE, bty='n'); q();