options(echo = FALSE)

##########################################################################
##
## Program to simulate bias from inverse-variance weighted averaging
## of Poisson distributed random deviates.
##
## F. J. Masci, 2 / 4 / 2008
##
## Execute: /usr/bin/R -q --no-restore --no-save < PoissonBias.r
##
## N.B., Poisson density is derived using Loader's algorithm:
## http://www.herine.net/stat/papers/dbinom.pdf
## Also: Ahrens, J. H. and Dieter, U. (1982). Computer generation of Poisson 
## deviates from modified normal distributions. ACM Transactions on
## Mathematical Software, 8, 163-179.
##
##########################################################################

# Vector of true expectation values (means):

expcounts <- c(10,30,50,100,300,500,1000,3000,5000,10000,15000,20000,30000);

# Number of random values to draw from each Poisson distribution defined
# by mean values in vector "expcounts":

N <- 10000000;

# Only retain random values within +/- T*sigma of the true expectation value
# for inclusion in the inverse-variance weighted averaging:

T <- 3;
# Remember to change name and title of plot below too.

#------------------------------------------------------------------------
# In following construction, the confidence interval (total probability)
# is kept asymmetric about the mean. This is due to the non-zero skewness
# in the Poisson distribution.

randnew <- 0;
invmean <- 0;
invmeanunc <- 0;
pbias_asym <- 0;

write(paste("\n======== Prob. Asymmetric ========"), file="");

for( i in 1:length(expcounts) ) { 

  rand <- rpois(N, expcounts[[i]]);

  randnew <- rand[(rand >= (expcounts[[i]] - T*sqrt(expcounts[[i]]))) & 
                  (rand <= (expcounts[[i]] + T*sqrt(expcounts[[i]])))];

  # Inverse-Poisson-Variance weighted mean of all values within +/- T*sigma
  # of the expected mean - N.B: not really optimal in the Poisson regime,
  # but let's assume it's blindly applied no matter how small expcounts.

  invmean[[i]] <- length(randnew) / sum(1/randnew);

  # Uncertainty in invmean:

  invmeanunc[[i]] <- sqrt(1 / sum(1/randnew));

  # Bias relative to (true) expected mean in  %:

  pbias_asym[[i]] = 100 * (invmean[[i]] - expcounts[[i]]) / expcounts[[i]]; 

  formatstring <- sprintf("   %d     %12.6f      %10.6f      %5.3f      %5.3f",
  expcounts[[i]], invmean[[i]], pbias_asym[[i]], T, T);
  write(formatstring, file="");
}


#------------------------------------------------------------------------
# In the following construction, the confidence interval (total probability)
# is made symmetric about the mean. This is done by first selecting an
# upper threshold T (as defined above), then finding the lower
# threshold S such that: Pr[mu-S*sigma --> mu] = Pr[mu --> mu+T*sigma]
# Only random draws within the new interval mu-S*sigma --> mu+T*sigma
# are considered for weighted averaging.

S <- 0;
randnew <- 0;
invmean <- 0;
invmeanunc <- 0;
pbias_sym <- 0;

write(paste("\n======== Prob. Symmetric ========"), file="");

for( i in 1:length(expcounts) ) {
 
  rand <- rpois(N, expcounts[[i]]);

  # Lower threshold such that total probability balanced about mean:

  S[[i]] <- (expcounts[[i]] - qpois((2*ppois(expcounts[[i]],expcounts[[i]])-
        ppois((expcounts[[i]]+T*sqrt(expcounts[[i]])),expcounts[[i]])),
        expcounts[[i]]))/sqrt(expcounts[[i]]);

  randnew <- rand[(rand >= (expcounts[[i]] - S[[i]]*sqrt(expcounts[[i]]))) & 
                  (rand <= (expcounts[[i]] + T*sqrt(expcounts[[i]])))];

  # Inverse-Poisson-Variance weighted mean of all values within
  # mean - S*sigma to mean + T*sigma. N.B: not really optimal in
  # the Poisson regime, but let's assume it's blindly applied 
  # no matter how small expcounts.

  invmean[[i]] <- length(randnew) / sum(1/randnew);

  # Uncertainty in invmean:

  invmeanunc[[i]] <- sqrt(1 / sum(1/randnew));

  # Bias relative to (true) expected mean in  %:

  pbias_sym[[i]] = 100 * (invmean[[i]] - expcounts[[i]]) / expcounts[[i]]; 

  formatstring <- sprintf("   %d     %12.6f      %10.6f      %5.3f      %5.3f",
  expcounts[[i]], invmean[[i]], pbias_sym[[i]], S[[i]], T, invmeanunc[[i]]);
  write(formatstring, file="");
}


#------------------------------------------------------------------------
# Plot Bias vs. true expectation value for asymmetric and symmetric
# probability cases:

pdf(file="3sigma.pdf", pointsize=12);

plot(log10(expcounts), pbias_asym, type="l",
     main="<N> +/- 3sigma fluctuations", xlab="Log10[ <N> ]",
     ylab="Bias (%)", col="blue", lty=1, lwd=1,
     xlim=c(1,4.5), ylim=c(-5,1));

lines(log10(expcounts), pbias_sym, col="red", lty=1, lwd=1);

abline(0,0);

legend(2.0, -3.0, c("Asymmetric Confidence Int.", "Symmetric Confidence Int."),
       col = c("blue", "red"),
       text.col="black",
       lty = c(1, 1),
       lwd = c(1, 1),
       pch = c(" ", " "),
       merge=FALSE, bty='n');

q();