#' Data driven score tests for testing goodness of fit of the Poisson distribution. #' #' Two variants of the score test, i.e. ML-test and MS-test, are implemented. #' The procedure is intended for moderate size samples, i.e. \eqn{25 <= n <= 500}. #' #' @author Tadeusz Inglot and Adam ZagdaƄski #' #' @references For more information see: #' T.Inglot (2017), Data Driven Efficient Score Test for Poissonity, Probability and Mathematical Statistics. #' #' @param x a numeric vector containing sample from a discrete distribution #' @param D an integer number used in selection rule for ML-test (default value: D=3). #' @param delta a small real number from (0,1) used in selection rule for ML-test (default value: delta=0.05) #' @param method either "ML" or "MS" #' #' @return The list containing: H - decision regarding null hypothesis for standard significance level \eqn{\alpha = 0.05}, #' i.e. H=0 (accept) or H=1 (reject), and test.stat - the computed test statistic. #' #' @examples #' x <- rpois(n = 50, lambda = 1) #' scorePois.test(x) #' scorePois.test(x, method = "MS") #' scorePois.test <- function(x, D=3, delta=0.05, method=c("ML", "MS")) { # load required package if(!require(orthopolynom)) { install.packages("orthopolynom") library(orthopolynom) } method <- match.arg(method) if (!method %in% c("ML", "MS")) stop("Only 'ML' or 'MS' method can be used") if((delta<0)|(delta>1)) stop("Parametr delta should be selected from (0,1)") # sample size n <- length(x) if((n<25)|(n>500)) stop("The procedure works only for sample size: 25 <= n <= 500") # estimated lambda lambda <- mean(x) if(lambda>70) stop("The procedure works only for sample mean smaller than 70") # adding random sample from U[0,1] y <- x + runif(n) # auxiliary function to compute F(y,lambda) F <- function(y, lambda) { j <- floor(y) ppois(j-1, lambda=lambda) + (y-j)*dpois(x=j, lambda=lambda) } F.y.lambda <- F(y, lambda) # number of Legendre polynomials d.n <- floor(3*n^(1/7)) if((D<1)|(D>=d.n)) stop(paste0("Parameter D should be selected from {1, 2,...,", d.n-1,"}")) j <- 3:d.n # number of terms used to compute J r.max <- 100 # pi_0(lambda), ... , pi_r(lambda) pi.lambda <- dpois(x = 0:r.max, lambda = lambda) pi.lambda.cumsum <- cumsum(pi.lambda) if (lambda < 1.8) { # Legendre polynomials on [-1,1] legendre.d.n <- legendre.polynomials(n = 2*d.n+2, normalized = TRUE)[c(6, 4, 2*j+2) + 1] # compute l = (l_1, l_2, ..., l_{d(n)}) legendre.d.n.val <- polynomial.values(legendre.d.n, F.y.lambda) l <- sapply(legendre.d.n.val, function(u) sum(u*sqrt(2))/sqrt(n)) # Legendre polynomials integrals legendre.d.n.integrals <- polynomial.integrals(legendre.d.n) legendre.d.n.integrals.vals <- sapply(polynomial.values(legendre.d.n.integrals, pi.lambda.cumsum), function(x) x*sqrt(2)) - sapply(polynomial.values(legendre.d.n.integrals, rep(0, r.max + 1)), function(x) x*sqrt(2)) } else # lambda >= 1.8 { # Legendre polynomials on [-1,1] legendre.d.n <- legendre.polynomials(n = d.n+1, normalized = TRUE)[c(3, 2, j+1) + 1] # compute l = (l_1, l_2, ..., l_{d(n)}) legendre.d.n.val <- polynomial.values(legendre.d.n, 2 * F.y.lambda - 1) l <- sapply(legendre.d.n.val, function(u) sum(u*sqrt(2))/sqrt(n)) # Legendre polynomials integrals legendre.d.n.integrals <- polynomial.integrals(legendre.d.n) legendre.d.n.integrals.vals <- sapply(polynomial.values(legendre.d.n.integrals, 2*pi.lambda.cumsum-1), function(x) x/sqrt(2)) - sapply(polynomial.values(legendre.d.n.integrals, rep(-1, r.max+1)), function(x) x/sqrt(2)) } # compute J J <- -colSums(legendre.d.n.integrals.vals) # effective score statistic N <- cumsum(l^2) + cumsum(J*l)^2 / (lambda-cumsum(J^2)) if(method == "ML") { # selection rule q <- sum(J*l)/(lambda-sum(J^2)+sqrt(lambda*(lambda-sum(J^2)))) v.tilde <- sort((l+q*J)^2) z <- sapply(1:D, function(j){1-.5*(delta*factorial(j)/(D*prod(seq(from=d.n, to=d.n-j+1, by=-1))))^(1/j)}) c <- qnorm(z)^2 v.tilde.selected <- v.tilde[seq(from=d.n, to=d.n-D+1, by=-1)] rho <- ifelse(all(v.tilde.selected < c), log(n), 2) L <- min(which(N - (1:d.n)*rho == max(N - (1:d.n)*rho))) # test statistic test.stat <- N[L] # approximated critical values a <- c(6.342, 7.092, 7.592) } else if(method == "MS") { # selection rule S <- min(which(N - (1:d.n)*log(n) == max(N - (1:d.n)*log(n)))) # test statistic test.stat <- N[S] # approximated critical values a <- c(4.288, 5.910, 6.842) } else stop("Method not supported") w <- ifelse(n > 50, (a[1]*log(n/50)+a[2]*log(500/n))/log(10), (a[2]*log(n/25)+a[3]*log(50/n))/log(2)) H <- as.numeric(test.stat > w) # (H==1) --> reject null hypothesis results <- list(H = H, test.stat = test.stat, method = method) return(results) }