;;-----------------------------------------------------------------------;; ;; Algorithm AS 181 Appl. Statist. (1982) Vol. 31, No. 2 ;; ;; ;; ;; Calculates Shapiro and Wilk's W statistic and its sig. level ;; ;; ;; ;; Royston, J. P. (1982), An extension of Shapiro and Wilk's W test for ;; ;; normality to large samples, ApplStat, 31, 115-124 ;; ;; Royston, J. P. (1982), [Algorithm AS 181] The W test for normality, ;; ;; ApplStat, 31, 176-180 ;; ;; ;; ;; Lisp translation by Luca Scrucca, april 1999 ;; ;;-----------------------------------------------------------------------;; (provide "as181") (defparameter *wilk-shapiro-test* t "If T activates the Shapiro-Wilk test for normality.") (defun shapiro-wilk-w (x) "Arg: (x) Given a list of values x calculates the Shapiro and Wilk's W statistic and its p-value for testing the hypothesis of normality. Return a list of with two elements: - W - p-value References: Royston, J. P. (1982), An extension of Shapiro and Wilk's W test for; normality to large samples, ApplStat, 31, 115-124 Royston, J. P. (1982), [Algorithm AS 181] The W test for normality, ApplStat, 31, 176-180 ---------------------------------------------------------------------------- Lisp translation from Algorithm AS 181, Appl. Statist. (1982) Vol. 31, No. 2 by Luca Scrucca, april 1999." ;; (if (<= (length x) 2) (error "length of x <= 2")) (if (> (length x)2000) (error "length of x > 2000")) (flet ((poly (c x) (sum (* c (^ x (iseq (length c)))))) ) (let* ((y (sort-data x)) (n (length y)) (ssq (sum (^ (- y (mean y)) 2))) (wcoef (shapiro-wilk-coef n)) (A (first wcoef)) (eps (second wcoef)) (n2 (if (evenp n) (/ n 2) (/ (- n 1) 2))) ;; table 2 (wa (list 0.118898 0.133414 0.327907)) (wb (list -0.37542 -0.492145 -1.124332 -0.199422)) (wc (list -3.15805 0.729399 3.01855 1.558776)) (wd (list 0.480385 0.318828 0.0 -0.0241665 0.00879701 0.002989646)) (we (list -1.91487 -1.37888 -0.04183209 0.1066339 -0.03513666 -0.01504614)) (wf (list -3.73538 -1.015807 -0.331885 0.1773538 -0.01638782 -0.03215018 0.003852646)) ;; table 3 (C1 (list '(-1.26233 1.87969 0.0649583 -0.0475604 -0.0139682) '(-2.28135 2.26186 0.0 0.0 -0.00865763) '(-3.30623 2.76287 -0.83484 1.20857 -0.507590))) (C2 (list '(-0.287696 1.78953 -0.180114 0.0 0.0) '(-1.63638 5.60924 -3.63738 1.08439 0.0) '(-5.991908 21.04575 -24.58061 13.78661 -2.835295))) ;; (UNL (list -3.8 -3.0 -1.0)) (UNH (list 8.6 5.8 5.4)) (NC1 (list 5 5 5)) (NC2 (list 3 4 5)) (p-value 0) (w 0) (ww 0) (c 0) (nc 0) ) ; ; Compute W statistic (setf W (/ (^ (inner-product A y) 2) ssq)) ;;;;;;;;;;;;;;;;;; (if (>= w 1) (return-from shapiro-wilk-w (list 1 1))) ; ; Get significance level of W ; (cond ((> n 6) ; ; N between 7 and 2000 ... Transform W to Y, get mean and sd, ; standardize and get significance level ; (cond ((<= n 20) (setf lambda (poly wa (- (log n) 3))) (setf ybar (exp (poly wb (- (log n) 3)))) (setf sdy (exp (poly wc (- (log n) 3))))) (t (setf lambda (poly wd (- (log n) 5))) (setf ybar (exp (poly we (- (log n) 5)))) (setf sdy (exp (poly wf (- (log n) 5))))) ) (setf z (/ (- (^ (- 1 W) lambda) ybar) sdy)) (setf p-value (- 1 (normal-cdf z))) ) ; ; Deal with N less than 7 (Exact significance level for N = 3). ; ((<= W eps) (setf p-value 0)) (t (setf WW W) (when (not (= n 3)) (setf UN (log (/ (- W eps) (- 1 w)))) (setf n3 (- n 4)) (if (< UN (select UNL n3)) (return-from shapiro-wilk-w (list w 0))) (cond ((> UN 1.4) (if (> UN (select UNH n3)) (return-from shapiro-wilk-w (list w 1))) (setf nc (select NC2 n3)) (setf c (select (select c2 n3) (iseq nc))) (setf EU3 (exp (exp (poly c (log UN))))) ) (t (setf nc (select NC1 n3)) (setf c (select (select c1 n3) (iseq nc))) (setf EU3 (exp (poly c UN))) ) ) (setf WW (/ (+ EU3 (/ 3 4)) (+ 1 EU3))) ) (setf p-value (* (/ 6 pi) (- (asin (sqrt WW)) (asin (sqrt (/ 3 4)))))) ) ) (list W p-value) ) ) ) ;;-----------------------------------------------------------------------;; ;; ;; ;; Algorithm AS 181.1 Appl. Statist. (1982) Vol. 31, No. 2 ;; ;; ;; ;; Obtain array A of weights for calculating W ;; ;;-----------------------------------------------------------------------;; (defun shapiro-wilk-coef (n) "Arg: (n) Given the sample size n returns a list of weights used by the function shapiro-wilk-w to calculate the Shapiro and Wilk's W statistic." (if (<= n 2) (error "n <= 2")) (if (> n 2000) (error "n > 2000")) (flet ((g (n) (/ (gamma (* 0.5 (+ n 1))) (sqrt (* 2 (gamma (+ (* 0.5 n) 1)))))) ) (let* ((c3 (list (- (/ (sqrt 2) 2)) 0 (/ (sqrt 2) 2))) (c4 (list -0.6869 -0.1678 0.1678 0.6869 )) (c5 (list -0.6647 -0.2412 0 0.2412 0.6647)) (c6 (list -0.6431 -0.2806 -0.0875 0.0875 0.2806 0.6431)) (n2 (if (evenp n) (/ n 2) (/ (- n 1) 2))) (m (exp-order-stat n)) (a (reverse (select m (which (> m 0))))) (eps 0) ;(a1 0) (a 0) ) (tagbody (if (<= n 6) (go exact-values)) ; n > 6 computes rankits using approximate expected order statistics (setf sastar 0) (setf sastar (* 8 (sum (^ (select a (iseq 1 (- n2 1))) 2)))) (setf nn n) (if (<= n 20) (setf nn (- nn 1))) (setf an nn) (setf a1sq (exp (+ (log (+ (* 6 an) 7)) (- (log (+ (* 6 an) 13))) (* .5 (+ 1 (* (- an 2) (log (+ an 1))) (- (* (- an 1) (log (+ an 2))))))))) (setf a1star (/ sastar (- (/ 1 a1sq) 2))) (setf sastar (sqrt (+ sastar (* 2 a1star)))) (setf (select a 0) (/ (sqrt a1star) sastar)) (setf (select a (iseq 1 (- n2 1))) (/ (* 2 (select a (iseq 1 (- n2 1)))) sastar)) (setf a (if (evenp n) (combine (- a) (reverse a)) (combine (- a) 0 (reverse a)))) (go end) exact-values ; n < 6 Use exact values for weights (cond ((= n 3) (setf A c3)) ((= n 4) (setf A c4)) ((= n 5) (setf A c5)) ((= n 6) (setf A c6))) end (setf eps (/ (^ (first (select a (which (> a 0)))) 2) (- 1 (/ 1 n)))) ) (list a eps) ) ) ) (defun exp-order-stat2 (n &optional (r (iseq 1 n))) "Args: (n &optional (r (iseq 1 n))) Returns the approximate expected value of the r-th largest ordered statistics. If r is not provided, then it returns the expected values for r = 1,...,n. Bloom formula." (let ((alpha 0.375)) (normal-quant (/ (- r alpha) (+ n (* -2 alpha) 1))) )) (defun exp-order-stat (n) "Args: (n) Returns the approximate expected values of normal ordered statistics. (Harter, Biometrika, 1961)." (flet ((falpha_n (n) (+ 0.314195 (* 0.063336 (log n 10)) (* -0.010895 (^ (log n 10) 2)))) (falpha_1n (n) (+ 0.315065 (* 0.057974 (log n 10)) (* -0.009776 (^ (log n 10) 2)))) (falpha_2n (n) (+ 0.327511 (* 0.058212 (log n 10)) (* -0.007909 (^ (log n 10) 2)))) ) (let* ((alpha nil) (nn (list 2 4 6 8 10 15 20 25 50 100 200 400)) (alpha_n (list .330 .349 .359 .364 .368 .374 .378 .381 .389 .396 .402 .407)) (alpha_1n (list .330 .347 .355 .360 .364 .370 .374 .377 .384 .391 .396 .401)) (alpha_2n (list nil .359 .368 .374 .378 .385 .390 .394 .403 .412 .419 .426)) (pos (position n nn)) (expord nil) (r (iseq 1 (floor (/ n 2)))) ) ;; set the values of alpha for the 1,2, and rest from the above values ;; or obtain an estimate from the above equations (cond ((null pos) (setf alpha (combine (falpha_1n n) (falpha_2n n) (falpha_n n)))) (t (setf alpha (combine (select alpha_1n pos) (select alpha_2n pos) (select alpha_n pos)))) ) ;; compute the set of n/2 smallest ordered statistics (setf expord (mapcar (lambda (r) (cond ((= r 1) (normal-quant (/ (- r (select alpha 0)) (+ n (* -2 (select alpha 0)) 1)))) ((= r 2) (normal-quant (/ (- r (select alpha 1)) (+ n (* -2 (select alpha 1)) 1)))) (t (normal-quant (/ (- r (select alpha 2)) (+ n (* -2 (select alpha 2)) 1)))) )) r)) (values (if (evenp n) (combine expord (reverse (- expord))) (combine expord 0 (reverse (- expord))))) )))