{-# LANGUAGE FlexibleContexts #-}{-# LANGUAGE BangPatterns #-}-- |-- Module : Statistics.Sample-- Copyright : (c) 2008 Don Stewart, 2009 Bryan O'Sullivan-- License : BSD3---- Maintainer : bos@serpentine.com-- Stability : experimental-- Portability : portable---- Commonly used sample statistics, also known as descriptive-- statistics.moduleStatistics.Sample(-- * TypesSample ,WeightedSample -- * Descriptive functions,range -- * Statistics of location,expectation ,mean ,welfordMean ,meanWeighted ,harmonicMean ,geometricMean -- * Statistics of dispersion-- $variance-- ** Functions over central moments,centralMoment ,centralMoments ,skewness ,kurtosis -- ** Two-pass functions (numerically robust)-- $robust,variance ,varianceUnbiased ,meanVariance ,meanVarianceUnb ,stdDev ,varianceWeighted ,stdErrMean -- ** Single-pass functions (faster, less safe)-- $cancellation,fastVariance ,fastVarianceUnbiased ,fastStdDev -- * Joint distributions,covariance ,correlation ,covariance2 ,correlation2 ,pair -- * References-- $references)whereimportStatistics.Function (minMax ,square )importStatistics.Sample.Internal (robustSumVar ,sum )importStatistics.Types.Internal (Sample ,WeightedSample )importqualifiedData.VectorasVimportqualifiedData.Vector.GenericasGimportqualifiedData.Vector.UnboxedasUimportNumeric.Sum(kbn,Summation(zero,add))-- Operator ^ will be overriddenimportPreludehiding((^),sum)-- | /O(n)/ Range. The difference between the largest and smallest-- elements of a sample.range ::(G.Vectorv Double)=>v Double->Doublerange :: forall (v :: * -> *). Vector v Double => v Double -> Double range v Double s =Double hi Double -> Double -> Double forall a. Num a => a -> a -> a -Double lo where(Double lo ,Double hi )=v Double -> (Double, Double) forall (v :: * -> *). Vector v Double => v Double -> (Double, Double) minMax v Double s {-# INLINErange #-}-- | /O(n)/ Compute expectation of function over for sample. This is-- simply @mean . map f@ but won't create intermediate vector.expectation ::(G.Vectorv a )=>(a ->Double)->v a ->Doubleexpectation :: forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation a -> Double f v a xs =KBNSum -> Double kbn((KBNSum -> a -> KBNSum) -> KBNSum -> v a -> KBNSum forall (v :: * -> *) b a. Vector v b => (a -> b -> a) -> a -> v b -> a G.foldl'(\KBNSum s ->KBNSum -> Double -> KBNSum forall s. Summation s => s -> Double -> s addKBNSum s (Double -> KBNSum) -> (a -> Double) -> a -> KBNSum forall b c a. (b -> c) -> (a -> b) -> a -> c .a -> Double f )KBNSum forall s. Summation s => s zerov a xs )Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegral(v a -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv a xs ){-# INLINEexpectation #-}-- | /O(n)/ Arithmetic mean. This uses Kahan-Babuška-Neumaier-- summation, so is more accurate than 'welfordMean' unless the input-- values are very large. This function is not subject to stream-- fusion.mean ::(G.Vectorv Double)=>v Double->Doublemean :: forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double xs =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double sum v Double xs Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegral(v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double xs ){-# SPECIALIZEmean ::U.VectorDouble->Double#-}{-# SPECIALIZEmean ::V.VectorDouble->Double#-}-- | /O(n)/ Arithmetic mean. This uses Welford's algorithm to provide-- numerical stability, using a single pass over the sample data.---- Compared to 'mean', this loses a surprising amount of precision-- unless the inputs are very large.welfordMean ::(G.Vectorv Double)=>v Double->DoublewelfordMean :: forall (v :: * -> *). Vector v Double => v Double -> Double welfordMean =T -> Double fini (T -> Double) -> (v Double -> T) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .(T -> Double -> T) -> T -> v Double -> T forall (v :: * -> *) b a. Vector v b => (a -> b -> a) -> a -> v b -> a G.foldl'T -> Double -> T go (Double -> Int -> T T Double 0Int 0)wherefini :: T -> Double fini (T Double a Int _)=Double a go :: T -> Double -> T go (T Double m Int n )Double x =Double -> Int -> T T Double m' Int n' wherem' :: Double m' =Double m Double -> Double -> Double forall a. Num a => a -> a -> a +(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double m )Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegralInt n' n' :: Int n' =Int n Int -> Int -> Int forall a. Num a => a -> a -> a +Int 1{-# SPECIALIZEwelfordMean ::U.VectorDouble->Double#-}{-# SPECIALIZEwelfordMean ::V.VectorDouble->Double#-}-- | /O(n)/ Arithmetic mean for weighted sample. It uses a single-pass-- algorithm analogous to the one used by 'welfordMean'.meanWeighted ::(G.Vectorv (Double,Double))=>v (Double,Double)->DoublemeanWeighted :: forall (v :: * -> *). Vector v (Double, Double) => v (Double, Double) -> Double meanWeighted =V -> Double fini (V -> Double) -> (v (Double, Double) -> V) -> v (Double, Double) -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .(V -> (Double, Double) -> V) -> V -> v (Double, Double) -> V forall (v :: * -> *) b a. Vector v b => (a -> b -> a) -> a -> v b -> a G.foldl'V -> (Double, Double) -> V go (Double -> Double -> V V Double 0Double 0)wherefini :: V -> Double fini (V Double a Double _)=Double a go :: V -> (Double, Double) -> V go (V Double m Double w )(Double x ,Double xw )=Double -> Double -> V V Double m' Double w' wherem' :: Double m' |Double w' Double -> Double -> Bool forall a. Eq a => a -> a -> Bool ==Double 0=Double 0|Bool otherwise=Double m Double -> Double -> Double forall a. Num a => a -> a -> a +Double xw Double -> Double -> Double forall a. Num a => a -> a -> a *(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double m )Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double w' w' :: Double w' =Double w Double -> Double -> Double forall a. Num a => a -> a -> a +Double xw {-# INLINEmeanWeighted #-}-- | /O(n)/ Harmonic mean. This algorithm performs a single pass over-- the sample.harmonicMean ::(G.Vectorv Double)=>v Double->DoubleharmonicMean :: forall (v :: * -> *). Vector v Double => v Double -> Double harmonicMean =T -> Double fini (T -> Double) -> (v Double -> T) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .(T -> Double -> T) -> T -> v Double -> T forall (v :: * -> *) b a. Vector v b => (a -> b -> a) -> a -> v b -> a G.foldl'T -> Double -> T go (Double -> Int -> T T Double 0Int 0)wherefini :: T -> Double fini (T Double b Int a )=Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegralInt a Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double b go :: T -> Double -> T go (T Double x Int y )Double n =Double -> Int -> T T (Double x Double -> Double -> Double forall a. Num a => a -> a -> a +(Double 1Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double n ))(Int y Int -> Int -> Int forall a. Num a => a -> a -> a +Int 1){-# INLINEharmonicMean #-}-- | /O(n)/ Geometric mean of a sample containing no negative values.geometricMean ::(G.Vectorv Double)=>v Double->DoublegeometricMean :: forall (v :: * -> *). Vector v Double => v Double -> Double geometricMean =Double -> Double forall a. Floating a => a -> a exp(Double -> Double) -> (v Double -> Double) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .(Double -> Double) -> v Double -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation Double -> Double forall a. Floating a => a -> a log{-# INLINEgeometricMean #-}-- | Compute the /k/th central moment of a sample. The central moment-- is also known as the moment about the mean.---- This function performs two passes over the sample, so is not subject-- to stream fusion.---- For samples containing many values very close to the mean, this-- function is subject to inaccuracy due to catastrophic cancellation.centralMoment ::(G.Vectorv Double)=>Int->v Double->DoublecentralMoment :: forall (v :: * -> *). Vector v Double => Int -> v Double -> Double centralMoment Int a v Double xs |Int a Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <Int 0=[Char] -> Double forall a. HasCallStack => [Char] -> a error[Char] "Statistics.Sample.centralMoment: negative input"|Int a Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==Int 0=Double 1|Int a Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==Int 1=Double 0|Bool otherwise=(Double -> Double) -> v Double -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation Double -> Double go v Double xs wherego :: Double -> Double go Double x =(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double m )Double -> Int -> Double ^ Int a m :: Double m =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double xs {-# SPECIALIZEcentralMoment ::Int->U.VectorDouble->Double#-}{-# SPECIALIZEcentralMoment ::Int->V.VectorDouble->Double#-}-- | Compute the /k/th and /j/th central moments of a sample.---- This function performs two passes over the sample, so is not subject-- to stream fusion.---- For samples containing many values very close to the mean, this-- function is subject to inaccuracy due to catastrophic cancellation.centralMoments ::(G.Vectorv Double)=>Int->Int->v Double->(Double,Double)centralMoments :: forall (v :: * -> *). Vector v Double => Int -> Int -> v Double -> (Double, Double) centralMoments Int a Int b v Double xs |Int a Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <Int 2Bool -> Bool -> Bool ||Int b Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <Int 2=(Int -> v Double -> Double forall (v :: * -> *). Vector v Double => Int -> v Double -> Double centralMoment Int a v Double xs ,Int -> v Double -> Double forall (v :: * -> *). Vector v Double => Int -> v Double -> Double centralMoment Int b v Double xs )|Bool otherwise=V -> (Double, Double) fini (V -> (Double, Double)) -> (v Double -> V) -> v Double -> (Double, Double) forall b c a. (b -> c) -> (a -> b) -> a -> c .(V -> Double -> V) -> V -> v Double -> V forall (v :: * -> *) b a. Vector v b => (a -> b -> a) -> a -> v b -> a G.foldl'V -> Double -> V go (Double -> Double -> V V Double 0Double 0)(v Double -> (Double, Double)) -> v Double -> (Double, Double) forall a b. (a -> b) -> a -> b $v Double xs wherego :: V -> Double -> V go (V Double i Double j )Double x =Double -> Double -> V V (Double i Double -> Double -> Double forall a. Num a => a -> a -> a +Double d Double -> Int -> Double ^ Int a )(Double j Double -> Double -> Double forall a. Num a => a -> a -> a +Double d Double -> Int -> Double ^ Int b )whered :: Double d =Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double m fini :: V -> (Double, Double) fini (V Double i Double j )=(Double i Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double n ,Double j Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double n )m :: Double m =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double xs n :: Double n =Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegral(v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double xs ){-# SPECIALIZEcentralMoments ::Int->Int->U.VectorDouble->(Double,Double)#-}{-# SPECIALIZEcentralMoments ::Int->Int->V.VectorDouble->(Double,Double)#-}-- | Compute the skewness of a sample. This is a measure of the-- asymmetry of its distribution.---- A sample with negative skew is said to be /left-skewed/. Most of-- its mass is on the right of the distribution, with the tail on the-- left.---- > skewness $ U.to [1,100,101,102,103]-- > ==> -1.497681449918257---- A sample with positive skew is said to be /right-skewed/.---- > skewness $ U.to [1,2,3,4,100]-- > ==> 1.4975367033335198---- A sample's skewness is not defined if its 'variance' is zero.---- This function performs two passes over the sample, so is not subject-- to stream fusion.---- For samples containing many values very close to the mean, this-- function is subject to inaccuracy due to catastrophic cancellation.skewness ::(G.Vectorv Double)=>v Double->Doubleskewness :: forall (v :: * -> *). Vector v Double => v Double -> Double skewness v Double xs =Double c3 Double -> Double -> Double forall a. Num a => a -> a -> a *Double c2 Double -> Double -> Double forall a. Floating a => a -> a -> a **(-Double 1.5)where(Double c3 ,Double c2 )=Int -> Int -> v Double -> (Double, Double) forall (v :: * -> *). Vector v Double => Int -> Int -> v Double -> (Double, Double) centralMoments Int 3Int 2v Double xs {-# SPECIALIZEskewness ::U.VectorDouble->Double#-}{-# SPECIALIZEskewness ::V.VectorDouble->Double#-}-- | Compute the excess kurtosis of a sample. This is a measure of-- the \"peakedness\" of its distribution. A high kurtosis indicates-- that more of the sample's variance is due to infrequent severe-- deviations, rather than more frequent modest deviations.---- A sample's excess kurtosis is not defined if its 'variance' is-- zero.---- This function performs two passes over the sample, so is not subject-- to stream fusion.---- For samples containing many values very close to the mean, this-- function is subject to inaccuracy due to catastrophic cancellation.kurtosis ::(G.Vectorv Double)=>v Double->Doublekurtosis :: forall (v :: * -> *). Vector v Double => v Double -> Double kurtosis v Double xs =Double c4 Double -> Double -> Double forall a. Fractional a => a -> a -> a /(Double c2 Double -> Double -> Double forall a. Num a => a -> a -> a *Double c2 )Double -> Double -> Double forall a. Num a => a -> a -> a -Double 3where(Double c4 ,Double c2 )=Int -> Int -> v Double -> (Double, Double) forall (v :: * -> *). Vector v Double => Int -> Int -> v Double -> (Double, Double) centralMoments Int 4Int 2v Double xs {-# SPECIALIZEkurtosis ::U.VectorDouble->Double#-}{-# SPECIALIZEkurtosis ::V.VectorDouble->Double#-}-- $variance---- The variance — and hence the standard deviation — of a-- sample of fewer than two elements are both defined to be zero.-- $robust---- These functions use the compensated summation algorithm of Chan et-- al. for numerical robustness, but require two passes over the-- sample data as a result.---- Because of the need for two passes, these functions are /not/-- subject to stream fusion.dataV =V {-# UNPACK#-}!Double{-# UNPACK#-}!Double-- | Maximum likelihood estimate of a sample's variance. Also known-- as the population variance, where the denominator is /n/.variance ::(G.Vectorv Double)=>v Double->Doublevariance :: forall (v :: * -> *). Vector v Double => v Double -> Double variance v Double samp |Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >Int 1=Double -> v Double -> Double forall (v :: * -> *). Vector v Double => Double -> v Double -> Double robustSumVar (v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double samp )v Double samp Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegralInt n |Bool otherwise=Double 0wheren :: Int n =v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double samp {-# SPECIALIZEvariance ::U.VectorDouble->Double#-}{-# SPECIALIZEvariance ::V.VectorDouble->Double#-}-- | Unbiased estimate of a sample's variance. Also known as the-- sample variance, where the denominator is /n/-1.varianceUnbiased ::(G.Vectorv Double)=>v Double->DoublevarianceUnbiased :: forall (v :: * -> *). Vector v Double => v Double -> Double varianceUnbiased v Double samp |Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >Int 1=Double -> v Double -> Double forall (v :: * -> *). Vector v Double => Double -> v Double -> Double robustSumVar (v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double samp )v Double samp Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegral(Int n Int -> Int -> Int forall a. Num a => a -> a -> a -Int 1)|Bool otherwise=Double 0wheren :: Int n =v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double samp {-# SPECIALIZEvarianceUnbiased ::U.VectorDouble->Double#-}{-# SPECIALIZEvarianceUnbiased ::V.VectorDouble->Double#-}-- | Calculate mean and maximum likelihood estimate of variance. This-- function should be used if both mean and variance are required-- since it will calculate mean only once.meanVariance ::(G.Vectorv Double)=>v Double->(Double,Double)meanVariance :: forall (v :: * -> *). Vector v Double => v Double -> (Double, Double) meanVariance v Double samp |Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >Int 1=(Double m ,Double -> v Double -> Double forall (v :: * -> *). Vector v Double => Double -> v Double -> Double robustSumVar Double m v Double samp Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegralInt n )|Bool otherwise=(Double m ,Double 0)wheren :: Int n =v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double samp m :: Double m =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double samp {-# SPECIALIZEmeanVariance ::U.VectorDouble->(Double,Double)#-}{-# SPECIALIZEmeanVariance ::V.VectorDouble->(Double,Double)#-}-- | Calculate mean and unbiased estimate of variance. This-- function should be used if both mean and variance are required-- since it will calculate mean only once.meanVarianceUnb ::(G.Vectorv Double)=>v Double->(Double,Double)meanVarianceUnb :: forall (v :: * -> *). Vector v Double => v Double -> (Double, Double) meanVarianceUnb v Double samp |Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >Int 1=(Double m ,Double -> v Double -> Double forall (v :: * -> *). Vector v Double => Double -> v Double -> Double robustSumVar Double m v Double samp Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegral(Int n Int -> Int -> Int forall a. Num a => a -> a -> a -Int 1))|Bool otherwise=(Double m ,Double 0)wheren :: Int n =v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double samp m :: Double m =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double samp {-# SPECIALIZEmeanVarianceUnb ::U.VectorDouble->(Double,Double)#-}{-# SPECIALIZEmeanVarianceUnb ::V.VectorDouble->(Double,Double)#-}-- | Standard deviation. This is simply the square root of the-- unbiased estimate of the variance.stdDev ::(G.Vectorv Double)=>v Double->DoublestdDev :: forall (v :: * -> *). Vector v Double => v Double -> Double stdDev =Double -> Double forall a. Floating a => a -> a sqrt(Double -> Double) -> (v Double -> Double) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double varianceUnbiased {-# SPECIALIZEstdDev ::U.VectorDouble->Double#-}{-# SPECIALIZEstdDev ::V.VectorDouble->Double#-}-- | Standard error of the mean. This is the standard deviation-- divided by the square root of the sample size.stdErrMean ::(G.Vectorv Double)=>v Double->DoublestdErrMean :: forall (v :: * -> *). Vector v Double => v Double -> Double stdErrMean v Double samp =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double stdDev v Double samp Double -> Double -> Double forall a. Fractional a => a -> a -> a /(Double -> Double forall a. Floating a => a -> a sqrt(Double -> Double) -> (v Double -> Double) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegral(Int -> Double) -> (v Double -> Int) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.length)v Double samp {-# SPECIALIZEstdErrMean ::U.VectorDouble->Double#-}{-# SPECIALIZEstdErrMean ::V.VectorDouble->Double#-}robustSumVarWeighted ::(G.Vectorv (Double,Double))=>v (Double,Double)->V robustSumVarWeighted :: forall (v :: * -> *). Vector v (Double, Double) => v (Double, Double) -> V robustSumVarWeighted v (Double, Double) samp =(V -> (Double, Double) -> V) -> V -> v (Double, Double) -> V forall (v :: * -> *) b a. Vector v b => (a -> b -> a) -> a -> v b -> a G.foldl'V -> (Double, Double) -> V go (Double -> Double -> V V Double 0Double 0)v (Double, Double) samp wherego :: V -> (Double, Double) -> V go (V Double s Double w )(Double x ,Double xw )=Double -> Double -> V V (Double s Double -> Double -> Double forall a. Num a => a -> a -> a +Double xw Double -> Double -> Double forall a. Num a => a -> a -> a *Double d Double -> Double -> Double forall a. Num a => a -> a -> a *Double d )(Double w Double -> Double -> Double forall a. Num a => a -> a -> a +Double xw )whered :: Double d =Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double m m :: Double m =v (Double, Double) -> Double forall (v :: * -> *). Vector v (Double, Double) => v (Double, Double) -> Double meanWeighted v (Double, Double) samp {-# INLINErobustSumVarWeighted #-}-- | Weighted variance. This is biased estimation.varianceWeighted ::(G.Vectorv (Double,Double))=>v (Double,Double)->DoublevarianceWeighted :: forall (v :: * -> *). Vector v (Double, Double) => v (Double, Double) -> Double varianceWeighted v (Double, Double) samp |v (Double, Double) -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv (Double, Double) samp Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >Int 1=V -> Double fini (V -> Double) -> V -> Double forall a b. (a -> b) -> a -> b $v (Double, Double) -> V forall (v :: * -> *). Vector v (Double, Double) => v (Double, Double) -> V robustSumVarWeighted v (Double, Double) samp |Bool otherwise=Double 0wherefini :: V -> Double fini (V Double s Double w )=Double s Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double w {-# SPECIALIZEvarianceWeighted ::U.Vector(Double,Double)->Double#-}{-# SPECIALIZEvarianceWeighted ::V.Vector(Double,Double)->Double#-}-- $cancellation---- The functions prefixed with the name @fast@ below perform a single-- pass over the sample data using Knuth's algorithm. They usually-- work well, but see below for caveats. These functions are subject-- to array fusion.---- /Note/: in cases where most sample data is close to the sample's-- mean, Knuth's algorithm gives inaccurate results due to-- catastrophic cancellation.fastVar ::(G.Vectorv Double)=>v Double->T1 fastVar :: forall (v :: * -> *). Vector v Double => v Double -> T1 fastVar =(T1 -> Double -> T1) -> T1 -> v Double -> T1 forall (v :: * -> *) b a. Vector v b => (a -> b -> a) -> a -> v b -> a G.foldl'T1 -> Double -> T1 go (Int -> Double -> Double -> T1 T1 Int 0Double 0Double 0)wherego :: T1 -> Double -> T1 go (T1 Int n Double m Double s )Double x =Int -> Double -> Double -> T1 T1 Int n' Double m' Double s' wheren' :: Int n' =Int n Int -> Int -> Int forall a. Num a => a -> a -> a +Int 1m' :: Double m' =Double m Double -> Double -> Double forall a. Num a => a -> a -> a +Double d Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegralInt n' s' :: Double s' =Double s Double -> Double -> Double forall a. Num a => a -> a -> a +Double d Double -> Double -> Double forall a. Num a => a -> a -> a *(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double m' )d :: Double d =Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double m -- | Maximum likelihood estimate of a sample's variance.fastVariance ::(G.Vectorv Double)=>v Double->DoublefastVariance :: forall (v :: * -> *). Vector v Double => v Double -> Double fastVariance =T1 -> Double fini (T1 -> Double) -> (v Double -> T1) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .v Double -> T1 forall (v :: * -> *). Vector v Double => v Double -> T1 fastVar wherefini :: T1 -> Double fini (T1 Int n Double _m Double s )|Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >Int 1=Double s Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegralInt n |Bool otherwise=Double 0{-# INLINEfastVariance #-}-- | Unbiased estimate of a sample's variance.fastVarianceUnbiased ::(G.Vectorv Double)=>v Double->DoublefastVarianceUnbiased :: forall (v :: * -> *). Vector v Double => v Double -> Double fastVarianceUnbiased =T1 -> Double fini (T1 -> Double) -> (v Double -> T1) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .v Double -> T1 forall (v :: * -> *). Vector v Double => v Double -> T1 fastVar wherefini :: T1 -> Double fini (T1 Int n Double _m Double s )|Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >Int 1=Double s Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegral(Int n Int -> Int -> Int forall a. Num a => a -> a -> a -Int 1)|Bool otherwise=Double 0{-# INLINEfastVarianceUnbiased #-}-- | Standard deviation. This is simply the square root of the-- maximum likelihood estimate of the variance.fastStdDev ::(G.Vectorv Double)=>v Double->DoublefastStdDev :: forall (v :: * -> *). Vector v Double => v Double -> Double fastStdDev =Double -> Double forall a. Floating a => a -> a sqrt(Double -> Double) -> (v Double -> Double) -> v Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double fastVariance {-# INLINEfastStdDev #-}-- | Covariance of sample of pairs. For empty sample it's set to-- zerocovariance ::(G.Vectorv (Double,Double))=>v (Double,Double)->Doublecovariance :: forall (v :: * -> *). Vector v (Double, Double) => v (Double, Double) -> Double covariance v (Double, Double) xy |Int n Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==Int 0=Double 0|Bool otherwise=((Double, Double) -> Double) -> v (Double, Double) -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation (\(Double x ,Double y )->(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double muX )Double -> Double -> Double forall a. Num a => a -> a -> a *(Double y Double -> Double -> Double forall a. Num a => a -> a -> a -Double muY ))v (Double, Double) xy wheren :: Int n =v (Double, Double) -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv (Double, Double) xy muX :: Double muX =((Double, Double) -> Double) -> v (Double, Double) -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation (Double, Double) -> Double forall a b. (a, b) -> a fstv (Double, Double) xy muY :: Double muY =((Double, Double) -> Double) -> v (Double, Double) -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation (Double, Double) -> Double forall a b. (a, b) -> b sndv (Double, Double) xy {-# SPECIALIZEcovariance ::U.Vector(Double,Double)->Double#-}{-# SPECIALIZEcovariance ::V.Vector(Double,Double)->Double#-}-- | Correlation coefficient for sample of pairs. Also known as-- Pearson's correlation. For empty sample it's set to zero.correlation ::(G.Vectorv (Double,Double))=>v (Double,Double)->Doublecorrelation :: forall (v :: * -> *). Vector v (Double, Double) => v (Double, Double) -> Double correlation v (Double, Double) xy |Int n Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==Int 0=Double 0|Bool otherwise=Double cov Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double -> Double forall a. Floating a => a -> a sqrt(Double varX Double -> Double -> Double forall a. Num a => a -> a -> a *Double varY )wheren :: Int n =v (Double, Double) -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv (Double, Double) xy muX :: Double muX =((Double, Double) -> Double) -> v (Double, Double) -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation (\(Double x ,Double _)->Double x )v (Double, Double) xy muY :: Double muY =((Double, Double) -> Double) -> v (Double, Double) -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation (\(Double _,Double y )->Double y )v (Double, Double) xy varX :: Double varX =((Double, Double) -> Double) -> v (Double, Double) -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation (\(Double x ,Double _)->Double -> Double square (Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double muX ))v (Double, Double) xy varY :: Double varY =((Double, Double) -> Double) -> v (Double, Double) -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation (\(Double _,Double y )->Double -> Double square (Double y Double -> Double -> Double forall a. Num a => a -> a -> a -Double muY ))v (Double, Double) xy cov :: Double cov =((Double, Double) -> Double) -> v (Double, Double) -> Double forall (v :: * -> *) a. Vector v a => (a -> Double) -> v a -> Double expectation (\(Double x ,Double y )->(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double muX )Double -> Double -> Double forall a. Num a => a -> a -> a *(Double y Double -> Double -> Double forall a. Num a => a -> a -> a -Double muY ))v (Double, Double) xy {-# SPECIALIZEcorrelation ::U.Vector(Double,Double)->Double#-}{-# SPECIALIZEcorrelation ::V.Vector(Double,Double)->Double#-}-- | Covariance of two samples. Both vectors must be of the same-- length. If both are empty it's set to zerocovariance2 ::(G.Vectorv Double)=>v Double->v Double->Doublecovariance2 :: forall (v :: * -> *). Vector v Double => v Double -> v Double -> Double covariance2 v Double xs v Double ys |Int nx Int -> Int -> Bool forall a. Eq a => a -> a -> Bool /=Int ny =[Char] -> Double forall a. HasCallStack => [Char] -> a error([Char] -> Double) -> [Char] -> Double forall a b. (a -> b) -> a -> b $[Char] "Statistics.Sample.covariance2: both samples must have same length"|Int nx Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==Int 0=Double 0|Bool otherwise=v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double sum ((Double -> Double -> Double) -> v Double -> v Double -> v Double forall (v :: * -> *) a b c. (Vector v a, Vector v b, Vector v c) => (a -> b -> c) -> v a -> v b -> v c G.zipWith(\Double x Double y ->(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double muX )Double -> Double -> Double forall a. Num a => a -> a -> a *(Double y Double -> Double -> Double forall a. Num a => a -> a -> a -Double muY ))v Double xs v Double ys )Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegralInt nx wherenx :: Int nx =v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double xs ny :: Int ny =v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double ys muX :: Double muX =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double xs muY :: Double muY =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double mean v Double ys {-# SPECIALIZEcovariance2 ::U.VectorDouble->U.VectorDouble->Double#-}{-# SPECIALIZEcovariance2 ::V.VectorDouble->V.VectorDouble->Double#-}-- | Correlation coefficient for two samples. Both vector must have-- same length Also known as Pearson's correlation. For empty sample-- it's set to zero.correlation2 ::(G.Vectorv Double)=>v Double->v Double->Doublecorrelation2 :: forall (v :: * -> *). Vector v Double => v Double -> v Double -> Double correlation2 v Double xs v Double ys |Int nx Int -> Int -> Bool forall a. Eq a => a -> a -> Bool /=Int ny =[Char] -> Double forall a. HasCallStack => [Char] -> a error([Char] -> Double) -> [Char] -> Double forall a b. (a -> b) -> a -> b $[Char] "Statistics.Sample.correlation2: both samples must have same length"|Int nx Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==Int 0=Double 0|Bool otherwise=Double cov Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double -> Double forall a. Floating a => a -> a sqrt(Double varX Double -> Double -> Double forall a. Num a => a -> a -> a *Double varY )wherenx :: Int nx =v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double xs ny :: Int ny =v Double -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv Double ys (Double muX ,Double varX )=v Double -> (Double, Double) forall (v :: * -> *). Vector v Double => v Double -> (Double, Double) meanVariance v Double xs (Double muY ,Double varY )=v Double -> (Double, Double) forall (v :: * -> *). Vector v Double => v Double -> (Double, Double) meanVariance v Double ys cov :: Double cov =v Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double sum ((Double -> Double -> Double) -> v Double -> v Double -> v Double forall (v :: * -> *) a b c. (Vector v a, Vector v b, Vector v c) => (a -> b -> c) -> v a -> v b -> v c G.zipWith(\Double x Double y ->(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double muX )Double -> Double -> Double forall a. Num a => a -> a -> a *(Double y Double -> Double -> Double forall a. Num a => a -> a -> a -Double muY ))v Double xs v Double ys )Double -> Double -> Double forall a. Fractional a => a -> a -> a /Int -> Double forall a b. (Integral a, Num b) => a -> b fromIntegralInt nx {-# SPECIALIZEcorrelation2 ::U.VectorDouble->U.VectorDouble->Double#-}{-# SPECIALIZEcorrelation2 ::V.VectorDouble->V.VectorDouble->Double#-}-- | Pair two samples. It's like 'G.zip' but requires that both-- samples have equal size.pair ::(G.Vectorv a ,G.Vectorv b ,G.Vectorv (a ,b ))=>v a ->v b ->v (a ,b )pair :: forall (v :: * -> *) a b. (Vector v a, Vector v b, Vector v (a, b)) => v a -> v b -> v (a, b) pair v a va v b vb |v a -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv a va Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==v b -> Int forall (v :: * -> *) a. Vector v a => v a -> Int G.lengthv b vb =v a -> v b -> v (a, b) forall (v :: * -> *) a b. (Vector v a, Vector v b, Vector v (a, b)) => v a -> v b -> v (a, b) G.zipv a va v b vb |Bool otherwise=[Char] -> v (a, b) forall a. HasCallStack => [Char] -> a error[Char] "Statistics.Sample.pair: vector must have same length"{-# INLINEpair #-}-------------------------------------------------------------------------- Helper code. Monomorphic unpacked accumulators.-- (^) operator from Prelude is just slow.(^) ::Double->Int->DoubleDouble x0 ^ :: Double -> Int -> Double ^ Int n0 =Int -> Double -> Double forall {t}. (Eq t, Num t) => t -> Double -> Double go (Int n0 Int -> Int -> Int forall a. Num a => a -> a -> a -Int 1)Double x0 wherego :: t -> Double -> Double go t 0!Double acc =Double acc go t n Double acc =t -> Double -> Double go (t n t -> t -> t forall a. Num a => a -> a -> a -t 1)(Double acc Double -> Double -> Double forall a. Num a => a -> a -> a *Double x0 ){-# INLINE(^)#-}-- don't support polymorphism, as we can't get unboxed returns if we use it.dataT =T {-# UNPACK#-}!Double{-# UNPACK#-}!IntdataT1 =T1 {-# UNPACK#-}!Int{-# UNPACK#-}!Double{-# UNPACK#-}!Double{- Consider this core: with data T a = T !a !Int $wfold :: Double# -> Int# -> Int# -> (# Double, Int# #) and without, $wfold :: Double# -> Int# -> Int# -> (# Double#, Int# #) yielding to boxed returns and heap checks. -}-- $references---- * Chan, T. F.; Golub, G.H.; LeVeque, R.J. (1979) Updating formulae-- and a pairwise algorithm for computing sample-- variances. Technical Report STAN-CS-79-773, Department of-- Computer Science, Stanford-- University. <ftp://reports.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf>---- * Knuth, D.E. (1998) The art of computer programming, volume 2:-- seminumerical algorithms, 3rd ed., p. 232.---- * Welford, B.P. (1962) Note on a method for calculating corrected-- sums of squares and products. /Technometrics/-- 4(3):419–420. <http://www.jstor.org/stable/1266577>---- * West, D.H.D. (1979) Updating mean and variance estimates: an-- improved method. /Communications of the ACM/-- 22(9):532–535. <http://doi.acm.org/10.1145/359146.359153>