{-# LANGUAGE MultiParamTypeClasses #-}{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}-- |-- Module : Statistics.Distribution-- Copyright : (c) 2009 Bryan O'Sullivan-- License : BSD3---- Maintainer : bos@serpentine.com-- Stability : experimental-- Portability : portable---- Type classes for probability distributionsmoduleStatistics.Distribution(-- * Type classesDistribution (..),DiscreteDistr (..),ContDistr (..)-- ** Distribution statistics,MaybeMean (..),Mean (..),MaybeVariance (..),Variance (..),MaybeEntropy (..),Entropy (..),FromSample (..)-- ** Random number generation,ContGen (..),DiscreteGen (..),genContinuous -- * Helper functions,findRoot ,sumProbabilities )whereimportPreludehiding(sum)importStatistics.Function (square )importStatistics.Sample.Internal (sum )importSystem.Random.Stateful(StatefulGen,uniformDouble01M)importqualifiedData.Vector.UnboxedasUimportqualifiedData.Vector.GenericasG-- | Type class common to all distributions. Only c.d.f. could be-- defined for both discrete and continuous distributions.classDistribution d where-- | Cumulative distribution function. The probability that a-- random variable /X/ is less or equal than /x/,-- i.e. P(/X/≤/x/). Cumulative should be defined for-- infinities as well:---- > cumulative d +∞ = 1-- > cumulative d -∞ = 0cumulative ::d ->Double->Doublecumulative d d Double x =Double 1Double -> Double -> Double forall a. Num a => a -> a -> a -d -> Double -> Double forall d. Distribution d => d -> Double -> Double complCumulative d d Double x -- | One's complement of cumulative distribution:---- > complCumulative d x = 1 - cumulative d x---- It's useful when one is interested in P(/X/>/x/) and-- expression on the right side begin to lose precision. This-- function have default implementation but implementors are-- encouraged to provide more precise implementation.complCumulative ::d ->Double->DoublecomplCumulative d d Double x =Double 1Double -> Double -> Double forall a. Num a => a -> a -> a -d -> Double -> Double forall d. Distribution d => d -> Double -> Double cumulative d d Double x {-# MINIMAL(cumulative |complCumulative )#-}-- | Discrete probability distribution.classDistribution d =>DiscreteDistr d where-- | Probability of n-th outcome.probability ::d ->Int->Doubleprobability d d =Double -> Double forall a. Floating a => a -> a exp(Double -> Double) -> (Int -> Double) -> Int -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .d -> Int -> Double forall d. DiscreteDistr d => d -> Int -> Double logProbability d d -- | Logarithm of probability of n-th outcomelogProbability ::d ->Int->DoublelogProbability d d =Double -> Double forall a. Floating a => a -> a log(Double -> Double) -> (Int -> Double) -> Int -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .d -> Int -> Double forall d. DiscreteDistr d => d -> Int -> Double probability d d {-# MINIMAL(probability |logProbability )#-}-- | Continuous probability distribution.---- Minimal complete definition is 'quantile' and either 'density' or-- 'logDensity'.classDistribution d =>ContDistr d where-- | Probability density function. Probability that random-- variable /X/ lies in the infinitesimal interval-- [/x/,/x+/δ/x/) equal to /density(x)/⋅δ/x/density ::d ->Double->Doubledensity d d =Double -> Double forall a. Floating a => a -> a exp(Double -> Double) -> (Double -> Double) -> Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .d -> Double -> Double forall d. ContDistr d => d -> Double -> Double logDensity d d -- | Natural logarithm of density.logDensity ::d ->Double->DoublelogDensity d d =Double -> Double forall a. Floating a => a -> a log(Double -> Double) -> (Double -> Double) -> Double -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .d -> Double -> Double forall d. ContDistr d => d -> Double -> Double density d d -- | Inverse of the cumulative distribution function. The value-- /x/ for which P(/X/≤/x/) = /p/. If probability is outside-- of [0,1] range function should call 'error'quantile ::d ->Double->Doublequantile d d Double x =d -> Double -> Double forall d. ContDistr d => d -> Double -> Double complQuantile d d (Double 1Double -> Double -> Double forall a. Num a => a -> a -> a -Double x )-- | 1-complement of @quantile@:---- > complQuantile x ≡ quantile (1 - x)complQuantile ::d ->Double->DoublecomplQuantile d d Double x =d -> Double -> Double forall d. ContDistr d => d -> Double -> Double quantile d d (Double 1Double -> Double -> Double forall a. Num a => a -> a -> a -Double x ){-# MINIMAL(density |logDensity ),(quantile |complQuantile )#-}-- | Type class for distributions with mean. 'maybeMean' should return-- 'Nothing' if it's undefined for current value of dataclassDistribution d =>MaybeMean d wheremaybeMean ::d ->MaybeDouble-- | Type class for distributions with mean. If a distribution has-- finite mean for all valid values of parameters it should be-- instance of this type class.classMaybeMean d =>Mean d wheremean ::d ->Double-- | Type class for distributions with variance. If variance is-- undefined for some parameter values both 'maybeVariance' and-- 'maybeStdDev' should return Nothing.---- Minimal complete definition is 'maybeVariance' or 'maybeStdDev'classMaybeMean d =>MaybeVariance d wheremaybeVariance ::d ->MaybeDoublemaybeVariance =(Double -> Double) -> Maybe Double -> Maybe Double forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmapDouble -> Double square (Maybe Double -> Maybe Double) -> (d -> Maybe Double) -> d -> Maybe Double forall b c a. (b -> c) -> (a -> b) -> a -> c .d -> Maybe Double forall d. MaybeVariance d => d -> Maybe Double maybeStdDev maybeStdDev ::d ->MaybeDoublemaybeStdDev =(Double -> Double) -> Maybe Double -> Maybe Double forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmapDouble -> Double forall a. Floating a => a -> a sqrt(Maybe Double -> Maybe Double) -> (d -> Maybe Double) -> d -> Maybe Double forall b c a. (b -> c) -> (a -> b) -> a -> c .d -> Maybe Double forall d. MaybeVariance d => d -> Maybe Double maybeVariance {-# MINIMAL(maybeVariance |maybeStdDev )#-}-- | Type class for distributions with variance. If distribution have-- finite variance for all valid parameter values it should be-- instance of this type class.---- Minimal complete definition is 'variance' or 'stdDev'class(Mean d ,MaybeVariance d )=>Variance d wherevariance ::d ->Doublevariance d d =Double -> Double square (d -> Double forall d. Variance d => d -> Double stdDev d d )stdDev ::d ->DoublestdDev =Double -> Double forall a. Floating a => a -> a sqrt(Double -> Double) -> (d -> Double) -> d -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .d -> Double forall d. Variance d => d -> Double variance {-# MINIMAL(variance |stdDev )#-}-- | Type class for distributions with entropy, meaning Shannon entropy-- in the case of a discrete distribution, or differential entropy in the-- case of a continuous one. 'maybeEntropy' should return 'Nothing' if-- entropy is undefined for the chosen parameter values.class(Distribution d )=>MaybeEntropy d where-- | Returns the entropy of a distribution, in nats, if such is defined.maybeEntropy ::d ->MaybeDouble-- | Type class for distributions with entropy, meaning Shannon-- entropy in the case of a discrete distribution, or differential-- entropy in the case of a continuous one. If the distribution has-- well-defined entropy for all valid parameter values then it-- should be an instance of this type class.class(MaybeEntropy d )=>Entropy d where-- | Returns the entropy of a distribution, in nats.entropy ::d ->Double-- | Generate discrete random variates which have given-- distribution.classDistribution d =>ContGen d wheregenContVar ::(StatefulGeng m )=>d ->g ->m Double-- | Generate discrete random variates which have given-- distribution. 'ContGen' is superclass because it's always possible-- to generate real-valued variates from integer valuesclass(DiscreteDistr d ,ContGen d )=>DiscreteGen d wheregenDiscreteVar ::(StatefulGeng m )=>d ->g ->m Int-- | Estimate distribution from sample. First parameter in sample is-- distribution type and second is element type.classFromSample d a where-- | Estimate distribution from sample. Returns 'Nothing' if there is-- not enough data, or if no usable fit results from the method-- used, e.g., the estimated distribution parameters would be-- invalid or inaccurate.fromSample ::G.Vectorv a =>v a ->Maybed -- | Generate variates from continuous distribution using inverse-- transform rule.genContinuous ::(ContDistr d ,StatefulGeng m )=>d ->g ->m DoublegenContinuous :: forall d g (m :: * -> *). (ContDistr d, StatefulGen g m) => d -> g -> m Double genContinuous d d g gen =doDouble x <-g -> m Double forall g (m :: * -> *). StatefulGen g m => g -> m Double uniformDouble01Mg gen Double -> m Double forall a. a -> m a forall (m :: * -> *) a. Monad m => a -> m a return(Double -> m Double) -> Double -> m Double forall a b. (a -> b) -> a -> b $!d -> Double -> Double forall d. ContDistr d => d -> Double -> Double quantile d d Double x dataP =P {-# UNPACK#-}!Double{-# UNPACK#-}!Double-- | Approximate the value of /X/ for which P(/x/>/X/)=/p/.---- This method uses a combination of Newton-Raphson iteration and-- bisection with the given guess as a starting point. The upper and-- lower bounds specify the interval in which the probability-- distribution reaches the value /p/.findRoot ::ContDistr d =>d -- ^ Distribution->Double-- ^ Probability /p/->Double-- ^ Initial guess->Double-- ^ Lower bound on interval->Double-- ^ Upper bound on interval->DoublefindRoot :: forall d. ContDistr d => d -> Double -> Double -> Double -> Double -> Double findRoot d d Double prob =Int -> Double -> Double -> Double -> Double -> Double loop Int 0Double 1whereloop :: Int -> Double -> Double -> Double -> Double -> Double loop !(Int i ::Int)!Double dx !Double x !Double lo !Double hi |Double -> Double forall a. Num a => a -> a absDouble dx Double -> Double -> Bool forall a. Ord a => a -> a -> Bool <=Double accuracy Bool -> Bool -> Bool ||Int i Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >=Int maxIters =Double x |Bool otherwise=Int -> Double -> Double -> Double -> Double -> Double loop (Int i Int -> Int -> Int forall a. Num a => a -> a -> a +Int 1)Double dx'' Double x'' Double lo' Double hi' whereerr :: Double err =d -> Double -> Double forall d. Distribution d => d -> Double -> Double cumulative d d Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double prob P Double lo' Double hi' |Double err Double -> Double -> Bool forall a. Ord a => a -> a -> Bool <Double 0=Double -> Double -> P P Double x Double hi |Bool otherwise=Double -> Double -> P P Double lo Double x pdf :: Double pdf =d -> Double -> Double forall d. ContDistr d => d -> Double -> Double density d d Double x P Double dx' Double x' |Double pdf Double -> Double -> Bool forall a. Eq a => a -> a -> Bool /=Double 0=Double -> Double -> P P (Double err Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double pdf )(Double x Double -> Double -> Double forall a. Num a => a -> a -> a -Double dx )|Bool otherwise=Double -> Double -> P P Double dx Double x P Double dx'' Double x'' |Double x' Double -> Double -> Bool forall a. Ord a => a -> a -> Bool <Double lo' Bool -> Bool -> Bool ||Double x' Double -> Double -> Bool forall a. Ord a => a -> a -> Bool >Double hi' Bool -> Bool -> Bool ||Double pdf Double -> Double -> Bool forall a. Eq a => a -> a -> Bool ==Double 0=lety :: Double y =(Double lo' Double -> Double -> Double forall a. Num a => a -> a -> a +Double hi' )Double -> Double -> Double forall a. Fractional a => a -> a -> a /Double 2inDouble -> Double -> P P (Double y Double -> Double -> Double forall a. Num a => a -> a -> a -Double x )Double y |Bool otherwise=Double -> Double -> P P Double dx' Double x' accuracy :: Double accuracy =Double 1e-15maxIters :: Int maxIters =Int 150-- | Sum probabilities in inclusive interval.sumProbabilities ::DiscreteDistr d =>d ->Int->Int->DoublesumProbabilities :: forall d. DiscreteDistr d => d -> Int -> Int -> Double sumProbabilities d d Int low Int hi =-- Return value is forced to be less than 1 to guard against roundoff errors.-- ATTENTION! this check should be removed for testing or it could mask bugs.Double -> Double -> Double forall a. Ord a => a -> a -> a minDouble 1(Double -> Double) -> (Vector Int -> Double) -> Vector Int -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .Vector Double -> Double forall (v :: * -> *). Vector v Double => v Double -> Double sum (Vector Double -> Double) -> (Vector Int -> Vector Double) -> Vector Int -> Double forall b c a. (b -> c) -> (a -> b) -> a -> c .(Int -> Double) -> Vector Int -> Vector Double forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b U.map(d -> Int -> Double forall d. DiscreteDistr d => d -> Int -> Double probability d d )(Vector Int -> Double) -> Vector Int -> Double forall a b. (a -> b) -> a -> b $Int -> Int -> Vector Int forall a. (Unbox a, Enum a) => a -> a -> Vector a U.enumFromToInt low Int hi