{-# LANGUAGE MultiParamTypeClasses #-}{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}-- |-- Module : Statistics.Distribution-- Copyright : (c) 2009 Bryan O'Sullivan-- License : BSD3---- Maintainer : bos@serpentine.com-- Stability : experimental-- Portability : portable---- Type classes for probability distributionsmoduleStatistics.Distribution(-- * Type classesDistribution (..),DiscreteDistr (..),ContDistr (..)-- ** Distribution statistics,MaybeMean (..),Mean (..),MaybeVariance (..),Variance (..),MaybeEntropy (..),Entropy (..),FromSample (..)-- ** Random number generation,ContGen (..),DiscreteGen (..),genContinuous -- * Helper functions,findRoot ,sumProbabilities )whereimportPreludehiding(sum)importStatistics.Function (square )importStatistics.Sample.Internal (sum )importSystem.Random.Stateful(StatefulGen,uniformDouble01M)importqualifiedData.Vector.UnboxedasUimportqualifiedData.Vector.GenericasG-- | Type class common to all distributions. Only c.d.f. could be-- defined for both discrete and continuous distributions.classDistribution d where-- | Cumulative distribution function. The probability that a-- random variable /X/ is less or equal than /x/,-- i.e. P(/X/≤/x/). Cumulative should be defined for-- infinities as well:---- > cumulative d +∞ = 1-- > cumulative d -∞ = 0cumulative ::d ->Double->Doublecumulative d
d Double
x =Double
1Double -> Double -> Double
forall a. Num a => a -> a -> a
-d -> Double -> Double
forall d. Distribution d => d -> Double -> Double
complCumulative d
d Double
x -- | One's complement of cumulative distribution:---- > complCumulative d x = 1 - cumulative d x---- It's useful when one is interested in P(/X/>/x/) and-- expression on the right side begin to lose precision. This-- function have default implementation but implementors are-- encouraged to provide more precise implementation.complCumulative ::d ->Double->DoublecomplCumulative d
d Double
x =Double
1Double -> Double -> Double
forall a. Num a => a -> a -> a
-d -> Double -> Double
forall d. Distribution d => d -> Double -> Double
cumulative d
d Double
x {-# MINIMAL(cumulative |complCumulative )#-}-- | Discrete probability distribution.classDistribution d =>DiscreteDistr d where-- | Probability of n-th outcome.probability ::d ->Int->Doubleprobability d
d =Double -> Double
forall a. Floating a => a -> a
exp(Double -> Double) -> (Int -> Double) -> Int -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.d -> Int -> Double
forall d. DiscreteDistr d => d -> Int -> Double
logProbability d
d -- | Logarithm of probability of n-th outcomelogProbability ::d ->Int->DoublelogProbability d
d =Double -> Double
forall a. Floating a => a -> a
log(Double -> Double) -> (Int -> Double) -> Int -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.d -> Int -> Double
forall d. DiscreteDistr d => d -> Int -> Double
probability d
d {-# MINIMAL(probability |logProbability )#-}-- | Continuous probability distribution.---- Minimal complete definition is 'quantile' and either 'density' or-- 'logDensity'.classDistribution d =>ContDistr d where-- | Probability density function. Probability that random-- variable /X/ lies in the infinitesimal interval-- [/x/,/x+/δ/x/) equal to /density(x)/⋅δ/x/density ::d ->Double->Doubledensity d
d =Double -> Double
forall a. Floating a => a -> a
exp(Double -> Double) -> (Double -> Double) -> Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.d -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
logDensity d
d -- | Natural logarithm of density.logDensity ::d ->Double->DoublelogDensity d
d =Double -> Double
forall a. Floating a => a -> a
log(Double -> Double) -> (Double -> Double) -> Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.d -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
density d
d -- | Inverse of the cumulative distribution function. The value-- /x/ for which P(/X/≤/x/) = /p/. If probability is outside-- of [0,1] range function should call 'error'quantile ::d ->Double->Doublequantile d
d Double
x =d -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
complQuantile d
d (Double
1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
x )-- | 1-complement of @quantile@:---- > complQuantile x ≡ quantile (1 - x)complQuantile ::d ->Double->DoublecomplQuantile d
d Double
x =d -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
quantile d
d (Double
1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
x ){-# MINIMAL(density |logDensity ),(quantile |complQuantile )#-}-- | Type class for distributions with mean. 'maybeMean' should return-- 'Nothing' if it's undefined for current value of dataclassDistribution d =>MaybeMean d wheremaybeMean ::d ->MaybeDouble-- | Type class for distributions with mean. If a distribution has-- finite mean for all valid values of parameters it should be-- instance of this type class.classMaybeMean d =>Mean d wheremean ::d ->Double-- | Type class for distributions with variance. If variance is-- undefined for some parameter values both 'maybeVariance' and-- 'maybeStdDev' should return Nothing.---- Minimal complete definition is 'maybeVariance' or 'maybeStdDev'classMaybeMean d =>MaybeVariance d wheremaybeVariance ::d ->MaybeDoublemaybeVariance =(Double -> Double) -> Maybe Double -> Maybe Double
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmapDouble -> Double
square (Maybe Double -> Maybe Double)
-> (d -> Maybe Double) -> d -> Maybe Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.d -> Maybe Double
forall d. MaybeVariance d => d -> Maybe Double
maybeStdDev maybeStdDev ::d ->MaybeDoublemaybeStdDev =(Double -> Double) -> Maybe Double -> Maybe Double
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmapDouble -> Double
forall a. Floating a => a -> a
sqrt(Maybe Double -> Maybe Double)
-> (d -> Maybe Double) -> d -> Maybe Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.d -> Maybe Double
forall d. MaybeVariance d => d -> Maybe Double
maybeVariance {-# MINIMAL(maybeVariance |maybeStdDev )#-}-- | Type class for distributions with variance. If distribution have-- finite variance for all valid parameter values it should be-- instance of this type class.---- Minimal complete definition is 'variance' or 'stdDev'class(Mean d ,MaybeVariance d )=>Variance d wherevariance ::d ->Doublevariance d
d =Double -> Double
square (d -> Double
forall d. Variance d => d -> Double
stdDev d
d )stdDev ::d ->DoublestdDev =Double -> Double
forall a. Floating a => a -> a
sqrt(Double -> Double) -> (d -> Double) -> d -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.d -> Double
forall d. Variance d => d -> Double
variance {-# MINIMAL(variance |stdDev )#-}-- | Type class for distributions with entropy, meaning Shannon entropy-- in the case of a discrete distribution, or differential entropy in the-- case of a continuous one. 'maybeEntropy' should return 'Nothing' if-- entropy is undefined for the chosen parameter values.class(Distribution d )=>MaybeEntropy d where-- | Returns the entropy of a distribution, in nats, if such is defined.maybeEntropy ::d ->MaybeDouble-- | Type class for distributions with entropy, meaning Shannon-- entropy in the case of a discrete distribution, or differential-- entropy in the case of a continuous one. If the distribution has-- well-defined entropy for all valid parameter values then it-- should be an instance of this type class.class(MaybeEntropy d )=>Entropy d where-- | Returns the entropy of a distribution, in nats.entropy ::d ->Double-- | Generate discrete random variates which have given-- distribution.classDistribution d =>ContGen d wheregenContVar ::(StatefulGeng m )=>d ->g ->m Double-- | Generate discrete random variates which have given-- distribution. 'ContGen' is superclass because it's always possible-- to generate real-valued variates from integer valuesclass(DiscreteDistr d ,ContGen d )=>DiscreteGen d wheregenDiscreteVar ::(StatefulGeng m )=>d ->g ->m Int-- | Estimate distribution from sample. First parameter in sample is-- distribution type and second is element type.classFromSample d a where-- | Estimate distribution from sample. Returns 'Nothing' if there is-- not enough data, or if no usable fit results from the method-- used, e.g., the estimated distribution parameters would be-- invalid or inaccurate.fromSample ::G.Vectorv a =>v a ->Maybed -- | Generate variates from continuous distribution using inverse-- transform rule.genContinuous ::(ContDistr d ,StatefulGeng m )=>d ->g ->m DoublegenContinuous :: forall d g (m :: * -> *).
(ContDistr d, StatefulGen g m) =>
d -> g -> m Double
genContinuous d
d g
gen =doDouble
x <-g -> m Double
forall g (m :: * -> *). StatefulGen g m => g -> m Double
uniformDouble01Mg
gen Double -> m Double
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return(Double -> m Double) -> Double -> m Double
forall a b. (a -> b) -> a -> b
$!d -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
quantile d
d Double
x dataP =P {-# UNPACK#-}!Double{-# UNPACK#-}!Double-- | Approximate the value of /X/ for which P(/x/>/X/)=/p/.---- This method uses a combination of Newton-Raphson iteration and-- bisection with the given guess as a starting point. The upper and-- lower bounds specify the interval in which the probability-- distribution reaches the value /p/.findRoot ::ContDistr d =>d -- ^ Distribution->Double-- ^ Probability /p/->Double-- ^ Initial guess->Double-- ^ Lower bound on interval->Double-- ^ Upper bound on interval->DoublefindRoot :: forall d.
ContDistr d =>
d -> Double -> Double -> Double -> Double -> Double
findRoot d
d Double
prob =Int -> Double -> Double -> Double -> Double -> Double
loop Int
0Double
1whereloop :: Int -> Double -> Double -> Double -> Double -> Double
loop !(Int
i ::Int)!Double
dx !Double
x !Double
lo !Double
hi |Double -> Double
forall a. Num a => a -> a
absDouble
dx Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<=Double
accuracy Bool -> Bool -> Bool
||Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
maxIters =Double
x |Bool
otherwise=Int -> Double -> Double -> Double -> Double -> Double
loop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)Double
dx'' Double
x'' Double
lo' Double
hi' whereerr :: Double
err =d -> Double -> Double
forall d. Distribution d => d -> Double -> Double
cumulative d
d Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
prob P Double
lo' Double
hi' |Double
err Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<Double
0=Double -> Double -> P
P Double
x Double
hi |Bool
otherwise=Double -> Double -> P
P Double
lo Double
x pdf :: Double
pdf =d -> Double -> Double
forall d. ContDistr d => d -> Double -> Double
density d
d Double
x P Double
dx' Double
x' |Double
pdf Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
/=Double
0=Double -> Double -> P
P (Double
err Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
pdf )(Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
dx )|Bool
otherwise=Double -> Double -> P
P Double
dx Double
x P Double
dx'' Double
x'' |Double
x' Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<Double
lo' Bool -> Bool -> Bool
||Double
x' Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>Double
hi' Bool -> Bool -> Bool
||Double
pdf Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
==Double
0=lety :: Double
y =(Double
lo' Double -> Double -> Double
forall a. Num a => a -> a -> a
+Double
hi' )Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
2inDouble -> Double -> P
P (Double
y Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
x )Double
y |Bool
otherwise=Double -> Double -> P
P Double
dx' Double
x' accuracy :: Double
accuracy =Double
1e-15maxIters :: Int
maxIters =Int
150-- | Sum probabilities in inclusive interval.sumProbabilities ::DiscreteDistr d =>d ->Int->Int->DoublesumProbabilities :: forall d. DiscreteDistr d => d -> Int -> Int -> Double
sumProbabilities d
d Int
low Int
hi =-- Return value is forced to be less than 1 to guard against roundoff errors.-- ATTENTION! this check should be removed for testing or it could mask bugs.Double -> Double -> Double
forall a. Ord a => a -> a -> a
minDouble
1(Double -> Double)
-> (Vector Int -> Double) -> Vector Int -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
sum (Vector Double -> Double)
-> (Vector Int -> Vector Double) -> Vector Int -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
.(Int -> Double) -> Vector Int -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map(d -> Int -> Double
forall d. DiscreteDistr d => d -> Int -> Double
probability d
d )(Vector Int -> Double) -> Vector Int -> Double
forall a b. (a -> b) -> a -> b
$Int -> Int -> Vector Int
forall a. (Unbox a, Enum a) => a -> a -> Vector a
U.enumFromToInt
low Int
hi