-- | Deterministic and probabilistic valuesmoduleNumeric.Probability.DistributionwhereimportNumeric.Probability.Show (showR )importqualifiedNumeric.Probability.Shape asShapeimportControl.Applicative(Applicative(..))importControl.Monad(liftM,liftM2,join,)importqualifiedData.FoldableasFoldimportqualifiedData.List.HTasListHTimportqualifiedData.MapasMapimportqualifiedData.ListasListimportqualifiedData.MaybeasMaybeimportData.Tuple.HT(mapFst,)importData.Ord.HT(comparing,)importData.Eq.HT(equating,)importPreludehiding(map,filter)-- * EventstypeEvent a =a ->BooloneOf::Eqa =>[a ]->Event a oneOf =flipelemjust::Eqa =>a ->Event a just =(==)-- * Distributions{- |
Probability disribution
The underlying data structure is a list.
Unfortunately we cannot use a more efficient data structure
because the key type must be of class 'Ord',
but the 'Monad' class does not allow constraints for result types.
The Monad instance is particularly useful
because many generic monad functions make sense here,
monad transformers can be used
and the monadic design allows to simulate probabilistic games in an elegant manner.
We have the same problem like making "Data.Set" an instance of 'Monad',
see <http://www.randomhacks.net/articles/2007/03/15/data-set-monad-haskell-macros>
If you need efficiency, you should remove redundant elements by 'norm'.
'norm' converts to 'Data.Map' and back internally
and you can hope that the compiler fuses the lists with the intermediate Map structure.
The defined monad is equivalent to
@WriterT (Product prob) [] a@.
See <http://www.randomhacks.net/articles/2007/02/21/refactoring-probability-distributions>.
-}newtypeT prob a =Cons {decons ::[(a ,prob )]}certainly::Numprob =>a ->T prob a certainly x =Cons [(x ,1)]instanceNumprob =>Monad(T prob )wherereturn =certainly d >>= f =Cons [(y ,q *p )|(x ,p )<-deconsd ,(y ,q )<-decons(f x )]instanceNumprob =>Applicative(T prob )wherepure =certainly fm <*> m =Cons [(f x ,q *p )|(f ,p )<-deconsfm ,(x ,q )<-deconsm ]{-
Dist cannot be an instance of MonadPlus,
because there is no mzero
(it would be an empty list of events, but their probabilities do not sum up to 1)
and thus it breaks the normalization for the >>= combinator.
See for instance the Boys example:
 do f <- family
 guard (existsBoy f)
 return f
mplus is not associative because we have to normalize the sum of probabilities to 1.
instance MonadPlus Dist where
 mzero = Cons []
 mplus d d' =
 if isZero d || isZero d'
 then mzero
 else unfoldD $ choose 0.5 d d'
isZero :: Dist a -> Bool
isZero (Cons d) = null d
-}instanceFunctor(T prob )wherefmap f (Cons d )=Cons [(f x ,p )|(x ,p )<-d ]errorMargin::RealFloatprob =>prob errorMargin =leteps =10/fromInteger(floatRadixeps )^floatDigitseps ineps {- |
Check whether two distributions are equal when neglecting rounding errors.
We do not want to put this into an 'Eq' instance,
since it is not exact equivalence
and it seems to be too easy to mix it up with @liftM2 (==) x y@.
-}approx::(RealFloatprob ,Orda )=>T prob a ->T prob a ->Boolapprox (Cons xs )(Cons ys )=let(xse ,xsp )=unzip(norm' xs )(yse ,ysp )=unzip(norm' ys )inxse ==yse &&all(\p ->absp <errorMargin )(zipWith(-)xsp ysp )-- ** Auxiliary functions for constructing and working with distributionslift::(Numprob )=>([(a ,prob )]->[(a ,prob )])->T prob a ->T prob a lift f =Cons .f .deconssize::T prob a ->Intsize =length.deconscheck::(RealFloatprob ,Showprob )=>T prob a ->T prob a check (Cons d )=ifabs(1-sumP d )<errorMargin thenCons d elseerror("Illegal distribution: total probability = "++show(sumP d ))-- | can fail because of rounding errors, better use 'fromFreqs'cons::(RealFloatprob ,Showprob )=>[(a ,prob )]->T prob a cons =check .Cons sumP::Numprob =>[(a ,prob )]->prob sumP =sum.List.mapsndsortP::Ordprob =>[(a ,prob )]->[(a ,prob )]sortP =List.sortBy(comparingsnd)sortPDesc::Ordprob =>[(a ,prob )]->[(a ,prob )]sortPDesc =List.sortBy(flip$comparingsnd)sortElem::Orda =>[(a ,prob )]->[(a ,prob )]sortElem =List.sortBy(comparingfst)-- ** Normalization = groupingnorm::(Numprob ,Orda )=>T prob a ->T prob a norm =lift norm' norm'::(Numprob ,Orda )=>[(a ,prob )]->[(a ,prob )]norm' =Map.toAscList.Map.fromListWith(+)norm''::(Numprob ,Orda )=>[(a ,prob )]->[(a ,prob )]norm'' =List.map(\xs ->casexs of((x ,_):_)->(x ,sum(List.mapsndxs ))_->error"Probability.Distribution.norm'': every sub-list in groupBy must be non-empty").ListHT.groupBy(equatingfst).sortElem -- | pretty printingpretty::(Orda ,Showa ,Numprob ,Ordprob )=>(prob ->String)->T prob a ->Stringpretty _(Cons [])="Impossible"prettyshowProb (Cons xs )=letw =maximum(List.map(length.show.fst)xs )inconcatMap(\(x ,p )->showR w x ++' ':showProb p ++"\n")(sortPDesc (norm' xs ))infix0//% (//%)::(Orda ,Showa )=>T Rationala ->()->IO()(//% )x ()=putStr(pretty showx )instance(Numprob ,Ordprob ,Showprob ,Orda ,Showa )=>Show(T prob a )whereshowsPrec p (Cons xs )=showParen(p >10)(showString"fromFreqs ".showsPrec11(sortPDesc (norm' xs ))){- |
We would like to have an equality test of type
> (==) :: T prob a -> T prob a -> T prob Bool
that is consistent with the 'Num' instance.
We would certainly define
> x==y = norm (liftM2 (==) x y) .
However the 'Eq' class enforces the type
> T prob a -> T prob a -> Bool .
We could implement this as check for equal distributions.
This would be inconsistent with the 'Num' instance
because it compares entire distributions,
not only individual outcomes.
Thus we provide this function as 'equal'.
I would prefer to omit the 'Eq' instance completely,
but unfortunately the 'Num' instance requires 'Eq' as superclass.
-}instanceEq(T prob a )where(== )=error"Probability.Distribution.== cannot be implemented sensibly."{-
instance (Num prob, Ord a) => Eq (T prob a) where
 (==) = equal
-}equal::(Numprob ,Eqprob ,Orda )=>T prob a ->T prob a ->Boolequal =equating(decons.norm ){-
The Num operations consider their operands as independent distributions
(like all operations on distributions do).
All functions normalize their results if normalization is lost by the plain operation.
This is essential for performance.
Thus @sum $ replicate 10 d@ is significantly faster
than @fmap sum $ replicateM 10 d@
-}instance(Numprob ,Ordprob ,Orda ,Numa )=>Num(T prob a )wherefromInteger =return.fromIntegerx + y =norm (liftM2(+)x y )x -y =norm (liftM2(-)x y )x * y =norm (liftM2(*)x y )abs x =norm (liftMabsx )signum x =norm (liftMsignumx )negate x =liftMnegatex instance(Numprob ,Ordprob ,Orda ,Fractionala )=>Fractional(T prob a )wherefromRational =return.fromRationalrecip x =liftMrecipx x / y =norm (liftM2(/)x y )-- * Spread: functions to convert a list of values into a distribution-- | distribution generatorstypeSpread prob a =[a ]->T prob a {- not a valid distribution
impossible :: T prob a
impossible = mzero
-}choose::Numprob =>prob ->a ->a ->T prob a choose p x y =Cons $zip[x ,y ][p ,1-p ]enum::Fractionalprob =>[Int]->Spread prob a enum =relative .List.mapfromIntegral{- |
Give a list of frequencies, they do not need to sum up to 1.
-}relative::Fractionalprob =>[prob ]->Spread prob a relative ns =fromFreqs .flipzipns shape::Fractionalprob =>(prob ->prob )->Spread prob a shape _[]=error"Probability.shape: empty list"shapef xs =letincr =1/fromIntegral(lengthxs -1)ps =List.mapf (iterate(+incr )0)infromFreqs (zipxs ps )linear::Fractionalprob =>Spread prob a linear =shape Shape.linear uniform::Fractionalprob =>Spread prob a uniform =shape Shape.uniform negExp::Floatingprob =>Spread prob a negExp =shape Shape.negExp normal::Floatingprob =>Spread prob a normal =shape Shape.normal -- | extracting and mapping the domain of a distributionextract::T prob a ->[a ]extract =List.mapfst.decons-- | 'fmap' with normalizationmap::(Numprob ,Ordb )=>(a ->b )->T prob a ->T prob b map f =norm .fmapf {- |
unfold a distribution of distributions into one distribution,
this is 'Control.Monad.join' with normalization.
-}unfold::(Numprob ,Orda )=>T prob (T prob a )->T prob a unfold =norm .join-- | conditional distributioncond::(Numprob )=>T prob Bool->T prob a {-^ True -}->T prob a {-^ False -}->T prob a cond b d d' =b >>=\c ->ifc thend elsed' truth::(Numprob )=>T prob Bool->prob truth dist =casedecons(norm dist )of(b ,p ):_->ifb thenp else1-p []->error"Probability.truth: corrupt boolean random variable"infixl1>>=? infixr1?=<< -- | conditional probability, identical to 'Dist.filter'(?=<<)::(Fractionalprob )=>(a ->Bool)->T prob a ->T prob a (?=<< )=filter {- |
'Dist.filter' in infix form.
Can be considered an additional monadic combinator,
which can be used where you would want 'Control.Monad.guard' otherwise.
-}(>>=?)::(Fractionalprob )=>T prob a ->(a ->Bool)->T prob a (>>=? )=flipfilter -- | filtering distributionsdataSelect a =Case a |Other deriving(Eq,Ord,Show)above,below::(Numprob ,Ordprob ,Orda )=>prob ->T prob a ->T prob (Select a )above p =select (>=p )below p =select (<=p )select::(Numprob ,Ordprob ,Orda )=>(prob ->Bool)->T prob a ->T prob (Select a )select condp (Cons d )=let(d1 ,d2 )=Map.partitioncondp $Map.fromListWith(+)d inCons $(Other ,Fold.sumd2 ):List.map(mapFstCase )(Map.toAscListd1 )fromFreqs::(Fractionalprob )=>[(a ,prob )]->T prob a fromFreqs xs =Cons (List.map(\(x ,p )->(x ,p /q ))xs )whereq =sumP xs filter::(Fractionalprob )=>(a ->Bool)->T prob a ->T prob a filter p =fromFreqs .List.filter(p .fst).deconsmapMaybe::(Fractionalprob )=>(a ->Maybeb )->T prob a ->T prob b mapMaybe f =fromFreqs .Maybe.mapMaybe(\(x ,p )->fmap(flip(,)p )$f x ).decons-- | selecting from distributionsselectP::(Numprob ,Ordprob )=>T prob a ->prob ->a selectP (Cons d )p =scanP p d scanP::(Numprob ,Ordprob )=>prob ->[(a ,prob )]->a scanP p ((x ,q ):ps )=ifp <=q ||nullps thenx elsescanP (p -q )ps scanP_[]=error"Probability.scanP: distribution must be non-empty"infixr1?? (??)::Numprob =>Event a ->T prob a ->prob (?? )p =sumP .List.filter(p .fst).decons-- | expectation valueexpected::(Numa )=>T a a ->a expected =sum.List.map(\(x ,p )->x *p ).decons-- | statistical analysesvariance::(Numa )=>T a a ->a variance x =expected (fmap((^(2::Int)).subtract(expected x ))x )stdDev::(Floatinga )=>T a a ->a stdDev =sqrt.variance