-- | Deterministic and probabilistic valuesmoduleNumeric.Probability.DistributionwhereimportNumeric.Probability.Show (showR )importqualifiedNumeric.Probability.Shape asShapeimportControl.Applicative(Applicative(..))importControl.Monad(liftM,liftM2,join,)importqualifiedData.FoldableasFoldimportqualifiedData.List.HTasListHTimportqualifiedData.MapasMapimportqualifiedData.ListasListimportqualifiedData.MaybeasMaybeimportData.Tuple.HT(mapFst,)importData.Ord.HT(comparing,)importData.Eq.HT(equating,)importPreludehiding(map,filter)-- * EventstypeEvent a =a ->BooloneOf::Eqa =>[a ]->Event a oneOf =flipelemjust::Eqa =>a ->Event a just =(==)-- * Distributions{- | Probability disribution The underlying data structure is a list. Unfortunately we cannot use a more efficient data structure because the key type must be of class 'Ord', but the 'Monad' class does not allow constraints for result types. The Monad instance is particularly useful because many generic monad functions make sense here, monad transformers can be used and the monadic design allows to simulate probabilistic games in an elegant manner. We have the same problem like making "Data.Set" an instance of 'Monad', see <http://www.randomhacks.net/articles/2007/03/15/data-set-monad-haskell-macros> If you need efficiency, you should remove redundant elements by 'norm'. 'norm' converts to 'Data.Map' and back internally and you can hope that the compiler fuses the lists with the intermediate Map structure. The defined monad is equivalent to @WriterT (Product prob) [] a@. See <http://www.randomhacks.net/articles/2007/02/21/refactoring-probability-distributions>. -}newtypeT prob a =Cons {decons ::[(a ,prob )]}certainly::Numprob =>a ->T prob a certainly x =Cons [(x ,1)]instanceNumprob =>Monad(T prob )wherereturn =certainly d >>= f =Cons [(y ,q *p )|(x ,p )<-deconsd ,(y ,q )<-decons(f x )]instanceNumprob =>Applicative(T prob )wherepure =certainly fm <*> m =Cons [(f x ,q *p )|(f ,p )<-deconsfm ,(x ,q )<-deconsm ]{- Dist cannot be an instance of MonadPlus, because there is no mzero (it would be an empty list of events, but their probabilities do not sum up to 1) and thus it breaks the normalization for the >>= combinator. See for instance the Boys example: do f <- family guard (existsBoy f) return f mplus is not associative because we have to normalize the sum of probabilities to 1. instance MonadPlus Dist where mzero = Cons [] mplus d d' = if isZero d || isZero d' then mzero else unfoldD $ choose 0.5 d d' isZero :: Dist a -> Bool isZero (Cons d) = null d -}instanceFunctor(T prob )wherefmap f (Cons d )=Cons [(f x ,p )|(x ,p )<-d ]errorMargin::RealFloatprob =>prob errorMargin =leteps =10/fromInteger(floatRadixeps )^floatDigitseps ineps {- | Check whether two distributions are equal when neglecting rounding errors. We do not want to put this into an 'Eq' instance, since it is not exact equivalence and it seems to be too easy to mix it up with @liftM2 (==) x y@. -}approx::(RealFloatprob ,Orda )=>T prob a ->T prob a ->Boolapprox (Cons xs )(Cons ys )=let(xse ,xsp )=unzip(norm' xs )(yse ,ysp )=unzip(norm' ys )inxse ==yse &&all(\p ->absp <errorMargin )(zipWith(-)xsp ysp )-- ** Auxiliary functions for constructing and working with distributionslift::(Numprob )=>([(a ,prob )]->[(a ,prob )])->T prob a ->T prob a lift f =Cons .f .deconssize::T prob a ->Intsize =length.deconscheck::(RealFloatprob ,Showprob )=>T prob a ->T prob a check (Cons d )=ifabs(1-sumP d )<errorMargin thenCons d elseerror("Illegal distribution: total probability = "++show(sumP d ))-- | can fail because of rounding errors, better use 'fromFreqs'cons::(RealFloatprob ,Showprob )=>[(a ,prob )]->T prob a cons =check .Cons sumP::Numprob =>[(a ,prob )]->prob sumP =sum.List.mapsndsortP::Ordprob =>[(a ,prob )]->[(a ,prob )]sortP =List.sortBy(comparingsnd)sortPDesc::Ordprob =>[(a ,prob )]->[(a ,prob )]sortPDesc =List.sortBy(flip$comparingsnd)sortElem::Orda =>[(a ,prob )]->[(a ,prob )]sortElem =List.sortBy(comparingfst)-- ** Normalization = groupingnorm::(Numprob ,Orda )=>T prob a ->T prob a norm =lift norm' norm'::(Numprob ,Orda )=>[(a ,prob )]->[(a ,prob )]norm' =Map.toAscList.Map.fromListWith(+)norm''::(Numprob ,Orda )=>[(a ,prob )]->[(a ,prob )]norm'' =List.map(\xs ->casexs of((x ,_):_)->(x ,sum(List.mapsndxs ))_->error"Probability.Distribution.norm'': every sub-list in groupBy must be non-empty").ListHT.groupBy(equatingfst).sortElem -- | pretty printingpretty::(Orda ,Showa ,Numprob ,Ordprob )=>(prob ->String)->T prob a ->Stringpretty _(Cons [])="Impossible"prettyshowProb (Cons xs )=letw =maximum(List.map(length.show.fst)xs )inconcatMap(\(x ,p )->showR w x ++' ':showProb p ++"\n")(sortPDesc (norm' xs ))infix0//% (//%)::(Orda ,Showa )=>T Rationala ->()->IO()(//% )x ()=putStr(pretty showx )instance(Numprob ,Ordprob ,Showprob ,Orda ,Showa )=>Show(T prob a )whereshowsPrec p (Cons xs )=showParen(p >10)(showString"fromFreqs ".showsPrec11(sortPDesc (norm' xs ))){- | We would like to have an equality test of type > (==) :: T prob a -> T prob a -> T prob Bool that is consistent with the 'Num' instance. We would certainly define > x==y = norm (liftM2 (==) x y) . However the 'Eq' class enforces the type > T prob a -> T prob a -> Bool . We could implement this as check for equal distributions. This would be inconsistent with the 'Num' instance because it compares entire distributions, not only individual outcomes. Thus we provide this function as 'equal'. I would prefer to omit the 'Eq' instance completely, but unfortunately the 'Num' instance requires 'Eq' as superclass. -}instanceEq(T prob a )where(== )=error"Probability.Distribution.== cannot be implemented sensibly."{- instance (Num prob, Ord a) => Eq (T prob a) where (==) = equal -}equal::(Numprob ,Eqprob ,Orda )=>T prob a ->T prob a ->Boolequal =equating(decons.norm ){- The Num operations consider their operands as independent distributions (like all operations on distributions do). All functions normalize their results if normalization is lost by the plain operation. This is essential for performance. Thus @sum $ replicate 10 d@ is significantly faster than @fmap sum $ replicateM 10 d@ -}instance(Numprob ,Ordprob ,Orda ,Numa )=>Num(T prob a )wherefromInteger =return.fromIntegerx + y =norm (liftM2(+)x y )x -y =norm (liftM2(-)x y )x * y =norm (liftM2(*)x y )abs x =norm (liftMabsx )signum x =norm (liftMsignumx )negate x =liftMnegatex instance(Numprob ,Ordprob ,Orda ,Fractionala )=>Fractional(T prob a )wherefromRational =return.fromRationalrecip x =liftMrecipx x / y =norm (liftM2(/)x y )-- * Spread: functions to convert a list of values into a distribution-- | distribution generatorstypeSpread prob a =[a ]->T prob a {- not a valid distribution impossible :: T prob a impossible = mzero -}choose::Numprob =>prob ->a ->a ->T prob a choose p x y =Cons $zip[x ,y ][p ,1-p ]enum::Fractionalprob =>[Int]->Spread prob a enum =relative .List.mapfromIntegral{- | Give a list of frequencies, they do not need to sum up to 1. -}relative::Fractionalprob =>[prob ]->Spread prob a relative ns =fromFreqs .flipzipns shape::Fractionalprob =>(prob ->prob )->Spread prob a shape _[]=error"Probability.shape: empty list"shapef xs =letincr =1/fromIntegral(lengthxs -1)ps =List.mapf (iterate(+incr )0)infromFreqs (zipxs ps )linear::Fractionalprob =>Spread prob a linear =shape Shape.linear uniform::Fractionalprob =>Spread prob a uniform =shape Shape.uniform negExp::Floatingprob =>Spread prob a negExp =shape Shape.negExp normal::Floatingprob =>Spread prob a normal =shape Shape.normal -- | extracting and mapping the domain of a distributionextract::T prob a ->[a ]extract =List.mapfst.decons-- | 'fmap' with normalizationmap::(Numprob ,Ordb )=>(a ->b )->T prob a ->T prob b map f =norm .fmapf {- | unfold a distribution of distributions into one distribution, this is 'Control.Monad.join' with normalization. -}unfold::(Numprob ,Orda )=>T prob (T prob a )->T prob a unfold =norm .join-- | conditional distributioncond::(Numprob )=>T prob Bool->T prob a {-^ True -}->T prob a {-^ False -}->T prob a cond b d d' =b >>=\c ->ifc thend elsed' truth::(Numprob )=>T prob Bool->prob truth dist =casedecons(norm dist )of(b ,p ):_->ifb thenp else1-p []->error"Probability.truth: corrupt boolean random variable"infixl1>>=? infixr1?=<< -- | conditional probability, identical to 'Dist.filter'(?=<<)::(Fractionalprob )=>(a ->Bool)->T prob a ->T prob a (?=<< )=filter {- | 'Dist.filter' in infix form. Can be considered an additional monadic combinator, which can be used where you would want 'Control.Monad.guard' otherwise. -}(>>=?)::(Fractionalprob )=>T prob a ->(a ->Bool)->T prob a (>>=? )=flipfilter -- | filtering distributionsdataSelect a =Case a |Other deriving(Eq,Ord,Show)above,below::(Numprob ,Ordprob ,Orda )=>prob ->T prob a ->T prob (Select a )above p =select (>=p )below p =select (<=p )select::(Numprob ,Ordprob ,Orda )=>(prob ->Bool)->T prob a ->T prob (Select a )select condp (Cons d )=let(d1 ,d2 )=Map.partitioncondp $Map.fromListWith(+)d inCons $(Other ,Fold.sumd2 ):List.map(mapFstCase )(Map.toAscListd1 )fromFreqs::(Fractionalprob )=>[(a ,prob )]->T prob a fromFreqs xs =Cons (List.map(\(x ,p )->(x ,p /q ))xs )whereq =sumP xs filter::(Fractionalprob )=>(a ->Bool)->T prob a ->T prob a filter p =fromFreqs .List.filter(p .fst).deconsmapMaybe::(Fractionalprob )=>(a ->Maybeb )->T prob a ->T prob b mapMaybe f =fromFreqs .Maybe.mapMaybe(\(x ,p )->fmap(flip(,)p )$f x ).decons-- | selecting from distributionsselectP::(Numprob ,Ordprob )=>T prob a ->prob ->a selectP (Cons d )p =scanP p d scanP::(Numprob ,Ordprob )=>prob ->[(a ,prob )]->a scanP p ((x ,q ):ps )=ifp <=q ||nullps thenx elsescanP (p -q )ps scanP_[]=error"Probability.scanP: distribution must be non-empty"infixr1?? (??)::Numprob =>Event a ->T prob a ->prob (?? )p =sumP .List.filter(p .fst).decons-- | expectation valueexpected::(Numa )=>T a a ->a expected =sum.List.map(\(x ,p )->x *p ).decons-- | statistical analysesvariance::(Numa )=>T a a ->a variance x =expected (fmap((^(2::Int)).subtract(expected x ))x )stdDev::(Floatinga )=>T a a ->a stdDev =sqrt.variance