{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples, NoImplicitPrelude #-}
{-# OPTIONS_GHC -O2 -fno-warn-name-shadowing #-}

-- |
-- Module      :  GHC.Internal.Encoding.UTF8
-- Copyright   :  (c) The University of Glasgow, 1994-2023
-- License     :  see libraries/base/LICENSE
--
-- Maintainer  :  ghc-devs@haskell.org
-- Stability   :  internal
-- Portability :  non-portable (GHC extensions)
--
-- /The API of this module is unstable and not meant to be consumed by the general public./
-- If you absolutely must depend on it, make sure to use a tight upper
-- bound, e.g., @base < 4.X@ rather than @base < 5@, because the interface can
-- change rapidly without much warning.
--
-- Simple UTF-8 codecs supporting non-streaming encoding/decoding.
-- For encoding where codepoints may be broken across buffers,
-- see "GHC.IO.Encoding.UTF8".
--
-- This is one of several UTF-8 implementations provided by GHC; see Note
-- [GHC's many UTF-8 implementations] in "GHC.Encoding.UTF8" for an
-- overview.
--
module GHC.Internal.Encoding.UTF8
    ( -- * Decoding single characters
      utf8DecodeCharAddr#
    , utf8DecodeCharPtr
    , utf8DecodeCharByteArray#
      -- * Decoding strings
    , utf8DecodeByteArray#
    , utf8DecodeForeignPtr
      -- * Counting characters
    , utf8CountCharsByteArray#
      -- * Comparison
    , utf8CompareByteArray#
      -- * Encoding strings
    , utf8EncodePtr
    , utf8EncodeByteArray#
    , utf8EncodedLength
    ) where

import GHC.Types
import GHC.Internal.Base
import GHC.Internal.IO
import GHC.Internal.ST
import GHC.Internal.Word
import GHC.Internal.ForeignPtr
import GHC.Internal.Num
import GHC.Internal.Bits
import GHC.Internal.Real
import GHC.Internal.Ptr

{-
Note [GHC's many UTF-8 implementations]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Currently GHC ships with at least five UTF-8 implementations:

a. the implementation used by GHC in `ghc-boot:GHC.Utils.Encoding`; this can be
   used at a number of types including `Addr#`, `ByteArray#`, `ForeignPtr`,
   `Ptr`, `ShortByteString`, and `ByteString`. Most of this can be removed in
   GHC 9.6+2, when the copies in `base` will become available to `ghc-boot`.
b. the copy of the `ghc-boot` definition now exported by
   `base:GHC.Encoding.UTF8`. This can be used at `Addr#`, `Ptr`,
   `ByteArray#`, and `ForeignPtr`.
c. the decoder used by `unpackCStringUtf8#` in `ghc-prim:GHC.CString`; this is
   specialised at `Addr#`.
d. the codec used by the IO subsystem in `base:GHC.IO.Encoding.UTF8`; this is
   specialised at `Addr#` but, unlike the above, supports recovery in the
   presence of partial codepoints (since in IO contexts codepoints may be
   broken across buffers)
e. the implementation provided by the `text` library

On its face, this seems a tad silly. On the other hand, these implementations do
materially differ from one another (e.g. in the types they support, the detail
in errors they can report, and the ability to recover from partial codepoints).
Consequently, it's quite unclear that further consolidation would be worthwhile.

The most obvious opportunity is to move (b) into `ghc-prim` and use it to
implement (c) (namely `unpackCStringUtf8#` and friends). However, it's not clear
that this would be worthwhile as several of the types supported by (b) are
defined in `base`.
-}

-- We can't write the decoder as efficiently as we'd like without
-- resorting to unboxed extensions, unfortunately.  I tried to write
-- an IO version of this function, but GHC can't eliminate boxed
-- results from an IO-returning function.
--
-- We assume we can ignore overflow when parsing a multibyte character here.
-- To make this safe, we add extra sentinel bytes to unparsed UTF-8 sequences
-- before decoding them (see "GHC.Data.StringBuffer").

{-# INLINE utf8DecodeChar# #-}
-- | Decode a single codepoint from a byte buffer indexed by the given indexing
-- function.
--
-- The indexing function is called with offsets @0#@ to at most @3#@, relative
-- to the start of the character.  The result is the decoded character together
-- with the number of bytes to advance.
--
-- Malformed input yields @'\\0'#@.  A bad lead byte advances by one byte, a
-- bad continuation byte advances up to (but not including) that byte, and a
-- structurally well-formed four-byte sequence whose value exceeds @0x10FFFF@
-- advances over all four bytes.  'chr#' is never applied to a value outside
-- the range of 'Char'.
utf8DecodeChar# :: (Int# -> Word#) -> (# Char#, Int# #)
utf8DecodeChar# indexWord8# =
  let !ch0 = byteAt 0# in
  case () of
    _ | isTrue# (ch0 <=# 0x7F#) -> (# chr# ch0, 1# #)

      -- Two-byte form: 110xxxxx 10xxxxxx
      | isTrue# ((ch0 >=# 0xC0#) `andI#` (ch0 <=# 0xDF#)) ->
          let !ch1 = byteAt 1#
          in if notContinuation ch1 then invalid 1# else
             (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +#
                       (ch1 -# 0x80#))
              , 2# #)

      -- Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx
      | isTrue# ((ch0 >=# 0xE0#) `andI#` (ch0 <=# 0xEF#)) ->
          let !ch1 = byteAt 1#
          in if notContinuation ch1 then invalid 1# else
             let !ch2 = byteAt 2#
             in if notContinuation ch2 then invalid 2# else
                (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +#
                         ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
                          (ch2 -# 0x80#))
                 , 3# #)

      -- Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      -- (0xF8 and above do not match the 11110xxx lead pattern.)
      | isTrue# ((ch0 >=# 0xF0#) `andI#` (ch0 <=# 0xF7#)) ->
          let !ch1 = byteAt 1#
          in if notContinuation ch1 then invalid 1# else
             let !ch2 = byteAt 2#
             in if notContinuation ch2 then invalid 2# else
                let !ch3 = byteAt 3#
                in if notContinuation ch3 then invalid 3# else
                   let !cp = ((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +#
                             ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +#
                             ((ch2 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
                              (ch3 -# 0x80#)
                   in -- Lead bytes 0xF4 (with a large second byte) to 0xF7
                      -- assemble to values above the last code point; chr#
                      -- performs no range check, so reject them here.
                      if isTrue# (cp ># 0x10FFFF#)
                        then invalid 4#
                        else (# chr# cp, 4# #)

      | otherwise -> invalid 1#
  where
    -- The byte at the given offset, widened to an Int#.
    byteAt :: Int# -> Int#
    byteAt i# = word2Int# (indexWord8# i#)

    -- True unless the byte has the continuation form 10xxxxxx.
    notContinuation :: Int# -> Bool
    notContinuation b# = isTrue# ((b# <# 0x80#) `orI#` (b# >=# 0xC0#))

    -- all invalid sequences end up here:
    invalid :: Int# -> (# Char#, Int# #)
    invalid nBytes# = (# '\0'#, nBytes# #)
    -- '\xFFFD' would be the usual replacement character, but
    -- that's a valid symbol in Haskell, so will result in a
    -- confusing parse error later on.  Instead we use '\0' which
    -- will signal a lexer error immediately.

-- | Decode a single character at the given 'Addr#'.
--
-- The second argument is a byte offset added to the address before indexing.
-- Returns the character and the number of bytes consumed.
utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #)
utf8DecodeCharAddr# a# off# =
#if !MIN_VERSION_ghc_prim(0,10,0)
    utf8DecodeChar# (\i# -> indexWord8OffAddr# a# (i# +# off#))
#else
    utf8DecodeChar# (\i# -> word8ToWord# (indexWord8OffAddr# a# (i# +# off#)))
#endif

-- | Decode a single codepoint starting at the given 'Ptr'.
--
-- Returns the character and the number of bytes consumed.
utf8DecodeCharPtr :: Ptr Word8 -> (Char, Int)
utf8DecodeCharPtr !(Ptr a#) =
    case utf8DecodeCharAddr# a# 0# of
      (# c#, nBytes# #) -> (C# c#, I# nBytes#)

-- | Decode a single codepoint starting at the given byte offset into a
-- 'ByteArray#'.
--
-- Returns the character and the number of bytes consumed.
utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #)
utf8DecodeCharByteArray# ba# off# =
#if !MIN_VERSION_ghc_prim(0,10,0)
    utf8DecodeChar# (\i# -> indexWord8Array# ba# (i# +# off#))
#else
    utf8DecodeChar# (\i# -> word8ToWord# (indexWord8Array# ba# (i# +# off#)))
#endif

{-# INLINE utf8Decode# #-}
-- | Lazily decode @len#@ bytes into a list of characters.
--
-- The second argument decodes the character at a byte offset and reports how
-- many bytes it occupied.  The tail of the list is deferred with
-- 'unsafeDupableInterleaveIO', so decoding happens as the list is consumed.
-- The first argument is run once the offset reaches @len#@, i.e. when the end
-- of the list is forced; callers use it to keep the underlying buffer alive
-- until then.
utf8Decode# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
utf8Decode# retain decodeChar# len#
  = unpack 0#
  where
    unpack :: Int# -> IO [Char]
    unpack i#
      | isTrue# (i# >=# len#) = retain >> return []
      | otherwise =
          case decodeChar# i# of
            (# c#, nBytes# #) -> do
              rest <- unsafeDupableInterleaveIO $ unpack (i# +# nBytes#)
              return (C# c# : rest)

-- | Lazily decode @len@ bytes starting @offset@ bytes into the buffer of the
-- given 'ForeignPtr'.
utf8DecodeForeignPtr :: ForeignPtr Word8 -> Int -> Int -> [Char]
utf8DecodeForeignPtr fp offset (I# len#)
  = unsafeDupablePerformIO $ do
      let !(Ptr a#) = unsafeForeignPtrToPtr fp `plusPtr` offset
      utf8Decode# (touchForeignPtr fp) (utf8DecodeCharAddr# a#) len#
-- Note that since utf8Decode# returns a thunk the lifetime of the
-- ForeignPtr actually needs to be longer than the lexical lifetime
-- withForeignPtr would provide here. That's why we use touchForeignPtr to
-- keep the fp alive until the last character has actually been decoded.

-- | Lazily decode the whole contents of a 'ByteArray#'.
utf8DecodeByteArray# :: ByteArray# -> [Char]
utf8DecodeByteArray# ba#
  = unsafeDupablePerformIO $
      let len# = sizeofByteArray# ba# in
      utf8Decode# (return ()) (utf8DecodeCharByteArray# ba#) len#

-- | Compare two Modified-UTF-8 encoded byte arrays by code point, without
-- decoding them.
--
-- The arrays are compared bytewise; the first differing byte decides the
-- result, and a proper prefix compares less than the longer array.  The only
-- exception is the @0xC0 0x80@ encoding of @'\\0'@, which is ordered below
-- every other character.
utf8CompareByteArray# :: ByteArray# -> ByteArray# -> Ordering
utf8CompareByteArray# a1 a2 = go 0#
   -- UTF-8 has the property that sorting by bytes values also sorts by
   -- code-points.
   -- BUT we use "Modified UTF-8" which encodes \0 as 0xC080 so this property
   -- doesn't hold and we must explicitly check this case here.
   -- Note that decoding every code point would also work but it would be much
   -- more costly.
  where
    !sz1 = sizeofByteArray# a1
    !sz2 = sizeofByteArray# a2

    -- The byte at the given offset, widened to a Word#.
    byteAt :: ByteArray# -> Int# -> Word#
#if !MIN_VERSION_ghc_prim(0,10,0)
    byteAt ba i# = indexWord8Array# ba i#
#else
    byteAt ba i# = word8ToWord# (indexWord8Array# ba i#)
#endif

    -- Given that the byte at offset i# is 0xC0, is it followed by 0x80, i.e.
    -- is this the two-byte encoding of NUL?  The bounds check comes first so
    -- that a trailing 0xC0 never causes a read past the end of the array.
    nulAt :: ByteArray# -> Int# -> Int# -> Bool
    nulAt ba sz i# =
      isTrue# ((i# +# 1#) <# sz)
        && isTrue# (byteAt ba (i# +# 1#) `eqWord#` 0x80##)

    -- Both arrays are walked in lock step, so a single offset suffices.
    go :: Int# -> Ordering
    go off#
      | isTrue# ((off# >=# sz1) `andI#` (off# >=# sz2)) = EQ
      | isTrue# (off# >=# sz1)                          = LT
      | isTrue# (off# >=# sz2)                          = GT
      | otherwise =
          let !b1 = byteAt a1 off#
              !b2 = byteAt a2 off#
          in case () of
               _ | isTrue# (b1 `eqWord#` b2) -> go (off# +# 1#)
                 -- The bytes differ from here on, so the answer is decided
                 -- at this offset.  An encoded NUL sorts first.
                 | isTrue# (b1 `eqWord#` 0xC0##), nulAt a1 sz1 off# -> LT
                 | isTrue# (b2 `eqWord#` 0xC0##), nulAt a2 sz2 off# -> GT
                 | isTrue# (b1 `gtWord#` b2)                        -> GT
                 | otherwise                                        -> LT

-- | Count the characters encoded in a 'ByteArray#'.
--
-- Each step decodes one character with 'utf8DecodeCharByteArray#' and
-- advances by the number of bytes it reports, so every malformed sequence
-- counts as one character.
utf8CountCharsByteArray# :: ByteArray# -> Int
utf8CountCharsByteArray# ba = go 0# 0#
  where
    len# = sizeofByteArray# ba

    go :: Int# -> Int# -> Int
    go i# n#
      | isTrue# (i# >=# len#) = I# n#
      | otherwise =
          case utf8DecodeCharByteArray# ba i# of
            (# _, nBytes# #) -> go (i# +# nBytes#) (n# +# 1#)

{-# INLINE utf8EncodeChar #-}
-- | Encode one character using the supplied byte writer.
--
-- The writer is called with a byte offset (starting at zero) and the byte to
-- store there.  The result is the number of bytes written, between 1 and 4.
utf8EncodeChar :: (Int# -> Word8# -> State# s -> State# s)
               -> Char -> ST s Int
utf8EncodeChar write# c =
  let x = fromIntegral (ord c) :: Word in
  case () of
    _ | x > 0 && x <= 0x007f -> do
          write 0 x
          return 1
        -- NB. '\0' is encoded as '\xC0\x80', not '\0'.  This is so that we
        -- can have 0-terminated UTF-8 strings (see GHC.Internal.Base.unpackCStringUtf8).
      | x <= 0x07ff -> do
          write 0 (0xC0 .|. ((x `shiftR` 6) .&. 0x1F))
          write 1 (0x80 .|. (x .&. 0x3F))
          return 2
      | x <= 0xffff -> do
          write 0 (0xE0 .|. ((x `shiftR` 12) .&. 0x0F))
          write 1 (0x80 .|. ((x `shiftR` 6) .&. 0x3F))
          write 2 (0x80 .|. (x .&. 0x3F))
          return 3
      | otherwise -> do
          write 0 (0xF0 .|. (x `shiftR` 18))
          write 1 (0x80 .|. ((x `shiftR` 12) .&. 0x3F))
          write 2 (0x80 .|. ((x `shiftR` 6) .&. 0x3F))
          write 3 (0x80 .|. (x .&. 0x3F))
          return 4
  where
    -- Narrow a Word to a byte and hand it to the caller's writer.
    {-# INLINE write #-}
    write (I# off#) (W# w#) = ST $ \s0 ->
#if !MIN_VERSION_ghc_prim(0,10,0)
      case write# off# (narrowWord8# w#) s0 of
#else
      case write# off# (wordToWord8# w#) s0 of
#endif
        s1 -> (# s1, () #)

-- | Encode a 'String' into the memory starting at the given pointer.
--
-- No bounds are checked and no terminator is written; 'utf8EncodedLength'
-- gives the number of bytes that will be written.
utf8EncodePtr :: Ptr Word8 -> String -> IO ()
utf8EncodePtr (Ptr a#) str = go a# str
  where
    go :: Addr# -> String -> IO ()
    go !_ []     = return ()
    go a# (c:cs) = do
#if !MIN_VERSION_ghc_prim(0,10,0)
      -- writeWord8OffAddr# was taking a Word#
      I# off# <- stToIO $ utf8EncodeChar (\i w -> writeWord8OffAddr# a# i (extendWord8# w)) c
#else
      I# off# <- stToIO $ utf8EncodeChar (writeWord8OffAddr# a#) c
#endif
      go (a# `plusAddr#` off#) cs

-- | Encode a 'String' into a freshly allocated 'ByteArray#' of exactly
-- 'utf8EncodedLength' bytes.
utf8EncodeByteArray# :: String -> ByteArray#
utf8EncodeByteArray# str = runRW# $ \s0 ->
  case utf8EncodedLength str          of { I# len# ->
  case newByteArray# len# s0          of { (# s1, mba# #) ->
  case go mba# 0# str                 of { ST f_go ->
  case f_go s1                        of { (# s2, () #) ->
  case unsafeFreezeByteArray# mba# s2 of { (# _, ba# #) ->
  ba# }}}}}
  where
    -- Write the remaining characters starting at byte offset i#.
    go :: MutableByteArray# s -> Int# -> String -> ST s ()
    go _ _ [] = return ()
    go mba# i# (c:cs) = do
#if !MIN_VERSION_ghc_prim(0,10,0)
      -- writeWord8Array# was taking a Word#
      I# off# <- utf8EncodeChar (\j# w -> writeWord8Array# mba# (i# +# j#) (extendWord8# w)) c
#else
      I# off# <- utf8EncodeChar (\j# -> writeWord8Array# mba# (i# +# j#)) c
#endif
      go mba# (i# +# off#) cs

-- | The number of bytes 'utf8EncodePtr' and 'utf8EncodeByteArray#' produce
-- for the given 'String'.  @'\\0'@ counts as two bytes, matching its
-- @0xC0 0x80@ encoding.
utf8EncodedLength :: String -> Int
utf8EncodedLength str = go 0 str
  where
    -- Pinned to Int so the accumulator is not generalised over Num.
    go :: Int -> String -> Int
    go !n [] = n
    go n (c:cs)
      | ord c > 0 && ord c <= 0x007f = go (n + 1) cs
      | ord c <= 0x07ff              = go (n + 2) cs
      | ord c <= 0xffff              = go (n + 3) cs
      | otherwise                    = go (n + 4) cs