{-# LANGUAGE CPP #-}{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples, NoImplicitPrelude #-}{-# OPTIONS_GHC -O2 -fno-warn-name-shadowing #-}-- |-- Module : GHC.Internal.Encoding.UTF8-- Copyright : (c) The University of Glasgow, 1994-2023-- License : see libraries/base/LICENSE---- Maintainer : ghc-devs@haskell.org-- Stability : internal-- Portability : non-portable (GHC extensions)---- /The API of this module is unstable and not meant to be consumed by the general public./-- If you absolutely must depend on it, make sure to use a tight upper-- bound, e.g., @base < 4.X@ rather than @base < 5@, because the interface can-- change rapidly without much warning.---- Simple UTF-8 codecs supporting non-streaming encoding/decoding.-- For encoding where codepoints may be broken across buffers,-- see "GHC.IO.Encoding.UTF8".---- This is one of several UTF-8 implementations provided by GHC; see Note-- [GHC's many UTF-8 implementations] in "GHC.Encoding.UTF8" for an-- overview.--moduleGHC.Internal.Encoding.UTF8 (-- * Decoding single charactersutf8DecodeCharAddr# ,utf8DecodeCharPtr ,utf8DecodeCharByteArray# -- * Decoding strings,utf8DecodeByteArray# ,utf8DecodeForeignPtr -- * Counting characters,utf8CountCharsByteArray# -- * Comparison,utf8CompareByteArray# -- * Encoding strings,utf8EncodePtr ,utf8EncodeByteArray# ,utf8EncodedLength )whereimportGHC.Types importGHC.Internal.Base importGHC.Internal.IO importGHC.Internal.ST importGHC.Internal.Word importGHC.Internal.ForeignPtr importGHC.Internal.Num importGHC.Internal.Bits importGHC.Internal.Real importGHC.Internal.Ptr {-
Note [GHC's many UTF-8 implementations]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Currently GHC ships with at least five UTF-8 implementations:

a. the implementation used by GHC in `ghc-boot:GHC.Utils.Encoding`; this can be
   used at a number of types including `Addr#`, `ByteArray#`, `ForeignPtr`,
   `Ptr`, `ShortByteString`, and `ByteString`. Most of this can be removed in
   GHC 9.6+2, when the copies in `base` will become available to `ghc-boot`.

b. the copy of the `ghc-boot` definition now exported by `base:GHC.Encoding.UTF8`.
   This can be used at `Addr#`, `Ptr`, `ByteArray#`, and `ForeignPtr`.

c. the decoder used by `unpackCStringUtf8#` in `ghc-prim:GHC.CString`; this is
   specialised at `Addr#`.

d. the codec used by the IO subsystem in `base:GHC.IO.Encoding.UTF8`; this is
   specialised at `Addr#` but, unlike the above, supports recovery in the presence
   of partial codepoints (since in IO contexts codepoints may be broken across
   buffers).

e. the implementation provided by the `text` library.
On its face, this seems a tad silly. On the other hand, these implementations do
materially differ from one another (e.g. in the types they support, the
detail in errors they can report, and the ability to recover from partial
codepoints). Consequently, it's quite unclear that further consolidation
would be worthwhile.

The most obvious opportunity is to move (b) into `ghc-prim` and use it to
implement (c) (namely `unpackCStringUtf8#` and friends). However, it's not
clear that this would be worthwhile as several of the types supported by (b)
are defined in `base`.
-}

-- We can't write the decoder as efficiently as we'd like without
-- resorting to unboxed extensions, unfortunately.  I tried to write
-- an IO version of this function, but GHC can't eliminate boxed
-- results from an IO-returning function.
--
-- We assume we can ignore overflow when parsing a multibyte character here.
-- To make this safe, we add extra sentinel bytes to unparsed UTF-8 sequences
-- before decoding them (see "GHC.Data.StringBuffer").

{-# INLINE utf8DecodeChar# #-}
-- | Decode a single codepoint from a byte buffer indexed by the given indexing
-- function.
--
-- The argument maps a byte offset, counted from the first byte of the
-- character, to the byte stored there.  The result pairs the decoded
-- character with the number of bytes consumed.
--
-- The length of the sequence is chosen from the lead byte:
--
-- * @0x00..0x7F@: one byte;
-- * @0xC0..0xDF@: two bytes (this includes the sequence @0xC0 0x80@, which
--   decodes to NUL);
-- * @0xE0..0xEF@: three bytes;
-- * @0xF0..0xF7@: four bytes.
--
-- Anything else is invalid.  An invalid sequence decodes to the NUL character
-- and the returned count is the number of bytes to skip: the offset of the
-- first byte that is not a continuation byte, @1@ for a bad lead byte, or @4@
-- for a structurally well-formed four-byte sequence whose value lies beyond
-- U+10FFFF (such a value must never reach 'chr#', which performs no range
-- check).
utf8DecodeChar# :: (Int# -> Word#) -> (# Char#, Int# #)
utf8DecodeChar# indexWord8# =
  let !ch0 = byteAt 0# in
  case () of
    _ | isTrue# (ch0 <=# 0x7F#) -> (# chr# ch0, 1# #)

      | isTrue# ((ch0 >=# 0xC0#) `andI#` (ch0 <=# 0xDF#)) ->
        let !ch1 = byteAt 1# in
        if notContinuation ch1 then invalid 1# else
        (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +#
                  (ch1 -# 0x80#)),
           2# #)

      | isTrue# ((ch0 >=# 0xE0#) `andI#` (ch0 <=# 0xEF#)) ->
        let !ch1 = byteAt 1# in
        if notContinuation ch1 then invalid 1# else
        let !ch2 = byteAt 2# in
        if notContinuation ch2 then invalid 2# else
        (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +#
                 ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
                  (ch2 -# 0x80#)),
           3# #)

      -- A four-byte lead byte has the bit pattern 11110xxx, i.e. 0xF0..0xF7;
      -- 0xF8 (11111000) is not a lead byte of any valid length.
      | isTrue# ((ch0 >=# 0xF0#) `andI#` (ch0 <=# 0xF7#)) ->
        let !ch1 = byteAt 1# in
        if notContinuation ch1 then invalid 1# else
        let !ch2 = byteAt 2# in
        if notContinuation ch2 then invalid 2# else
        let !ch3 = byteAt 3# in
        if notContinuation ch3 then invalid 3# else
        let !cp = ((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +#
                  ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +#
                  ((ch2 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
                   (ch3 -# 0x80#)
        in
        -- Lead bytes 0xF4..0xF7 can encode values above the last Unicode
        -- code point; those are not representable as a Char#.
        if isTrue# (cp ># 0x10FFFF#) then invalid 4# else
        (# chr# cp, 4# #)

      | otherwise -> invalid 1#
  where
    -- The byte at the given offset, widened to an Int#.
    byteAt :: Int# -> Int#
    byteAt i# = word2Int# (indexWord8# i#)

    -- True when the byte is outside the continuation range 0x80..0xBF.
    notContinuation :: Int# -> Bool
    notContinuation b# = isTrue# ((b# <# 0x80#) `orI#` (b# >=# 0xC0#))

    -- all invalid sequences end up here:
    invalid :: Int# -> (# Char#, Int# #)
    invalid nBytes# = (# '\0'#, nBytes# #)
    -- '\xFFFD' would be the usual replacement character, but
    -- that's a valid symbol in Haskell, so will result in a
    -- confusing parse error later on.  Instead we use '\0' which
    -- will signal a lexer error immediately.
-- Note on the failure value produced by 'utf8DecodeChar#':
-- '\xFFFD' would be the usual replacement character, but
-- that's a valid symbol in Haskell, so will result in a
-- confusing parse error later on.  Instead we use '\0' which
-- will signal a lexer error immediately.

-- | Decode a single character at the given 'Addr#'.
--
-- The second argument is a byte offset from the address at which the
-- character starts.  The result is whatever 'utf8DecodeChar#' yields when it
-- reads bytes from there: the character and its encoded width in bytes.
utf8DecodeCharAddr# :: Addr# -> Int# -> (# Char#, Int# #)
utf8DecodeCharAddr# a# off# = utf8DecodeChar# byteAt
  where
    -- Fetch the byte at relative position i# from the start of the character.
    byteAt :: Int# -> Word#
#if !MIN_VERSION_ghc_prim(0,10,0)
    byteAt i# = indexWord8OffAddr# a# (i# +# off#)
#else
    byteAt i# = word8ToWord# (indexWord8OffAddr# a# (i# +# off#))
#endif
-- | Decode a single codepoint starting at the given 'Ptr'.
--
-- Boxed convenience wrapper around 'utf8DecodeCharAddr#' (called with offset
-- zero): returns the decoded character and the number of bytes it occupied.
utf8DecodeCharPtr :: Ptr Word8 -> (Char, Int)
utf8DecodeCharPtr !(Ptr addr#)
  | (# ch#, width# #) <- utf8DecodeCharAddr# addr# 0#
  = (C# ch#, I# width#)
-- | Decode a single codepoint starting at the given byte offset into a
-- 'ByteArray#'.
--
-- The result is whatever 'utf8DecodeChar#' yields when it reads bytes from
-- that offset: the character and its encoded width in bytes.
utf8DecodeCharByteArray# :: ByteArray# -> Int# -> (# Char#, Int# #)
utf8DecodeCharByteArray# ba# off# = utf8DecodeChar# byteAt
  where
    -- Fetch the byte at relative position i# from the start of the character.
    byteAt :: Int# -> Word#
#if !MIN_VERSION_ghc_prim(0,10,0)
    byteAt i# = indexWord8Array# ba# (i# +# off#)
#else
    byteAt i# = word8ToWord# (indexWord8Array# ba# (i# +# off#))
#endif
{-# INLINE utf8Decode# #-}
-- | Lazily decode @len#@ bytes into a list of characters.
--
-- * @retain@ is run once the read position reaches @len#@, i.e. when the end
--   of the list is produced; callers use it to keep the underlying buffer
--   alive until then.
-- * @decodeChar#@ decodes the character that starts at the given byte offset
--   and reports how many bytes it occupied.
--
-- The tail of the list is produced through 'unsafeDupableInterleaveIO', so
-- each character is only decoded when the corresponding list cell is
-- demanded.
utf8Decode# :: (IO ()) -> (Int# -> (# Char#, Int# #)) -> Int# -> IO [Char]
utf8Decode# retain decodeChar# len# = go 0#
  where
    go :: Int# -> IO [Char]
    go pos#
      | isTrue# (pos# <# len#)
      , (# c#, nBytes# #) <- decodeChar# pos#
      = unsafeDupableInterleaveIO (go (pos# +# nBytes#)) >>= \rest ->
          return (C# c# : rest)
      | otherwise
      = retain >> return []
-- | Lazily decode a UTF-8 buffer held by a 'ForeignPtr'.
--
-- Decoding starts @offset@ bytes into the buffer and covers the given number
-- of bytes.  The pointer is touched via 'touchForeignPtr' only when the end
-- of the resulting list is reached, which keeps the buffer alive for as long
-- as undecoded characters remain.
utf8DecodeForeignPtr :: ForeignPtr Word8 -> Int -> Int -> [Char]
utf8DecodeForeignPtr fp offset (I# len#) =
  unsafeDupablePerformIO
    (case unsafeForeignPtrToPtr fp `plusPtr` offset of
       Ptr start# ->
         utf8Decode# (touchForeignPtr fp) (utf8DecodeCharAddr# start#) len#)
-- Note (on 'utf8DecodeForeignPtr'): since utf8Decode# returns a thunk the
-- lifetime of the ForeignPtr actually needs to be longer than the lexical
-- lifetime withForeignPtr would provide here. That's why we use
-- touchForeignPtr to keep the fp alive until the last character has actually
-- been decoded.

-- | Lazily decode the whole contents of a 'ByteArray#' as UTF-8.
--
-- The number of bytes decoded is the size reported by 'sizeofByteArray#'.
-- No action is needed to retain the array, so the retain hook passed to
-- 'utf8Decode#' does nothing.
utf8DecodeByteArray# :: ByteArray# -> [Char]
utf8DecodeByteArray# ba# =
  unsafeDupablePerformIO
    (utf8Decode# noRetain (utf8DecodeCharByteArray# ba#) (sizeofByteArray# ba#))
  where
    noRetain :: IO ()
    noRetain = return ()
-- | Compare two UTF-8 encoded 'ByteArray#'s so that the result agrees with
-- the ordering of the code points they encode.
--
-- A byte array that is a proper prefix of the other compares as 'LT'.
--
-- If exactly one side holds the NUL sequence @0xC0 0x80@ at the first point
-- of difference, that side is the smaller one.  Every other difference,
-- including malformed uses of @0xC0@ (not followed by @0x80@, or at the very
-- end of the array), is decided by plain byte comparison.  The byte after a
-- @0xC0@ is only inspected when it lies within the array.
utf8CompareByteArray# :: ByteArray# -> ByteArray# -> Ordering
utf8CompareByteArray# a1 a2 = go 0# 0#
  -- UTF-8 has the property that sorting by bytes values also sorts by
  -- code-points.
  -- BUT we use "Modified UTF-8" which encodes \0 as 0xC080 so this property
  -- doesn't hold and we must explicitly check this case here.
  -- Note that decoding every code point would also work but it would be much
  -- more costly.
  where
    !sz1 = sizeofByteArray# a1
    !sz2 = sizeofByteArray# a2

    -- The byte stored at the given index, widened to a Word#.
    byteAt :: ByteArray# -> Int# -> Word#
#if !MIN_VERSION_ghc_prim(0,10,0)
    byteAt ba# i# = indexWord8Array# ba# i#
#else
    byteAt ba# i# = word8ToWord# (indexWord8Array# ba# i#)
#endif

    -- Does the encoded NUL (0xC0 0x80) start at off#?  The caller passes the
    -- byte already read at off# together with the array size, so the second
    -- byte is only fetched when it is in bounds.
    startsNul :: ByteArray# -> Int# -> Int# -> Word# -> Bool
    startsNul ba# sz# off# b#
      | isTrue# (b# `neWord#` 0xC0##)  = False
      | isTrue# ((off# +# 1#) >=# sz#) = False
      | otherwise = isTrue# (byteAt ba# (off# +# 1#) `eqWord#` 0x80##)

    go :: Int# -> Int# -> Ordering
    go off1 off2
      | isTrue# ((off1 >=# sz1) `andI#` (off2 >=# sz2)) = EQ
      | isTrue# (off1 >=# sz1)                          = LT
      | isTrue# (off2 >=# sz2)                          = GT
      | otherwise =
          let !b1 = byteAt a1 off1
              !b2 = byteAt a2 off2
          in case () of
               _ -- Equal bytes never decide the ordering; keep scanning.
                 | isTrue# (b1 `eqWord#` b2) -> go (off1 +# 1#) (off2 +# 1#)
                 -- NUL is the smallest code point although its lead byte
                 -- (0xC0) is larger than every ASCII byte.
                 | startsNul a1 sz1 off1 b1  -> LT
                 | startsNul a2 sz2 off2 b2  -> GT
                 | isTrue# (b1 `gtWord#` b2) -> GT
                 | otherwise                 -> LT
-- | Count the characters encoded in a 'ByteArray#'.
--
-- The array is walked from offset zero with 'utf8DecodeCharByteArray#'; each
-- decoding step counts as one character and advances by the number of bytes
-- that step reports, until the offset reaches the size of the array.
utf8CountCharsByteArray# :: ByteArray# -> Int
utf8CountCharsByteArray# ba = count 0# 0#
  where
    end# = sizeofByteArray# ba

    count :: Int# -> Int# -> Int
    count pos# seen#
      | isTrue# (pos# <# end#)
      , (# _, width# #) <- utf8DecodeCharByteArray# ba pos#
      = count (pos# +# width#) (seen# +# 1#)
      | otherwise
      = I# seen#
{-# INLINE utf8EncodeChar #-}
-- | Encode a single character, handing each output byte to the supplied
-- writer.
--
-- The writer receives the byte's offset within the encoded character
-- (starting at zero), the byte itself and the state token to thread.  The
-- result is the number of bytes written (between one and four).
utf8EncodeChar :: (Int# -> Word8# -> State# s -> State# s)
               -> Char -> ST s Int
utf8EncodeChar write# c
  | x > 0 && x <= 0x007f = do
      emit 0 x
      return 1
    -- NB. '\0' is encoded as '\xC0\x80', not '\0'.  This is so that we
    -- can have 0-terminated UTF-8 strings (see GHC.Internal.Base.unpackCStringUtf8).
  | x <= 0x07ff = do
      emit 0 (0xC0 .|. ((x `shiftR` 6) .&. 0x1F))
      emit 1 (trail x)
      return 2
  | x <= 0xffff = do
      emit 0 (0xE0 .|. ((x `shiftR` 12) .&. 0x0F))
      emit 1 (trail (x `shiftR` 6))
      emit 2 (trail x)
      return 3
  | otherwise = do
      emit 0 (0xF0 .|. (x `shiftR` 18))
      emit 1 (trail (x `shiftR` 12))
      emit 2 (trail (x `shiftR` 6))
      emit 3 (trail x)
      return 4
  where
    -- The code point being encoded.
    x :: Word
    x = fromIntegral (ord c)

    -- A continuation byte carrying the low six bits of its argument.
    trail :: Word -> Word
    trail w = 0x80 .|. (w .&. 0x3F)

    -- Narrow the value to a byte and pass it to the writer at the given
    -- offset.
    {-# INLINE emit #-}
    emit (I# off#) (W# w#) = ST $ \s0 ->
#if !MIN_VERSION_ghc_prim(0,10,0)
      case write# off# (narrowWord8# w#) s0 of
#else
      case write# off# (wordToWord8# w#) s0 of
#endif
        s1 -> (# s1, () #)
-- | Encode a 'String' as UTF-8 into the memory starting at the given 'Ptr'.
--
-- Characters are written back to back using 'utf8EncodeChar'.  No bounds
-- are checked here, so the destination must have room for the whole
-- encoding (see 'utf8EncodedLength').
utf8EncodePtr :: Ptr Word8 -> String -> IO ()
utf8EncodePtr (Ptr start#) str = loop start# str
  where
    -- Write the remaining characters at dst#, advancing by each encoded width.
    loop !_ [] = return ()
    loop dst# (ch : rest) =
      stToIO (utf8EncodeChar (store# dst#) ch) >>= \(I# width#) ->
        loop (dst# `plusAddr#` width#) rest

    -- Store one byte at offset i# from dst#.
#if !MIN_VERSION_ghc_prim(0,10,0)
    -- writeWord8OffAddr# was taking a Word#
    store# dst# i# w# s = writeWord8OffAddr# dst# i# (extendWord8# w#) s
#else
    store# dst# i# w# s = writeWord8OffAddr# dst# i# w# s
#endif
-- | Encode a 'String' as UTF-8 into a freshly allocated 'ByteArray#'.
--
-- The array is sized with 'utf8EncodedLength', filled one character at a
-- time with 'utf8EncodeChar', and then frozen in place.
utf8EncodeByteArray# :: String -> ByteArray#
utf8EncodeByteArray# str = runRW# (\s0 ->
  case utf8EncodedLength str of
    I# len# -> case newByteArray# len# s0 of
      (# s1, mba# #) -> case fill mba# 0# str of
        ST writeAll -> case writeAll s1 of
          (# s2, () #) -> case unsafeFreezeByteArray# mba# s2 of
            (# _, ba# #) -> ba#)
  where
    -- Write the remaining characters starting at byte index i#.
    fill _ _ [] = return ()
    fill dst# i# (ch : rest) =
      utf8EncodeChar (store# dst# i#) ch >>= \(I# width#) ->
        fill dst# (i# +# width#) rest

    -- Store one byte at index i# +# j#, where j# is the byte's offset within
    -- the character currently being encoded.
#if !MIN_VERSION_ghc_prim(0,10,0)
    -- writeWord8Array# was taking a Word#
    store# dst# i# j# w# s = writeWord8Array# dst# (i# +# j#) (extendWord8# w#) s
#else
    store# dst# i# j# w# s = writeWord8Array# dst# (i# +# j#) w# s
#endif
-- | The number of bytes needed to encode the given 'String'.
--
-- The per-character widths mirror the thresholds used when encoding: one
-- byte for U+0001..U+007F, two bytes up to U+07FF (which includes NUL, as it
-- fails the one-byte test), three bytes up to U+FFFF and four bytes
-- otherwise.
utf8EncodedLength :: String -> Int
utf8EncodedLength str = count 0 str
  where
    -- Strict running total of the bytes needed so far.
    count :: Int -> [Char] -> Int
    count !acc []       = acc
    count !acc (c : cs) = count (acc + width (ord c)) cs

    -- Encoded width, in bytes, of a single code point.
    width :: Int -> Int
    width cp
      | cp > 0 && cp <= 0x007f = 1
      | cp <= 0x07ff           = 2
      | cp <= 0xffff           = 3
      | otherwise              = 4