{-# LANGUAGE Trustworthy #-}{-# LANGUAGE NoImplicitPrelude , BangPatterns , NondecreasingIndentation , MagicHash #-}{-# OPTIONS_GHC -funbox-strict-fields #-}------------------------------------------------------------------------------- |-- Module : GHC.IO.Encoding.UTF8-- Copyright : (c) The University of Glasgow, 2009-- License : see libraries/base/LICENSE---- Maintainer : libraries@haskell.org-- Stability : internal-- Portability : non-portable---- UTF-8 Codec for the IO library---- Portions Copyright : (c) Tom Harper 2008-2009,-- (c) Bryan O'Sullivan 2009,-- (c) Duncan Coutts 2009-------------------------------------------------------------------------------moduleGHC.IO.Encoding.UTF8(utf8 ,mkUTF8 ,utf8_bom ,mkUTF8_bom )whereimportGHC.Base importGHC.Real importGHC.Num importGHC.IORef -- import GHC.IOimportGHC.IO.Buffer importGHC.IO.Encoding.Failure importGHC.IO.Encoding.Types importGHC.Word importData.Bits utf8 ::TextEncoding utf8 :: TextEncoding utf8 =CodingFailureMode -> TextEncoding mkUTF8 CodingFailureMode ErrorOnCodingFailure -- | @since 4.4.0.0mkUTF8 ::CodingFailureMode ->TextEncoding mkUTF8 :: CodingFailureMode -> TextEncoding mkUTF8 CodingFailureMode cfm =TextEncoding {textEncodingName :: String textEncodingName =String "UTF-8",mkTextDecoder :: IO (TextDecoder ()) mkTextDecoder =CodingFailureMode -> IO (TextDecoder ()) utf8_DF CodingFailureMode cfm ,mkTextEncoder :: IO (TextEncoder ()) mkTextEncoder =CodingFailureMode -> IO (TextEncoder ()) utf8_EF CodingFailureMode cfm }utf8_DF ::CodingFailureMode ->IO (TextDecoder ())utf8_DF :: CodingFailureMode -> IO (TextDecoder ()) utf8_DF CodingFailureMode cfm =TextDecoder () -> IO (TextDecoder ()) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (BufferCodec {encode :: CodeBuffer Word8 Char encode =CodeBuffer Word8 Char utf8_decode ,recover :: Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char) recover =CodingFailureMode -> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char) recoverDecode CodingFailureMode cfm ,close :: IO () close =() -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (),getState :: IO () getState =() -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (),setState :: () -> IO () setState =IO () -> () -> IO () forall a b. a -> b -> a const (IO () -> () -> IO ()) -> IO () -> () -> IO () forall a b. (a -> b) -> a -> b $ () -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return ()})utf8_EF ::CodingFailureMode ->IO (TextEncoder ())utf8_EF :: CodingFailureMode -> IO (TextEncoder ()) utf8_EF CodingFailureMode cfm =TextEncoder () -> IO (TextEncoder ()) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (BufferCodec {encode :: CodeBuffer Char Word8 encode =CodeBuffer Char Word8 utf8_encode ,recover :: Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8) recover =CodingFailureMode -> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8) recoverEncode CodingFailureMode cfm ,close :: IO () close =() -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (),getState :: IO () getState =() -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (),setState :: () -> IO () setState =IO () -> () -> IO () forall a b. a -> b -> a const (IO () -> () -> IO ()) -> IO () -> () -> IO () forall a b. (a -> b) -> a -> b $ () -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return ()})utf8_bom ::TextEncoding utf8_bom :: TextEncoding utf8_bom =CodingFailureMode -> TextEncoding mkUTF8_bom CodingFailureMode ErrorOnCodingFailure mkUTF8_bom ::CodingFailureMode ->TextEncoding mkUTF8_bom :: CodingFailureMode -> TextEncoding mkUTF8_bom CodingFailureMode cfm =TextEncoding {textEncodingName :: String textEncodingName =String "UTF-8BOM",mkTextDecoder :: IO (TextDecoder Bool) mkTextDecoder =CodingFailureMode -> IO (TextDecoder Bool) utf8_bom_DF CodingFailureMode cfm ,mkTextEncoder :: IO (TextEncoder Bool) mkTextEncoder =CodingFailureMode -> IO (TextEncoder Bool) utf8_bom_EF CodingFailureMode cfm }utf8_bom_DF ::CodingFailureMode ->IO (TextDecoder Bool )utf8_bom_DF :: CodingFailureMode -> IO (TextDecoder Bool) utf8_bom_DF CodingFailureMode cfm =doIORef Bool ref <-Bool -> IO (IORef Bool) forall a. a -> IO (IORef a) newIORef Bool True TextDecoder Bool -> IO (TextDecoder Bool) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (BufferCodec {encode :: CodeBuffer Word8 Char encode =IORef Bool -> CodeBuffer Word8 Char utf8_bom_decode IORef Bool ref ,recover :: Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char) recover =CodingFailureMode -> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char) recoverDecode CodingFailureMode cfm ,close :: IO () close =() -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (),getState :: IO Bool getState =IORef Bool -> IO Bool forall a. IORef a -> IO a readIORef IORef Bool ref ,setState :: Bool -> IO () setState =IORef Bool -> Bool -> IO () forall a. IORef a -> a -> IO () writeIORef IORef Bool ref })utf8_bom_EF ::CodingFailureMode ->IO (TextEncoder Bool )utf8_bom_EF :: CodingFailureMode -> IO (TextEncoder Bool) utf8_bom_EF CodingFailureMode cfm =doIORef Bool ref <-Bool -> IO (IORef Bool) forall a. a -> IO (IORef a) newIORef Bool True TextEncoder Bool -> IO (TextEncoder Bool) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (BufferCodec {encode :: CodeBuffer Char Word8 encode =IORef Bool -> CodeBuffer Char Word8 utf8_bom_encode IORef Bool ref ,recover :: Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8) recover =CodingFailureMode -> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8) recoverEncode CodingFailureMode cfm ,close :: IO () close =() -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (),getState :: IO Bool getState =IORef Bool -> IO Bool forall a. IORef a -> IO a readIORef IORef Bool ref ,setState :: Bool -> IO () setState =IORef Bool -> Bool -> IO () forall a. IORef a -> a -> IO () writeIORef IORef Bool ref })utf8_bom_decode ::IORef Bool ->DecodeBuffer utf8_bom_decode :: IORef Bool -> CodeBuffer Word8 Char utf8_bom_decode IORef Bool ref input :: Buffer Word8 input @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer Word8 iraw ,bufL :: forall e. Buffer e -> Int bufL =Int ir ,bufR :: forall e. Buffer e -> Int bufR =Int iw ,bufSize :: forall e. Buffer e -> Int bufSize =Int _}Buffer Char output =doBool first <-IORef Bool -> IO Bool forall a. IORef a -> IO a readIORef IORef Bool ref ifBool -> Bool not Bool first thenCodeBuffer Word8 Char utf8_decode Buffer Word8 input Buffer Char output elsedoletno_bom :: IO (CodingProgress, Buffer Word8, Buffer Char) no_bom =doIORef Bool -> Bool -> IO () forall a. IORef a -> a -> IO () writeIORef IORef Bool ref Bool False ;CodeBuffer Word8 Char utf8_decode Buffer Word8 input Buffer Char output ifInt iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int ir Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 1then(CodingProgress, Buffer Word8, Buffer Char) -> IO (CodingProgress, Buffer Word8, Buffer Char) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress InputUnderflow ,Buffer Word8 input ,Buffer Char output )elsedoWord8 c0 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw Int ir if(Word8 c0 Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool /= Word8 bom0 )thenIO (CodingProgress, Buffer Word8, Buffer Char) no_bom elsedoifInt iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int ir Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 2then(CodingProgress, Buffer Word8, Buffer Char) -> IO (CodingProgress, Buffer Word8, Buffer Char) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress InputUnderflow ,Buffer Word8 input ,Buffer Char output )elsedoWord8 c1 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)if(Word8 c1 Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool /= Word8 bom1 )thenIO (CodingProgress, Buffer Word8, Buffer Char) no_bom elsedoifInt iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int ir Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 3then(CodingProgress, Buffer Word8, Buffer Char) -> IO (CodingProgress, Buffer Word8, Buffer Char) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress InputUnderflow ,Buffer Word8 input ,Buffer Char output )elsedoWord8 c2 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)if(Word8 c2 Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool /= Word8 bom2 )thenIO (CodingProgress, Buffer Word8, Buffer Char) no_bom elsedo-- found a BOM, ignore it and carry onIORef Bool -> Bool -> IO () forall a. IORef a -> a -> IO () writeIORef IORef Bool ref Bool False CodeBuffer Word8 Char utf8_decode Buffer Word8 input {bufL :: Int bufL =Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 3}Buffer Char output utf8_bom_encode ::IORef Bool ->EncodeBuffer utf8_bom_encode :: IORef Bool -> CodeBuffer Char Word8 utf8_bom_encode IORef Bool ref Buffer Char input output :: Buffer Word8 output @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer Word8 oraw ,bufL :: forall e. Buffer e -> Int bufL =Int _,bufR :: forall e. Buffer e -> Int bufR =Int ow ,bufSize :: forall e. Buffer e -> Int bufSize =Int os }=doBool b <-IORef Bool -> IO Bool forall a. IORef a -> IO a readIORef IORef Bool ref ifBool -> Bool not Bool b thenCodeBuffer Char Word8 utf8_encode Buffer Char input Buffer Word8 output elseifInt os Int -> Int -> Int forall a. Num a => a -> a -> a - Int ow Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 3then(CodingProgress, Buffer Char, Buffer Word8) -> IO (CodingProgress, Buffer Char, Buffer Word8) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress OutputUnderflow ,Buffer Char input ,Buffer Word8 output )elsedoIORef Bool -> Bool -> IO () forall a. IORef a -> a -> IO () writeIORef IORef Bool ref Bool False RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw Int ow Word8 bom0 RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)Word8 bom1 RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)Word8 bom2 CodeBuffer Char Word8 utf8_encode Buffer Char input Buffer Word8 output {bufR :: Int bufR =Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 3}bom0 ,bom1 ,bom2 ::Word8 bom0 :: Word8 bom0 =Word8 0xefbom1 :: Word8 bom1 =Word8 0xbbbom2 :: Word8 bom2 =Word8 0xbfutf8_decode ::DecodeBuffer utf8_decode :: CodeBuffer Word8 Char utf8_decode input :: Buffer Word8 input @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer Word8 iraw ,bufL :: forall e. Buffer e -> Int bufL =Int ir0 ,bufR :: forall e. Buffer e -> Int bufR =Int iw ,bufSize :: forall e. Buffer e -> Int bufSize =Int _}output :: Buffer Char output @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer Char oraw ,bufL :: forall e. Buffer e -> Int bufL =Int _,bufR :: forall e. Buffer e -> Int bufR =Int ow0 ,bufSize :: forall e. Buffer e -> Int bufSize =Int os }=letloop :: Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) loop !Int ir !Int ow |Int ow Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int os =CodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress OutputUnderflow Int ir Int ow |Int ir Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int iw =CodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress InputUnderflow Int ir Int ow |Bool otherwise =doWord8 c0 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw Int ir caseWord8 c0 ofWord8 _|Word8 c0 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool <= Word8 0x7f->doInt ow' <-RawBuffer Char -> Int -> Char -> IO Int writeCharBuf RawBuffer Char oraw Int ow (Int -> Char unsafeChr (Word8 -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral Word8 c0 ))Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) loop (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)Int ow' |Word8 c0 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool >= Word8 0xc0Bool -> Bool -> Bool && Word8 c0 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool <= Word8 0xc1->IO (CodingProgress, Buffer Word8, Buffer Char) invalid -- Overlong forms|Word8 c0 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool >= Word8 0xc2Bool -> Bool -> Bool && Word8 c0 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool <= Word8 0xdf->ifInt iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int ir Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 2thenCodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress InputUnderflow Int ir Int ow elsedoWord8 c1 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)if(Word8 c1 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool < Word8 0x80Bool -> Bool -> Bool || Word8 c1 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool >= Word8 0xc0)thenIO (CodingProgress, Buffer Word8, Buffer Char) invalid elsedoInt ow' <-RawBuffer Char -> Int -> Char -> IO Int writeCharBuf RawBuffer Char oraw Int ow (Word8 -> Word8 -> Char chr2 Word8 c0 Word8 c1 )Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) loop (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)Int ow' |Word8 c0 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool >= Word8 0xe0Bool -> Bool -> Bool && Word8 c0 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool <= Word8 0xef->caseInt iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int ir ofInt 1->CodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress InputUnderflow Int ir Int ow Int 2->do-- check for an error even when we don't have-- the full sequence yet (#3341)Word8 c1 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)ifBool -> Bool not (Word8 -> Word8 -> Word8 -> Bool validate3 Word8 c0 Word8 c1 Word8 0x80)thenIO (CodingProgress, Buffer Word8, Buffer Char) invalid elseCodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress InputUnderflow Int ir Int ow Int _->doWord8 c1 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)Word8 c2 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)ifBool -> Bool not (Word8 -> Word8 -> Word8 -> Bool validate3 Word8 c0 Word8 c1 Word8 c2 )thenIO (CodingProgress, Buffer Word8, Buffer Char) invalid elsedoInt ow' <-RawBuffer Char -> Int -> Char -> IO Int writeCharBuf RawBuffer Char oraw Int ow (Word8 -> Word8 -> Word8 -> Char chr3 Word8 c0 Word8 c1 Word8 c2 )Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) loop (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 3)Int ow' |Word8 c0 Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool >= Word8 0xf0->caseInt iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int ir ofInt 1->CodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress InputUnderflow Int ir Int ow Int 2->do-- check for an error even when we don't have-- the full sequence yet (#3341)Word8 c1 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)ifBool -> Bool not (Word8 -> Word8 -> Word8 -> Word8 -> Bool validate4 Word8 c0 Word8 c1 Word8 0x80Word8 0x80)thenIO (CodingProgress, Buffer Word8, Buffer Char) invalid elseCodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress InputUnderflow Int ir Int ow Int 3->doWord8 c1 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)Word8 c2 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)ifBool -> Bool not (Word8 -> Word8 -> Word8 -> Word8 -> Bool validate4 Word8 c0 Word8 c1 Word8 c2 Word8 0x80)thenIO (CodingProgress, Buffer Word8, Buffer Char) invalid elseCodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress InputUnderflow Int ir Int ow Int _->doWord8 c1 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)Word8 c2 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)Word8 c3 <-RawBuffer Word8 -> Int -> IO Word8 readWord8Buf RawBuffer Word8 iraw (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 3)ifBool -> Bool not (Word8 -> Word8 -> Word8 -> Word8 -> Bool validate4 Word8 c0 Word8 c1 Word8 c2 Word8 c3 )thenIO (CodingProgress, Buffer Word8, Buffer Char) invalid elsedoInt ow' <-RawBuffer Char -> Int -> Char -> IO Int writeCharBuf RawBuffer Char oraw Int ow (Word8 -> Word8 -> Word8 -> Word8 -> Char chr4 Word8 c0 Word8 c1 Word8 c2 Word8 c3 )Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) loop (Int ir Int -> Int -> Int forall a. Num a => a -> a -> a + Int 4)Int ow' |Bool otherwise ->IO (CodingProgress, Buffer Word8, Buffer Char) invalid whereinvalid :: IO (CodingProgress, Buffer Word8, Buffer Char) invalid =CodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done CodingProgress InvalidSequence Int ir Int ow -- lambda-lifted, to avoid thunks being built in the inner-loop:done :: a -> Int -> Int -> m (a, Buffer Word8, Buffer Char) done a why !Int ir !Int ow =(a, Buffer Word8, Buffer Char) -> m (a, Buffer Word8, Buffer Char) forall a. a -> m a forall (m :: * -> *) a. Monad m => a -> m a return (a why ,ifInt ir Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int iw thenBuffer Word8 input {bufL :: Int bufL =Int 0,bufR :: Int bufR =Int 0}elseBuffer Word8 input {bufL :: Int bufL =Int ir },Buffer Char output {bufR :: Int bufR =Int ow })inInt -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) loop Int ir0 Int ow0 utf8_encode ::EncodeBuffer utf8_encode :: CodeBuffer Char Word8 utf8_encode input :: Buffer Char input @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer Char iraw ,bufL :: forall e. Buffer e -> Int bufL =Int ir0 ,bufR :: forall e. Buffer e -> Int bufR =Int iw ,bufSize :: forall e. Buffer e -> Int bufSize =Int _}output :: Buffer Word8 output @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer Word8 oraw ,bufL :: forall e. Buffer e -> Int bufL =Int _,bufR :: forall e. Buffer e -> Int bufR =Int ow0 ,bufSize :: forall e. Buffer e -> Int bufSize =Int os }=letdone :: a -> Int -> Int -> m (a, Buffer Char, Buffer Word8) done a why !Int ir !Int ow =(a, Buffer Char, Buffer Word8) -> m (a, Buffer Char, Buffer Word8) forall a. a -> m a forall (m :: * -> *) a. Monad m => a -> m a return (a why ,ifInt ir Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int iw thenBuffer Char input {bufL :: Int bufL =Int 0,bufR :: Int bufR =Int 0}elseBuffer Char input {bufL :: Int bufL =Int ir },Buffer Word8 output {bufR :: Int bufR =Int ow })loop :: Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) loop !Int ir !Int ow |Int ow Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int os =CodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Char, Buffer Word8) done CodingProgress OutputUnderflow Int ir Int ow |Int ir Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int iw =CodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Char, Buffer Word8) done CodingProgress InputUnderflow Int ir Int ow |Bool otherwise =do(Char c ,Int ir' )<-RawBuffer Char -> Int -> IO (Char, Int) readCharBuf RawBuffer Char iraw Int ir caseChar -> Int ord Char c ofInt x |Int x Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <= Int 0x7F->doRawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw Int ow (Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral Int x )Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) loop Int ir' (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)|Int x Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <= Int 0x07FF->ifInt os Int -> Int -> Int forall a. Num a => a -> a -> a - Int ow Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 2thenCodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Char, Buffer Word8) done CodingProgress OutputUnderflow Int ir Int ow elsedolet(Word8 c1 ,Word8 c2 )=Char -> (Word8, Word8) ord2 Char c RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw Int ow Word8 c1 RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)Word8 c2 Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) loop Int ir' (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)|Int x Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <= Int 0xFFFF->ifChar -> Bool isSurrogate Char c thenCodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Char, Buffer Word8) done CodingProgress InvalidSequence Int ir Int ow elsedoifInt os Int -> Int -> Int forall a. Num a => a -> a -> a - Int ow Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 3thenCodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Char, Buffer Word8) done CodingProgress OutputUnderflow Int ir Int ow elsedolet(Word8 c1 ,Word8 c2 ,Word8 c3 )=Char -> (Word8, Word8, Word8) ord3 Char c RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw Int ow Word8 c1 RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)Word8 c2 RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)Word8 c3 Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) loop Int ir' (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 3)|Bool otherwise ->doifInt os Int -> Int -> Int forall a. Num a => a -> a -> a - Int ow Int -> Int -> Bool forall a. Ord a => a -> a -> Bool < Int 4thenCodingProgress -> Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) forall {m :: * -> *} {a}. Monad m => a -> Int -> Int -> m (a, Buffer Char, Buffer Word8) done CodingProgress OutputUnderflow Int ir Int ow elsedolet(Word8 c1 ,Word8 c2 ,Word8 c3 ,Word8 c4 )=Char -> (Word8, Word8, Word8, Word8) ord4 Char c RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw Int ow Word8 c1 RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1)Word8 c2 RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 2)Word8 c3 RawBuffer Word8 -> Int -> Word8 -> IO () writeWord8Buf RawBuffer Word8 oraw (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 3)Word8 c4 Int -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) loop Int ir' (Int ow Int -> Int -> Int forall a. Num a => a -> a -> a + Int 4)inInt -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) loop Int ir0 Int ow0 -- ------------------------------------------------------------------------------- UTF-8 primitives, lifted from Data.Text.Fusion.Utf8ord2 ::Char ->(Word8 ,Word8 )ord2 :: Char -> (Word8, Word8) ord2 Char c =Bool -> (Word8, Word8) -> (Word8, Word8) forall a. (?callStack::CallStack) => Bool -> a -> a assert (Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int 0x80Bool -> Bool -> Bool && Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <= Int 0x07ff)(Word8 x1 ,Word8 x2 )wheren :: Int n =Char -> Int ord Char c x1 :: Word8 x1 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ (Int n Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int 6)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0xC0x2 :: Word8 x2 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ (Int n Int -> Int -> Int forall a. Bits a => a -> a -> a .&. Int 0x3F)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0x80ord3 ::Char ->(Word8 ,Word8 ,Word8 )ord3 :: Char -> (Word8, Word8, Word8) ord3 Char c =Bool -> (Word8, Word8, Word8) -> (Word8, Word8, Word8) forall a. (?callStack::CallStack) => Bool -> a -> a assert (Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int 0x0800Bool -> Bool -> Bool && Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool <= Int 0xffff)(Word8 x1 ,Word8 x2 ,Word8 x3 )wheren :: Int n =Char -> Int ord Char c x1 :: Word8 x1 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ (Int n Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int 12)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0xE0x2 :: Word8 x2 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ ((Int n Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int 6)Int -> Int -> Int forall a. Bits a => a -> a -> a .&. Int 0x3F)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0x80x3 :: Word8 x3 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ (Int n Int -> Int -> Int forall a. Bits a => a -> a -> a .&. Int 0x3F)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0x80ord4 ::Char ->(Word8 ,Word8 ,Word8 ,Word8 )ord4 :: Char -> (Word8, Word8, Word8, Word8) ord4 Char c =Bool -> (Word8, Word8, Word8, Word8) -> (Word8, Word8, Word8, Word8) forall a. (?callStack::CallStack) => Bool -> a -> a assert (Int n Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int 0x10000)(Word8 x1 ,Word8 x2 ,Word8 x3 ,Word8 x4 )wheren :: Int n =Char -> Int ord Char c x1 :: Word8 x1 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ (Int n Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int 18)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0xF0x2 :: Word8 x2 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ ((Int n Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int 12)Int -> Int -> Int forall a. Bits a => a -> a -> a .&. Int 0x3F)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0x80x3 :: Word8 x3 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ ((Int n Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int 6)Int -> Int -> Int forall a. Bits a => a -> a -> a .&. Int 0x3F)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0x80x4 :: Word8 x4 =Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> Int -> Word8 forall a b. (a -> b) -> a -> b $ (Int n Int -> Int -> Int forall a. Bits a => a -> a -> a .&. Int 0x3F)Int -> Int -> Int forall a. Num a => a -> a -> a + Int 0x80chr2 ::Word8 ->Word8 ->Char chr2 :: Word8 -> Word8 -> Char chr2 (W8# Word8# x1# )(W8# Word8# x2# )=Char# -> Char C# (Int# -> Char# chr# (Int# z1# Int# -> Int# -> Int# +# Int# z2# ))where!y1# :: Int# y1# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x1# )!y2# :: Int# y2# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x2# )!z1# :: Int# z1# =Int# -> Int# -> Int# uncheckedIShiftL# (Int# y1# Int# -> Int# -> Int# -# Int# 0xC0#)Int# 6#!z2# :: Int# z2# =Int# y2# Int# -> Int# -> Int# -# Int# 0x80#{-# INLINEchr2 #-}chr3 ::Word8 ->Word8 ->Word8 ->Char chr3 :: Word8 -> Word8 -> Word8 -> Char chr3 (W8# Word8# x1# )(W8# Word8# x2# )(W8# Word8# x3# )=Char# -> Char C# (Int# -> Char# chr# (Int# z1# Int# -> Int# -> Int# +# Int# z2# Int# -> Int# -> Int# +# Int# z3# ))where!y1# :: Int# y1# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x1# )!y2# :: Int# y2# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x2# )!y3# :: Int# y3# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x3# )!z1# :: Int# z1# =Int# -> Int# -> Int# uncheckedIShiftL# (Int# y1# Int# -> Int# -> Int# -# Int# 0xE0#)Int# 12#!z2# :: Int# z2# =Int# -> Int# -> Int# uncheckedIShiftL# (Int# y2# Int# -> Int# -> Int# -# Int# 0x80#)Int# 6#!z3# :: Int# z3# =Int# y3# Int# -> Int# -> Int# -# Int# 0x80#{-# INLINEchr3 #-}chr4 ::Word8 ->Word8 ->Word8 ->Word8 ->Char chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char chr4 (W8# Word8# x1# )(W8# Word8# x2# )(W8# Word8# x3# )(W8# Word8# x4# )=Char# -> Char C# (Int# -> Char# chr# (Int# z1# Int# -> Int# -> Int# +# Int# z2# Int# -> Int# -> Int# +# Int# z3# Int# -> Int# -> Int# +# Int# z4# ))where!y1# :: Int# y1# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x1# )!y2# :: Int# y2# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x2# )!y3# :: Int# y3# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x3# )!y4# :: Int# y4# =Word# -> Int# word2Int# (Word8# -> Word# word8ToWord# Word8# x4# )!z1# :: Int# z1# =Int# -> Int# -> Int# uncheckedIShiftL# (Int# y1# Int# -> Int# -> Int# -# Int# 0xF0#)Int# 18#!z2# :: Int# z2# =Int# -> Int# -> Int# uncheckedIShiftL# (Int# y2# Int# -> Int# -> Int# -# Int# 0x80#)Int# 12#!z3# :: Int# z3# =Int# -> Int# -> Int# uncheckedIShiftL# (Int# y3# Int# -> Int# -> Int# -# Int# 0x80#)Int# 6#!z4# :: Int# z4# =Int# y4# Int# -> Int# -> Int# -# Int# 0x80#{-# INLINEchr4 #-}between ::Word8 -- ^ byte to check->Word8 -- ^ lower bound->Word8 -- ^ upper bound->Bool between :: Word8 -> Word8 -> Word8 -> Bool between Word8 x Word8 y Word8 z =Word8 x Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool >= Word8 y Bool -> Bool -> Bool && Word8 x Word8 -> Word8 -> Bool forall a. Ord a => a -> a -> Bool <= Word8 z {-# INLINEbetween #-}validate3 ::Word8 ->Word8 ->Word8 ->Bool {-# INLINEvalidate3 #-}validate3 :: Word8 -> Word8 -> Word8 -> Bool validate3 Word8 x1 Word8 x2 Word8 x3 =Bool validate3_1 Bool -> Bool -> Bool || Bool validate3_2 Bool -> Bool -> Bool || Bool validate3_3 Bool -> Bool -> Bool || Bool validate3_4 wherevalidate3_1 :: Bool validate3_1 =(Word8 x1 Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool == Word8 0xE0)Bool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x2 Word8 0xA0Word8 0xBFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x3 Word8 0x80Word8 0xBFvalidate3_2 :: Bool validate3_2 =Word8 -> Word8 -> Word8 -> Bool between Word8 x1 Word8 0xE1Word8 0xECBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x2 Word8 0x80Word8 0xBFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x3 Word8 0x80Word8 0xBFvalidate3_3 :: Bool validate3_3 =Word8 x1 Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool == Word8 0xEDBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x2 Word8 0x80Word8 0x9FBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x3 Word8 0x80Word8 0xBFvalidate3_4 :: Bool validate3_4 =Word8 -> Word8 -> Word8 -> Bool between Word8 x1 Word8 0xEEWord8 0xEFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x2 Word8 0x80Word8 0xBFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x3 Word8 0x80Word8 0xBFvalidate4 ::Word8 ->Word8 ->Word8 ->Word8 ->Bool {-# INLINEvalidate4 #-}validate4 :: Word8 -> Word8 -> Word8 -> Word8 -> Bool validate4 Word8 x1 Word8 x2 Word8 x3 Word8 x4 =Bool validate4_1 Bool -> Bool -> Bool || Bool validate4_2 Bool -> Bool -> Bool || Bool validate4_3 wherevalidate4_1 :: Bool validate4_1 =Word8 x1 Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool == Word8 0xF0Bool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x2 Word8 0x90Word8 0xBFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x3 Word8 0x80Word8 0xBFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x4 Word8 0x80Word8 0xBFvalidate4_2 :: Bool validate4_2 =Word8 -> Word8 -> Word8 -> Bool between Word8 x1 Word8 0xF1Word8 0xF3Bool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x2 Word8 0x80Word8 0xBFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x3 Word8 0x80Word8 0xBFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x4 Word8 0x80Word8 0xBFvalidate4_3 :: Bool validate4_3 =Word8 x1 Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool == Word8 0xF4Bool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x2 Word8 0x80Word8 0x8FBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x3 Word8 0x80Word8 0xBFBool -> Bool -> Bool && Word8 -> Word8 -> Word8 -> Bool between Word8 x4 Word8 0x80Word8 0xBF