{-# LANGUAGE Trustworthy #-}{-# LANGUAGE CPP , NoImplicitPrelude , NondecreasingIndentation #-}{-# OPTIONS_HADDOCK not-home #-}------------------------------------------------------------------------------- |-- Module : GHC.IO.Encoding.Iconv-- Copyright : (c) The University of Glasgow, 2008-2009-- License : see libraries/base/LICENSE---- Maintainer : libraries@haskell.org-- Stability : internal-- Portability : non-portable---- This module provides text encoding/decoding using iconv-------------------------------------------------------------------------------moduleGHC.IO.Encoding.Iconv( #if !defined(mingw32_HOST_OS) iconvEncoding ,mkIconvEncoding ,localeEncodingName #endif )where #include "MachDeps.h" #include "HsBaseConfig.h" #if defined(mingw32_HOST_OS) importGHC.Base()-- For build ordering #else importForeign importForeign.C hiding(charIsRepresentable )importData.Maybe importGHC.Base importGHC.Foreign (charIsRepresentable )importGHC.IO.Buffer importGHC.IO.Encoding.Failure importGHC.IO.Encoding.Types importGHC.List (span )importGHC.Num importGHC.Show importGHC.Real importSystem.IO.Unsafe (unsafePerformIO )importSystem.Posix.Internals c_DEBUG_DUMP ::Boolc_DEBUG_DUMP :: Bool c_DEBUG_DUMP =Bool Falseiconv_trace ::String ->IO()iconv_trace :: String -> IO () iconv_trace s :: String s |Bool c_DEBUG_DUMP =String -> IO () puts String s |Bool otherwise =() -> IO () forall (m :: * -> *) a. Monad m => a -> m a return ()-- ------------------------------------------------------------------------------- iconv encoders/decoders{-# NOINLINElocaleEncodingName #-}localeEncodingName ::String localeEncodingName :: String localeEncodingName =IO String -> String forall a. IO a -> a unsafePerformIO (IO String -> String) -> IO String -> String forall a b. (a -> b) -> a -> b $ do-- Use locale_charset() or nl_langinfo(CODESET) to get the encoding-- if we have either of them.CString cstr <-IO CString c_localeEncoding CString -> IO String peekCAString CString cstr -- Assume charset names are ASCII-- We hope iconv_t is a storable type. It should be, since it has at least the-- value -1, which is a possible return value from iconv_open.typeIConv =CLong -- ToDo: (#type iconv_t)foreignimportccallunsafe"hs_iconv_open"hs_iconv_open ::CString ->CString ->IOIConv foreignimportccallunsafe"hs_iconv_close"hs_iconv_close ::IConv ->IOCInt foreignimportccallunsafe"hs_iconv"hs_iconv ::IConv ->Ptr CString ->Ptr CSize ->Ptr CString ->Ptr CSize ->IOCSize foreignimportccallunsafe"localeEncoding"c_localeEncoding ::IOCString haskellChar ::String #if defined(WORDS_BIGENDIAN) haskellChar|charSize==2="UTF-16BE"|otherwise="UTF-32BE" #else haskellChar :: String haskellChar |Int charSize Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==2="UTF-16LE"|Bool otherwise ="UTF-32LE" #endif char_shift ::Intchar_shift :: Int char_shift |Int charSize Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==2=1|Bool otherwise =2iconvEncoding ::String ->IO(Maybe TextEncoding )iconvEncoding :: String -> IO (Maybe TextEncoding) iconvEncoding =CodingFailureMode -> String -> IO (Maybe TextEncoding) mkIconvEncoding CodingFailureMode ErrorOnCodingFailure -- | Construct an iconv-based 'TextEncoding' for the given character set and-- 'CodingFailureMode'.---- As iconv is missing in some minimal environments (e.g. #10298), this-- checks to ensure that iconv is working properly before returning the-- encoding, returning 'Nothing' if not.mkIconvEncoding ::CodingFailureMode ->String ->IO(Maybe TextEncoding )mkIconvEncoding :: CodingFailureMode -> String -> IO (Maybe TextEncoding) mkIconvEncoding cfm :: CodingFailureMode cfm charset :: String charset =doletenc :: TextEncoding enc =TextEncoding :: forall dstate estate. String -> IO (TextDecoder dstate) -> IO (TextEncoder estate) -> TextEncoding TextEncoding {textEncodingName :: String textEncodingName =String charset ,mkTextDecoder :: IO (TextDecoder ()) mkTextDecoder =String -> String -> (Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char)) -> (IConv -> Buffer Word8 -> Buffer Char -> IO (CodingProgress, Buffer Word8, Buffer Char)) -> IO (TextDecoder ()) forall a b. String -> String -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b)) -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (BufferCodec a b ()) newIConv String raw_charset (String haskellChar String -> String -> String forall a. [a] -> [a] -> [a] ++ String suffix )(CodingFailureMode -> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char) recoverDecode CodingFailureMode cfm )IConv -> Buffer Word8 -> Buffer Char -> IO (CodingProgress, Buffer Word8, Buffer Char) iconvDecode ,mkTextEncoder :: IO (TextEncoder ()) mkTextEncoder =String -> String -> (Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8)) -> (IConv -> Buffer Char -> Buffer Word8 -> IO (CodingProgress, Buffer Char, Buffer Word8)) -> IO (TextEncoder ()) forall a b. String -> String -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b)) -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (BufferCodec a b ()) newIConv String haskellChar String charset (CodingFailureMode -> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8) recoverEncode CodingFailureMode cfm )IConv -> Buffer Char -> Buffer Word8 -> IO (CodingProgress, Buffer Char, Buffer Word8) iconvEncode }Bool good <-TextEncoding -> Char -> IO Bool charIsRepresentable TextEncoding enc 'a'Maybe TextEncoding -> IO (Maybe TextEncoding) forall (m :: * -> *) a. Monad m => a -> m a return (Maybe TextEncoding -> IO (Maybe TextEncoding)) -> Maybe TextEncoding -> IO (Maybe TextEncoding) forall a b. (a -> b) -> a -> b $ ifBool good thenTextEncoding -> Maybe TextEncoding forall a. a -> Maybe a Just TextEncoding enc elseMaybe TextEncoding forall a. Maybe a Nothing where-- An annoying feature of GNU iconv is that the //PREFIXES only take-- effect when they appear on the tocode parameter to iconv_open:(raw_charset :: String raw_charset ,suffix :: String suffix )=(Char -> Bool) -> String -> (String, String) forall a. (a -> Bool) -> [a] -> ([a], [a]) span (Char -> Char -> Bool forall a. Eq a => a -> a -> Bool /='/')String charset newIConv ::String ->String ->(Buffer a ->Buffer b ->IO(Buffer a ,Buffer b ))->(IConv ->Buffer a ->Buffer b ->IO(CodingProgress ,Buffer a ,Buffer b ))->IO(BufferCodec a b ())newIConv :: String -> String -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b)) -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (BufferCodec a b ()) newIConv from :: String from to :: String to rec :: Buffer a -> Buffer b -> IO (Buffer a, Buffer b) rec fn :: IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b) fn =-- Assume charset names are ASCIIString -> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()) forall a. String -> (CString -> IO a) -> IO a withCAString String from ((CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ())) -> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()) forall a b. (a -> b) -> a -> b $ \from_str :: CString from_str ->String -> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()) forall a. String -> (CString -> IO a) -> IO a withCAString String to ((CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ())) -> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()) forall a b. (a -> b) -> a -> b $ \to_str :: CString to_str ->doIConv iconvt <-String -> IO IConv -> IO IConv forall a. (Eq a, Num a) => String -> IO a -> IO a throwErrnoIfMinus1 "mkTextEncoding"(IO IConv -> IO IConv) -> IO IConv -> IO IConv forall a b. (a -> b) -> a -> b $ CString -> CString -> IO IConv hs_iconv_open CString to_str CString from_str leticlose :: IO () iclose =String -> IO CInt -> IO () forall a. (Eq a, Num a) => String -> IO a -> IO () throwErrnoIfMinus1_ "Iconv.close"(IO CInt -> IO ()) -> IO CInt -> IO () forall a b. (a -> b) -> a -> b $ IConv -> IO CInt hs_iconv_close IConv iconvt BufferCodec a b () -> IO (BufferCodec a b ()) forall (m :: * -> *) a. Monad m => a -> m a return BufferCodec :: forall from to state. CodeBuffer from to -> (Buffer from -> Buffer to -> IO (Buffer from, Buffer to)) -> IO () -> IO state -> (state -> IO ()) -> BufferCodec from to state BufferCodec {encode :: Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b) encode =IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b) fn IConv iconvt ,recover :: Buffer a -> Buffer b -> IO (Buffer a, Buffer b) recover =Buffer a -> Buffer b -> IO (Buffer a, Buffer b) rec ,close :: IO () close =IO () iclose ,-- iconv doesn't supply a way to save/restore the stategetState :: IO () getState =() -> IO () forall (m :: * -> *) a. Monad m => a -> m a return (),setState :: () -> IO () setState =IO () -> () -> IO () forall a b. a -> b -> a const (IO () -> () -> IO ()) -> IO () -> () -> IO () forall a b. (a -> b) -> a -> b $ () -> IO () forall (m :: * -> *) a. Monad m => a -> m a return ()}iconvDecode ::IConv ->DecodeBuffer iconvDecode :: IConv -> Buffer Word8 -> Buffer Char -> IO (CodingProgress, Buffer Word8, Buffer Char) iconvDecode iconv_t :: IConv iconv_t ibuf :: Buffer Word8 ibuf obuf :: Buffer Char obuf =IConv -> Buffer Word8 -> Int -> Buffer Char -> Int -> IO (CodingProgress, Buffer Word8, Buffer Char) forall a b. IConv -> Buffer a -> Int -> Buffer b -> Int -> IO (CodingProgress, Buffer a, Buffer b) iconvRecode IConv iconv_t Buffer Word8 ibuf 0Buffer Char obuf Int char_shift iconvEncode ::IConv ->EncodeBuffer iconvEncode :: IConv -> Buffer Char -> Buffer Word8 -> IO (CodingProgress, Buffer Char, Buffer Word8) iconvEncode iconv_t :: IConv iconv_t ibuf :: Buffer Char ibuf obuf :: Buffer Word8 obuf =IConv -> Buffer Char -> Int -> Buffer Word8 -> Int -> IO (CodingProgress, Buffer Char, Buffer Word8) forall a b. IConv -> Buffer a -> Int -> Buffer b -> Int -> IO (CodingProgress, Buffer a, Buffer b) iconvRecode IConv iconv_t Buffer Char ibuf Int char_shift Buffer Word8 obuf 0iconvRecode ::IConv ->Buffer a ->Int->Buffer b ->Int->IO(CodingProgress ,Buffer a ,Buffer b )iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int -> IO (CodingProgress, Buffer a, Buffer b) iconvRecode iconv_t :: IConv iconv_t input :: Buffer a input @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer a iraw ,bufL :: forall e. Buffer e -> Int bufL =Int ir ,bufR :: forall e. Buffer e -> Int bufR =Int iw ,bufSize :: forall e. Buffer e -> Int bufSize =Int _}iscale :: Int iscale output :: Buffer b output @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer b oraw ,bufL :: forall e. Buffer e -> Int bufL =Int _,bufR :: forall e. Buffer e -> Int bufR =Int ow ,bufSize :: forall e. Buffer e -> Int bufSize =Int os }oscale :: Int oscale =doString -> IO () iconv_trace ("haskellChar="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show String haskellChar )String -> IO () iconv_trace ("iconvRecode before, input="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show (Buffer a -> String forall a. Buffer a -> String summaryBuffer Buffer a input ))String -> IO () iconv_trace ("iconvRecode before, output="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show (Buffer b -> String forall a. Buffer a -> String summaryBuffer Buffer b output ))RawBuffer a -> (Ptr a -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall e a. RawBuffer e -> (Ptr e -> IO a) -> IO a withRawBuffer RawBuffer a iraw ((Ptr a -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr a -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \piraw :: Ptr a piraw ->doRawBuffer b -> (Ptr b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall e a. RawBuffer e -> (Ptr e -> IO a) -> IO a withRawBuffer RawBuffer b oraw ((Ptr b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \poraw :: Ptr b poraw ->doCString -> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b with (Ptr a piraw Ptr a -> Int -> CString forall a b. Ptr a -> Int -> Ptr b `plusPtr` (Int ir Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftL` Int iscale ))((Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \p_inbuf :: Ptr CString p_inbuf ->doCString -> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b with (Ptr b poraw Ptr b -> Int -> CString forall a b. Ptr a -> Int -> Ptr b `plusPtr` (Int ow Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftL` Int oscale ))((Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \p_outbuf :: Ptr CString p_outbuf ->doCSize -> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b with (Int -> CSize forall a b. (Integral a, Num b) => a -> b fromIntegral ((Int iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int ir )Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftL` Int iscale ))((Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \p_inleft :: Ptr CSize p_inleft ->doCSize -> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b with (Int -> CSize forall a b. (Integral a, Num b) => a -> b fromIntegral ((Int os Int -> Int -> Int forall a. Num a => a -> a -> a - Int ow )Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftL` Int oscale ))((Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \p_outleft :: Ptr CSize p_outleft ->doCSize res <-IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize -> IO CSize hs_iconv IConv iconv_t Ptr CString p_inbuf Ptr CSize p_inleft Ptr CString p_outbuf Ptr CSize p_outleft CSize new_inleft <-Ptr CSize -> IO CSize forall a. Storable a => Ptr a -> IO a peek Ptr CSize p_inleft CSize new_outleft <-Ptr CSize -> IO CSize forall a. Storable a => Ptr a -> IO a peek Ptr CSize p_outleft letnew_inleft' :: Int new_inleft' =CSize -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral CSize new_inleft Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int iscale new_outleft' :: Int new_outleft' =CSize -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral CSize new_outleft Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int oscale new_input :: Buffer a new_input |CSize new_inleft CSize -> CSize -> Bool forall a. Eq a => a -> a -> Bool ==0=Buffer a input {bufL :: Int bufL =0,bufR :: Int bufR =0}|Bool otherwise =Buffer a input {bufL :: Int bufL =Int iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int new_inleft' }new_output :: Buffer b new_output =Buffer b output {bufR :: Int bufR =Int os Int -> Int -> Int forall a. Num a => a -> a -> a - Int new_outleft' }String -> IO () iconv_trace ("iconv res="String -> String -> String forall a. [a] -> [a] -> [a] ++ CSize -> String forall a. Show a => a -> String show CSize res )String -> IO () iconv_trace ("iconvRecode after, input="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show (Buffer a -> String forall a. Buffer a -> String summaryBuffer Buffer a new_input ))String -> IO () iconv_trace ("iconvRecode after, output="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show (Buffer b -> String forall a. Buffer a -> String summaryBuffer Buffer b new_output ))if(CSize res CSize -> CSize -> Bool forall a. Eq a => a -> a -> Bool /=-1)thendo-- all input translated(CodingProgress, Buffer a, Buffer b) -> IO (CodingProgress, Buffer a, Buffer b) forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress InputUnderflow ,Buffer a new_input ,Buffer b new_output )elsedoErrno errno <-IO Errno getErrno caseErrno errno ofe :: Errno e |Errno e Errno -> Errno -> Bool forall a. Eq a => a -> a -> Bool ==Errno e2BIG ->(CodingProgress, Buffer a, Buffer b) -> IO (CodingProgress, Buffer a, Buffer b) forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress OutputUnderflow ,Buffer a new_input ,Buffer b new_output )|Errno e Errno -> Errno -> Bool forall a. Eq a => a -> a -> Bool ==Errno eINVAL ->(CodingProgress, Buffer a, Buffer b) -> IO (CodingProgress, Buffer a, Buffer b) forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress InputUnderflow ,Buffer a new_input ,Buffer b new_output )-- Sometimes iconv reports EILSEQ for a-- character in the input even when there is no room-- in the output; in this case we might be about to-- change the encoding anyway, so the following bytes-- could very well be in a different encoding.---- Because we can only say InvalidSequence if there is at least-- one element left in the output, we have to special case this.|Errno e Errno -> Errno -> Bool forall a. Eq a => a -> a -> Bool ==Errno eILSEQ ->(CodingProgress, Buffer a, Buffer b) -> IO (CodingProgress, Buffer a, Buffer b) forall (m :: * -> *) a. Monad m => a -> m a return (ifInt new_outleft' Int -> Int -> Bool forall a. Eq a => a -> a -> Bool ==0thenCodingProgress OutputUnderflow elseCodingProgress InvalidSequence ,Buffer a new_input ,Buffer b new_output )|Bool otherwise ->doString -> IO () iconv_trace ("iconv returned error: "String -> String -> String forall a. [a] -> [a] -> [a] ++ IOError -> String forall a. Show a => a -> String show (String -> Errno -> Maybe Handle -> Maybe String -> IOError errnoToIOError "iconv"Errno e Maybe Handle forall a. Maybe a Nothing Maybe String forall a. Maybe a Nothing ))String -> IO (CodingProgress, Buffer a, Buffer b) forall a. String -> IO a throwErrno "iconvRecoder" #endif /* !mingw32_HOST_OS */