{-# LANGUAGE Trustworthy #-}{-# LANGUAGE CPP , NoImplicitPrelude , NondecreasingIndentation #-}{-# OPTIONS_HADDOCK not-home #-}------------------------------------------------------------------------------- |-- Module : GHC.IO.Encoding.Iconv-- Copyright : (c) The University of Glasgow, 2008-2009-- License : see libraries/base/LICENSE---- Maintainer : libraries@haskell.org-- Stability : internal-- Portability : non-portable---- This module provides text encoding/decoding using iconv-------------------------------------------------------------------------------moduleGHC.IO.Encoding.Iconv( #if !defined(mingw32_HOST_OS) iconvEncoding ,mkIconvEncoding ,localeEncodingName #endif )where #include "MachDeps.h" #include "HsBaseConfig.h" #if defined(mingw32_HOST_OS) importGHC.Base()-- For build ordering #else importForeign importForeign.C hiding(charIsRepresentable )importData.Maybe importGHC.Base importGHC.Foreign (charIsRepresentable )importGHC.IO.Buffer importGHC.IO.Encoding.Failure importGHC.IO.Encoding.Types importGHC.List (span )importGHC.Num importGHC.Show importGHC.Real importSystem.IO.Unsafe (unsafePerformIO )importSystem.Posix.Internals c_DEBUG_DUMP ::Bool c_DEBUG_DUMP :: Bool c_DEBUG_DUMP =Bool False iconv_trace ::String ->IO ()iconv_trace :: String -> IO () iconv_trace String s |Bool c_DEBUG_DUMP =String -> IO () puts String s |Bool otherwise =() -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return ()-- ------------------------------------------------------------------------------- iconv encoders/decoders{-# NOINLINElocaleEncodingName #-}localeEncodingName ::String localeEncodingName :: String localeEncodingName =IO String -> String forall a. IO a -> a unsafePerformIO (IO String -> String) -> IO String -> String forall a b. (a -> b) -> a -> b $ do-- Use locale_charset() or nl_langinfo(CODESET) to get the encoding-- if we have either of them.CString cstr <-IO CString c_localeEncoding CString -> IO String peekCAString CString cstr -- Assume charset names are ASCII-- We hope iconv_t is a storable type. It should be, since it has at least the-- value -1, which is a possible return value from iconv_open.typeIConv =CLong -- ToDo: (#type iconv_t)foreignimportccallunsafe"hs_iconv_open"hs_iconv_open ::CString ->CString ->IO IConv foreignimportccallunsafe"hs_iconv_close"hs_iconv_close ::IConv ->IO CInt foreignimportccallunsafe"hs_iconv"hs_iconv ::IConv ->Ptr CString ->Ptr CSize ->Ptr CString ->Ptr CSize ->IO CSize foreignimportccallunsafe"localeEncoding"c_localeEncoding ::IO CString haskellChar ::String #if defined(WORDS_BIGENDIAN) haskellChar|charSize==2="UTF-16BE"|otherwise="UTF-32BE" #else haskellChar :: String haskellChar |Int charSize Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int 2=String "UTF-16LE"|Bool otherwise =String "UTF-32LE" #endif char_shift ::Int char_shift :: Int char_shift |Int charSize Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int 2=Int 1|Bool otherwise =Int 2iconvEncoding ::String ->IO (Maybe TextEncoding )iconvEncoding :: String -> IO (Maybe TextEncoding) iconvEncoding =CodingFailureMode -> String -> IO (Maybe TextEncoding) mkIconvEncoding CodingFailureMode ErrorOnCodingFailure -- | Construct an iconv-based 'TextEncoding' for the given character set and-- 'CodingFailureMode'.---- As iconv is missing in some minimal environments (e.g. #10298), this-- checks to ensure that iconv is working properly before returning the-- encoding, returning 'Nothing' if not.mkIconvEncoding ::CodingFailureMode ->String ->IO (Maybe TextEncoding )mkIconvEncoding :: CodingFailureMode -> String -> IO (Maybe TextEncoding) mkIconvEncoding CodingFailureMode cfm String charset =doletenc :: TextEncoding enc =TextEncoding {textEncodingName :: String textEncodingName =String charset ,mkTextDecoder :: IO (TextDecoder ()) mkTextDecoder =String -> String -> (Buffer Word8 -> Buffer CharBufElem -> IO (Buffer Word8, Buffer CharBufElem)) -> (IConv -> Buffer Word8 -> Buffer CharBufElem -> IO (CodingProgress, Buffer Word8, Buffer CharBufElem)) -> IO (TextDecoder ()) forall a b. String -> String -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b)) -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (BufferCodec a b ()) newIConv String raw_charset (String haskellChar String -> String -> String forall a. [a] -> [a] -> [a] ++ String suffix )(CodingFailureMode -> Buffer Word8 -> Buffer CharBufElem -> IO (Buffer Word8, Buffer CharBufElem) recoverDecode CodingFailureMode cfm )IConv -> Buffer Word8 -> Buffer CharBufElem -> IO (CodingProgress, Buffer Word8, Buffer CharBufElem) iconvDecode ,mkTextEncoder :: IO (TextEncoder ()) mkTextEncoder =String -> String -> (Buffer CharBufElem -> Buffer Word8 -> IO (Buffer CharBufElem, Buffer Word8)) -> (IConv -> Buffer CharBufElem -> Buffer Word8 -> IO (CodingProgress, Buffer CharBufElem, Buffer Word8)) -> IO (TextEncoder ()) forall a b. String -> String -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b)) -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (BufferCodec a b ()) newIConv String haskellChar String charset (CodingFailureMode -> Buffer CharBufElem -> Buffer Word8 -> IO (Buffer CharBufElem, Buffer Word8) recoverEncode CodingFailureMode cfm )IConv -> Buffer CharBufElem -> Buffer Word8 -> IO (CodingProgress, Buffer CharBufElem, Buffer Word8) iconvEncode }Bool good <-TextEncoding -> CharBufElem -> IO Bool charIsRepresentable TextEncoding enc CharBufElem 'a'Maybe TextEncoding -> IO (Maybe TextEncoding) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (Maybe TextEncoding -> IO (Maybe TextEncoding)) -> Maybe TextEncoding -> IO (Maybe TextEncoding) forall a b. (a -> b) -> a -> b $ ifBool good thenTextEncoding -> Maybe TextEncoding forall a. a -> Maybe a Just TextEncoding enc elseMaybe TextEncoding forall a. Maybe a Nothing where-- An annoying feature of GNU iconv is that the //PREFIXES only take-- effect when they appear on the tocode parameter to iconv_open:(String raw_charset ,String suffix )=(CharBufElem -> Bool) -> String -> (String, String) forall a. (a -> Bool) -> [a] -> ([a], [a]) span (CharBufElem -> CharBufElem -> Bool forall a. Eq a => a -> a -> Bool /= CharBufElem '/')String charset newIConv ::String ->String ->(Buffer a ->Buffer b ->IO (Buffer a ,Buffer b ))->(IConv ->Buffer a ->Buffer b ->IO (CodingProgress ,Buffer a ,Buffer b ))->IO (BufferCodec a b ())newIConv :: forall a b. String -> String -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b)) -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (BufferCodec a b ()) newIConv String from String to Buffer a -> Buffer b -> IO (Buffer a, Buffer b) rec IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b) fn =-- Assume charset names are ASCIIString -> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()) forall a. String -> (CString -> IO a) -> IO a withCAString String from ((CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ())) -> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()) forall a b. (a -> b) -> a -> b $ \CString from_str ->String -> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()) forall a. String -> (CString -> IO a) -> IO a withCAString String to ((CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ())) -> (CString -> IO (BufferCodec a b ())) -> IO (BufferCodec a b ()) forall a b. (a -> b) -> a -> b $ \CString to_str ->doIConv iconvt <-String -> IO IConv -> IO IConv forall a. (Eq a, Num a) => String -> IO a -> IO a throwErrnoIfMinus1 String "mkTextEncoding"(IO IConv -> IO IConv) -> IO IConv -> IO IConv forall a b. (a -> b) -> a -> b $ CString -> CString -> IO IConv hs_iconv_open CString to_str CString from_str leticlose :: IO () iclose =String -> IO CInt -> IO () forall a. (Eq a, Num a) => String -> IO a -> IO () throwErrnoIfMinus1_ String "Iconv.close"(IO CInt -> IO ()) -> IO CInt -> IO () forall a b. (a -> b) -> a -> b $ IConv -> IO CInt hs_iconv_close IConv iconvt BufferCodec a b () -> IO (BufferCodec a b ()) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return BufferCodec {encode :: Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b) encode =IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b) fn IConv iconvt ,recover :: Buffer a -> Buffer b -> IO (Buffer a, Buffer b) recover =Buffer a -> Buffer b -> IO (Buffer a, Buffer b) rec ,close :: IO () close =IO () iclose ,-- iconv doesn't supply a way to save/restore the stategetState :: IO () getState =() -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (),setState :: () -> IO () setState =IO () -> () -> IO () forall a b. a -> b -> a const (IO () -> () -> IO ()) -> IO () -> () -> IO () forall a b. (a -> b) -> a -> b $ () -> IO () forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return ()}iconvDecode ::IConv ->DecodeBuffer iconvDecode :: IConv -> Buffer Word8 -> Buffer CharBufElem -> IO (CodingProgress, Buffer Word8, Buffer CharBufElem) iconvDecode IConv iconv_t Buffer Word8 ibuf Buffer CharBufElem obuf =IConv -> Buffer Word8 -> Int -> Buffer CharBufElem -> Int -> IO (CodingProgress, Buffer Word8, Buffer CharBufElem) forall a b. IConv -> Buffer a -> Int -> Buffer b -> Int -> IO (CodingProgress, Buffer a, Buffer b) iconvRecode IConv iconv_t Buffer Word8 ibuf Int 0Buffer CharBufElem obuf Int char_shift iconvEncode ::IConv ->EncodeBuffer iconvEncode :: IConv -> Buffer CharBufElem -> Buffer Word8 -> IO (CodingProgress, Buffer CharBufElem, Buffer Word8) iconvEncode IConv iconv_t Buffer CharBufElem ibuf Buffer Word8 obuf =IConv -> Buffer CharBufElem -> Int -> Buffer Word8 -> Int -> IO (CodingProgress, Buffer CharBufElem, Buffer Word8) forall a b. IConv -> Buffer a -> Int -> Buffer b -> Int -> IO (CodingProgress, Buffer a, Buffer b) iconvRecode IConv iconv_t Buffer CharBufElem ibuf Int char_shift Buffer Word8 obuf Int 0iconvRecode ::IConv ->Buffer a ->Int ->Buffer b ->Int ->IO (CodingProgress ,Buffer a ,Buffer b )iconvRecode :: forall a b. IConv -> Buffer a -> Int -> Buffer b -> Int -> IO (CodingProgress, Buffer a, Buffer b) iconvRecode IConv iconv_t input :: Buffer a input @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer a iraw ,bufL :: forall e. Buffer e -> Int bufL =Int ir ,bufR :: forall e. Buffer e -> Int bufR =Int iw ,bufSize :: forall e. Buffer e -> Int bufSize =Int _}Int iscale output :: Buffer b output @Buffer {bufRaw :: forall e. Buffer e -> RawBuffer e bufRaw =RawBuffer b oraw ,bufL :: forall e. Buffer e -> Int bufL =Int _,bufR :: forall e. Buffer e -> Int bufR =Int ow ,bufSize :: forall e. Buffer e -> Int bufSize =Int os }Int oscale =doString -> IO () iconv_trace (String "haskellChar="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show String haskellChar )String -> IO () iconv_trace (String "iconvRecode before, input="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show (Buffer a -> String forall a. Buffer a -> String summaryBuffer Buffer a input ))String -> IO () iconv_trace (String "iconvRecode before, output="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show (Buffer b -> String forall a. Buffer a -> String summaryBuffer Buffer b output ))RawBuffer a -> (Ptr a -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall e a. RawBuffer e -> (Ptr e -> IO a) -> IO a withRawBuffer RawBuffer a iraw ((Ptr a -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr a -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \Ptr a piraw ->doRawBuffer b -> (Ptr b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall e a. RawBuffer e -> (Ptr e -> IO a) -> IO a withRawBuffer RawBuffer b oraw ((Ptr b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr b -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \Ptr b poraw ->doCString -> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b with (Ptr a piraw Ptr a -> Int -> CString forall a b. Ptr a -> Int -> Ptr b `plusPtr` (Int ir Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftL` Int iscale ))((Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \Ptr CString p_inbuf ->doCString -> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b with (Ptr b poraw Ptr b -> Int -> CString forall a b. Ptr a -> Int -> Ptr b `plusPtr` (Int ow Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftL` Int oscale ))((Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr CString -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \Ptr CString p_outbuf ->doCSize -> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b with (Int -> CSize forall a b. (Integral a, Num b) => a -> b fromIntegral ((Int iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int ir )Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftL` Int iscale ))((Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \Ptr CSize p_inleft ->doCSize -> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b with (Int -> CSize forall a b. (Integral a, Num b) => a -> b fromIntegral ((Int os Int -> Int -> Int forall a. Num a => a -> a -> a - Int ow )Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftL` Int oscale ))((Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b)) -> (Ptr CSize -> IO (CodingProgress, Buffer a, Buffer b)) -> IO (CodingProgress, Buffer a, Buffer b) forall a b. (a -> b) -> a -> b $ \Ptr CSize p_outleft ->doCSize res <-IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize -> IO CSize hs_iconv IConv iconv_t Ptr CString p_inbuf Ptr CSize p_inleft Ptr CString p_outbuf Ptr CSize p_outleft CSize new_inleft <-Ptr CSize -> IO CSize forall a. Storable a => Ptr a -> IO a peek Ptr CSize p_inleft CSize new_outleft <-Ptr CSize -> IO CSize forall a. Storable a => Ptr a -> IO a peek Ptr CSize p_outleft letnew_inleft' :: Int new_inleft' =CSize -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral CSize new_inleft Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int iscale new_outleft' :: Int new_outleft' =CSize -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral CSize new_outleft Int -> Int -> Int forall a. Bits a => a -> Int -> a `shiftR` Int oscale new_input :: Buffer a new_input |CSize new_inleft CSize -> CSize -> Bool forall a. Eq a => a -> a -> Bool == CSize 0=Buffer a input {bufL :: Int bufL =Int 0,bufR :: Int bufR =Int 0}|Bool otherwise =Buffer a input {bufL :: Int bufL =Int iw Int -> Int -> Int forall a. Num a => a -> a -> a - Int new_inleft' }new_output :: Buffer b new_output =Buffer b output {bufR :: Int bufR =Int os Int -> Int -> Int forall a. Num a => a -> a -> a - Int new_outleft' }String -> IO () iconv_trace (String "iconv res="String -> String -> String forall a. [a] -> [a] -> [a] ++ CSize -> String forall a. Show a => a -> String show CSize res )String -> IO () iconv_trace (String "iconvRecode after, input="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show (Buffer a -> String forall a. Buffer a -> String summaryBuffer Buffer a new_input ))String -> IO () iconv_trace (String "iconvRecode after, output="String -> String -> String forall a. [a] -> [a] -> [a] ++ String -> String forall a. Show a => a -> String show (Buffer b -> String forall a. Buffer a -> String summaryBuffer Buffer b new_output ))if(CSize res CSize -> CSize -> Bool forall a. Eq a => a -> a -> Bool /= -CSize 1)then-- all input translated(CodingProgress, Buffer a, Buffer b) -> IO (CodingProgress, Buffer a, Buffer b) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress InputUnderflow ,Buffer a new_input ,Buffer b new_output )elsedoErrno errno <-IO Errno getErrno caseErrno errno ofErrno e |Errno e Errno -> Errno -> Bool forall a. Eq a => a -> a -> Bool == Errno e2BIG ->(CodingProgress, Buffer a, Buffer b) -> IO (CodingProgress, Buffer a, Buffer b) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress OutputUnderflow ,Buffer a new_input ,Buffer b new_output )|Errno e Errno -> Errno -> Bool forall a. Eq a => a -> a -> Bool == Errno eINVAL ->(CodingProgress, Buffer a, Buffer b) -> IO (CodingProgress, Buffer a, Buffer b) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (CodingProgress InputUnderflow ,Buffer a new_input ,Buffer b new_output )-- Sometimes iconv reports EILSEQ for a-- character in the input even when there is no room-- in the output; in this case we might be about to-- change the encoding anyway, so the following bytes-- could very well be in a different encoding.---- Because we can only say InvalidSequence if there is at least-- one element left in the output, we have to special case this.|Errno e Errno -> Errno -> Bool forall a. Eq a => a -> a -> Bool == Errno eILSEQ ->(CodingProgress, Buffer a, Buffer b) -> IO (CodingProgress, Buffer a, Buffer b) forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (ifInt new_outleft' Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int 0thenCodingProgress OutputUnderflow elseCodingProgress InvalidSequence ,Buffer a new_input ,Buffer b new_output )|Bool otherwise ->doString -> IO () iconv_trace (String "iconv returned error: "String -> String -> String forall a. [a] -> [a] -> [a] ++ IOError -> String forall a. Show a => a -> String show (String -> Errno -> Maybe Handle -> Maybe String -> IOError errnoToIOError String "iconv"Errno e Maybe Handle forall a. Maybe a Nothing Maybe String forall a. Maybe a Nothing ))String -> IO (CodingProgress, Buffer a, Buffer b) forall a. String -> IO a throwErrno String "iconvRecoder" #endif /* !mingw32_HOST_OS */