{-# LANGUAGE Trustworthy #-}{-# LANGUAGE CPP , NoImplicitPrelude , NondecreasingIndentation #-}{-# OPTIONS_HADDOCK hide #-}------------------------------------------------------------------------------- |-- Module : GHC.IO.Encoding.Iconv-- Copyright : (c) The University of Glasgow, 2008-2009-- License : see libraries/base/LICENSE---- Maintainer : libraries@haskell.org-- Stability : internal-- Portability : non-portable---- This module provides text encoding/decoding using iconv-------------------------------------------------------------------------------moduleGHC.IO.Encoding.Iconv(#if !defined(mingw32_HOST_OS) iconvEncoding ,mkIconvEncoding ,localeEncodingName #endif )where#include "MachDeps.h" #include "HsBaseConfig.h" #if defined(mingw32_HOST_OS) importGHC.Base()-- For build ordering#else importForeign importForeign.C hiding(charIsRepresentable )importData.Maybe importGHC.Base importGHC.Foreign (charIsRepresentable )importGHC.IO.Buffer importGHC.IO.Encoding.Failure importGHC.IO.Encoding.Types importGHC.List (span )importGHC.Num importGHC.Show importGHC.Real importSystem.IO.Unsafe (unsafePerformIO )importSystem.Posix.Internals c_DEBUG_DUMP::Boolc_DEBUG_DUMP =Falseiconv_trace::String ->IO()iconv_trace s |c_DEBUG_DUMP =puts s |otherwise =return ()-- ------------------------------------------------------------------------------- iconv encoders/decoders{-# NOINLINElocaleEncodingName#-}localeEncodingName::String localeEncodingName =unsafePerformIO $ do-- Use locale_charset() or nl_langinfo(CODESET) to get the encoding-- if we have either of them.cstr <-c_localeEncoding peekCAString cstr -- Assume charset names are ASCII-- We hope iconv_t is a storable type. It should be, since it has at least the-- value -1, which is a possible return value from iconv_open.typeIConv =CLong -- ToDo: (#type iconv_t)foreignimportccallunsafe"hs_iconv_open"hs_iconv_open::CString ->CString ->IOIConv foreignimportccallunsafe"hs_iconv_close"hs_iconv_close::IConv ->IOCInt foreignimportccallunsafe"hs_iconv"hs_iconv::IConv ->Ptr CString ->Ptr CSize ->Ptr CString ->Ptr CSize ->IOCSize foreignimportccallunsafe"localeEncoding"c_localeEncoding::IOCString haskellChar::String #if defined(WORDS_BIGENDIAN) haskellChar|charSize==2="UTF-16BE"|otherwise="UTF-32BE"#else haskellChar |charSize ==2="UTF-16LE"|otherwise ="UTF-32LE"#endif char_shift::Intchar_shift |charSize ==2=1|otherwise =2iconvEncoding::String ->IO(Maybe TextEncoding )iconvEncoding =mkIconvEncoding ErrorOnCodingFailure -- | Construct an iconv-based 'TextEncoding' for the given character set and-- 'CodingFailureMode'.---- As iconv is missing in some minimal environments (e.g. #10298), this-- checks to ensure that iconv is working properly before returning the-- encoding, returning 'Nothing' if not.mkIconvEncoding::CodingFailureMode ->String ->IO(Maybe TextEncoding )mkIconvEncoding cfm charset =doletenc =TextEncoding {textEncodingName=charset ,mkTextDecoder=newIConv raw_charset (haskellChar ++ suffix )(recoverDecode cfm )iconvDecode ,mkTextEncoder=newIConv haskellChar charset (recoverEncode cfm )iconvEncode }good <-charIsRepresentable enc 'a'return $ ifgood thenJust enc elseNothing where-- An annoying feature of GNU iconv is that the //PREFIXES only take-- effect when they appear on the tocode parameter to iconv_open:(raw_charset ,suffix )=span (/='/')charset newIConv::String ->String ->(Buffer a ->Buffer b ->IO(Buffer a ,Buffer b ))->(IConv ->Buffer a ->Buffer b ->IO(CodingProgress ,Buffer a ,Buffer b ))->IO(BufferCodec a b ())newIConv from to rec fn =-- Assume charset names are ASCIIwithCAString from $ \from_str ->withCAString to $ \to_str ->doiconvt <-throwErrnoIfMinus1 "mkTextEncoding"$ hs_iconv_open to_str from_str leticlose =throwErrnoIfMinus1_ "Iconv.close"$ hs_iconv_close iconvt return BufferCodec {encode=fn iconvt ,recover=rec ,close=iclose ,-- iconv doesn't supply a way to save/restore the stategetState=return (),setState=const $ return ()}iconvDecode::IConv ->DecodeBuffer iconvDecode iconv_t ibuf obuf =iconvRecode iconv_t ibuf 0obuf char_shift iconvEncode::IConv ->EncodeBuffer iconvEncode iconv_t ibuf obuf =iconvRecode iconv_t ibuf char_shift obuf 0iconvRecode::IConv ->Buffer a ->Int->Buffer b ->Int->IO(CodingProgress ,Buffer a ,Buffer b )iconvRecode iconv_t input @Buffer {bufRaw=iraw ,bufL=ir ,bufR=iw ,bufSize=_}iscale output @Buffer {bufRaw=oraw ,bufL=_,bufR=ow ,bufSize=os }oscale =doiconv_trace ("haskellChar="++ show haskellChar )iconv_trace ("iconvRecode before, input="++ show (summaryBuffer input ))iconv_trace ("iconvRecode before, output="++ show (summaryBuffer output ))withRawBuffer iraw $ \piraw ->dowithRawBuffer oraw $ \poraw ->dowith (piraw `plusPtr `(ir `shiftL `iscale ))$ \p_inbuf ->dowith (poraw `plusPtr `(ow `shiftL `oscale ))$ \p_outbuf ->dowith (fromIntegral ((iw -ir )`shiftL `iscale ))$ \p_inleft ->dowith (fromIntegral ((os -ow )`shiftL `oscale ))$ \p_outleft ->dores <-hs_iconv iconv_t p_inbuf p_inleft p_outbuf p_outleft new_inleft <-peek p_inleft new_outleft <-peek p_outleft letnew_inleft' =fromIntegral new_inleft `shiftR `iscale new_outleft' =fromIntegral new_outleft `shiftR `oscale new_input |new_inleft ==0=input {bufL=0,bufR=0}|otherwise =input {bufL=iw -new_inleft' }new_output =output {bufR=os -new_outleft' }iconv_trace ("iconv res="++ show res )iconv_trace ("iconvRecode after, input="++ show (summaryBuffer new_input ))iconv_trace ("iconvRecode after, output="++ show (summaryBuffer new_output ))if(res /=-1)thendo-- all input translatedreturn (InputUnderflow ,new_input ,new_output )elsedoerrno <-getErrno caseerrno ofe |e ==e2BIG ->return (OutputUnderflow ,new_input ,new_output )|e ==eINVAL ->return (InputUnderflow ,new_input ,new_output )-- Sometimes iconv reports EILSEQ for a-- character in the input even when there is no room-- in the output; in this case we might be about to-- change the encoding anyway, so the following bytes-- could very well be in a different encoding.---- Because we can only say InvalidSequence if there is at least-- one element left in the output, we have to special case this.|e ==eILSEQ ->return (ifnew_outleft' ==0thenOutputUnderflow elseInvalidSequence ,new_input ,new_output )|otherwise ->doiconv_trace ("iconv returned error: "++ show (errnoToIOError "iconv"e Nothing Nothing ))throwErrno "iconvRecoder"#endif /* !mingw32_HOST_OS */