GHC/IO/Encoding/Iconv.hs

{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP
 , NoImplicitPrelude
 , ForeignFunctionInterface
 , NondecreasingIndentation
 #-}

-----------------------------------------------------------------------------
-- |
-- Module : GHC.IO.Encoding.Iconv
-- Copyright : (c) The University of Glasgow, 2008-2009
-- License : see libraries/base/LICENSE
-- 
-- Maintainer : libraries@haskell.org
-- Stability : internal
-- Portability : non-portable
--
-- This module provides text encoding/decoding using iconv
--
-----------------------------------------------------------------------------

-- #hide
module GHC.IO.Encoding.Iconv (
#if !defined(mingw32_HOST_OS)
 iconvEncoding, mkIconvEncoding,
 localeEncodingName
#endif
 ) where

#include "MachDeps.h"
#include "HsBaseConfig.h"

#if !defined(mingw32_HOST_OS)

import Foreign.Safe
import Foreign.C
import Data.Maybe
import GHC.Base
import GHC.IO.Buffer
import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types
import GHC.List (span)
import GHC.Num
import GHC.Show
import GHC.Real
import System.IO.Unsafe (unsafePerformIO)
import System.Posix.Internals

c_DEBUG_DUMP :: Bool
c_DEBUG_DUMP = False

iconv_trace :: String -> IO ()
iconv_trace s
 | c_DEBUG_DUMP = puts s
 | otherwise = return ()

-- -----------------------------------------------------------------------------
-- iconv encoders/decoders

{-# NOINLINE localeEncodingName #-}
localeEncodingName :: String
localeEncodingName = unsafePerformIO $ do
 -- Use locale_charset() or nl_langinfo(CODESET) to get the encoding
 -- if we have either of them.
 cstr <- c_localeEncoding
 peekCAString cstr -- Assume charset names are ASCII

-- We hope iconv_t is a storable type. It should be, since it has at least the
-- value -1, which is a possible return value from iconv_open.
type IConv = CLong -- ToDo: (#type iconv_t)

foreign import ccall unsafe "hs_iconv_open"
 hs_iconv_open :: CString -> CString -> IO IConv

foreign import ccall unsafe "hs_iconv_close"
 hs_iconv_close :: IConv -> IO CInt

foreign import ccall unsafe "hs_iconv"
 hs_iconv :: IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize
	 -> IO CSize

foreign import ccall unsafe "localeEncoding"
 c_localeEncoding :: IO CString

haskellChar :: String
#ifdef WORDS_BIGENDIAN
haskellChar | charSize == 2 = "UTF-16BE"
 | otherwise = "UTF-32BE"
#else
haskellChar | charSize == 2 = "UTF-16LE"
 | otherwise = "UTF-32LE"
#endif

char_shift :: Int
char_shift | charSize == 2 = 1
 | otherwise = 2

iconvEncoding :: String -> IO TextEncoding
iconvEncoding = mkIconvEncoding ErrorOnCodingFailure

mkIconvEncoding :: CodingFailureMode -> String -> IO TextEncoding
mkIconvEncoding cfm charset = do
 return (TextEncoding { 
 textEncodingName = charset,
		mkTextDecoder = newIConv raw_charset (haskellChar ++ suffix) (recoverDecode cfm) iconvDecode,
		mkTextEncoder = newIConv haskellChar charset (recoverEncode cfm) iconvEncode})
 where
 -- An annoying feature of GNU iconv is that the //PREFIXES only take
 -- effect when they appear on the tocode parameter to iconv_open:
 (raw_charset, suffix) = span (/= '/') charset

newIConv :: String -> String
 -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
 -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
 -> IO (BufferCodec a b ())
newIConv from to rec fn =
 -- Assume charset names are ASCII
 withCAString from $ \ from_str ->
 withCAString to $ \ to_str -> do
 iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ hs_iconv_open to_str from_str
 let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
 return BufferCodec{
 encode = fn iconvt,
 recover = rec,
 close = iclose,
 -- iconv doesn't supply a way to save/restore the state
 getState = return (),
 setState = const $ return ()
 }

iconvDecode :: IConv -> DecodeBuffer
iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift

iconvEncode :: IConv -> EncodeBuffer
iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0

iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int 
 -> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode iconv_t
 input@Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ } iscale
 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os } oscale
 = do
 iconv_trace ("haskelChar=" ++ show haskellChar)
 iconv_trace ("iconvRecode before, input=" ++ show (summaryBuffer input))
 iconv_trace ("iconvRecode before, output=" ++ show (summaryBuffer output))
 withRawBuffer iraw $ \ piraw -> do
 withRawBuffer oraw $ \ poraw -> do
 with (piraw `plusPtr` (ir `shiftL` iscale)) $ \ p_inbuf -> do
 with (poraw `plusPtr` (ow `shiftL` oscale)) $ \ p_outbuf -> do
 with (fromIntegral ((iw-ir) `shiftL` iscale)) $ \ p_inleft -> do
 with (fromIntegral ((os-ow) `shiftL` oscale)) $ \ p_outleft -> do
 res <- hs_iconv iconv_t p_inbuf p_inleft p_outbuf p_outleft
 new_inleft <- peek p_inleft
 new_outleft <- peek p_outleft
 let 
	 new_inleft' = fromIntegral new_inleft `shiftR` iscale
	 new_outleft' = fromIntegral new_outleft `shiftR` oscale
	 new_input 
 | new_inleft == 0 = input { bufL = 0, bufR = 0 }
	 | otherwise = input { bufL = iw - new_inleft' }
	 new_output = output{ bufR = os - new_outleft' }
 iconv_trace ("iconv res=" ++ show res)
 iconv_trace ("iconvRecode after, input=" ++ show (summaryBuffer new_input))
 iconv_trace ("iconvRecode after, output=" ++ show (summaryBuffer new_output))
 if (res /= -1)
	then do -- all input translated
	 return (InputUnderflow, new_input, new_output)
	else do
 errno <- getErrno
 case errno of
 e | e == e2BIG -> return (OutputUnderflow, new_input, new_output)
 | e == eINVAL -> return (InputUnderflow, new_input, new_output)
 -- Sometimes iconv reports EILSEQ for a
 -- character in the input even when there is no room
 -- in the output; in this case we might be about to
 -- change the encoding anyway, so the following bytes
 -- could very well be in a different encoding.
 --
 -- Because we can only say InvalidSequence if there is at least
 -- one element left in the output, we have to special case this.
 | e == eILSEQ -> return (if new_outleft' == 0 then OutputUnderflow else InvalidSequence, new_input, new_output)
 | otherwise -> do
 iconv_trace ("iconv returned error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
 throwErrno "iconvRecoder"

#endif /* !mingw32_HOST_OS */

AltStyle によって変換されたページ (->オリジナル) /