Haskell Code by HsColour

{-# OPTIONS_GHC -XNoImplicitPrelude #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.IO.Encoding.Iconv
-- Copyright : (c) The University of Glasgow, 2008-2009
-- License : see libraries/base/LICENSE
-- 
-- Maintainer : libraries@haskell.org
-- Stability : internal
-- Portability : non-portable
--
-- This module provides text encoding/decoding using iconv
--
-----------------------------------------------------------------------------

-- #hide
module GHC.IO.Encoding.Iconv (
#if !defined(mingw32_HOST_OS)
 mkTextEncoding,
 latin1,
 utf8, 
 utf16, utf16le, utf16be,
 utf32, utf32le, utf32be,
 localeEncoding
#endif
 ) where

#include "MachDeps.h"
#include "HsBaseConfig.h"

#if !defined(mingw32_HOST_OS)

#undef DEBUG_DUMP

import Foreign
import Foreign.C
import Data.Maybe
import GHC.Base
import GHC.IO.Buffer
import GHC.IO.Encoding.Types
import GHC.Num
import GHC.Show
import GHC.Real
#ifdef DEBUG_DUMP
import System.Posix.Internals
#endif

iconv_trace :: String -> IO ()

#ifdef DEBUG_DUMP

iconv_trace s = puts s

puts :: String -> IO ()
puts s = do withCStringLen (s++"\n") $ \(p, len) -> 
 c_write 1 (castPtr p) (fromIntegral len)
 return ()

#else

iconv_trace _ = return ()

#endif

-- -----------------------------------------------------------------------------
-- iconv encoders/decoders

{-# NOINLINE latin1 #-}
latin1 :: TextEncoding
latin1 = unsafePerformIO (mkTextEncoding "Latin1")

{-# NOINLINE utf8 #-}
utf8 :: TextEncoding
utf8 = unsafePerformIO (mkTextEncoding "UTF8")

{-# NOINLINE utf16 #-}
utf16 :: TextEncoding
utf16 = unsafePerformIO (mkTextEncoding "UTF16")

{-# NOINLINE utf16le #-}
utf16le :: TextEncoding
utf16le = unsafePerformIO (mkTextEncoding "UTF16LE")

{-# NOINLINE utf16be #-}
utf16be :: TextEncoding
utf16be = unsafePerformIO (mkTextEncoding "UTF16BE")

{-# NOINLINE utf32 #-}
utf32 :: TextEncoding
utf32 = unsafePerformIO (mkTextEncoding "UTF32")

{-# NOINLINE utf32le #-}
utf32le :: TextEncoding
utf32le = unsafePerformIO (mkTextEncoding "UTF32LE")

{-# NOINLINE utf32be #-}
utf32be :: TextEncoding
utf32be = unsafePerformIO (mkTextEncoding "UTF32BE")

{-# NOINLINE localeEncoding #-}
localeEncoding :: TextEncoding
localeEncoding = unsafePerformIO $ do
#if HAVE_LANGINFO_H
 cstr <- c_localeEncoding -- use nl_langinfo(CODESET) to get the encoding
 -- if we have it
 r <- peekCString cstr
 mkTextEncoding r
#else
 mkTextEncoding "" -- GNU iconv accepts "" to mean the -- locale encoding.
#endif

-- We hope iconv_t is a storable type. It should be, since it has at least the
-- value -1, which is a possible return value from iconv_open.
type IConv = CLong -- ToDo: (#type iconv_t)

foreign import ccall unsafe "hs_iconv_open"
 hs_iconv_open :: CString -> CString -> IO IConv

foreign import ccall unsafe "hs_iconv_close"
 hs_iconv_close :: IConv -> IO CInt

foreign import ccall unsafe "hs_iconv"
 hs_iconv :: IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize
	 -> IO CSize

foreign import ccall unsafe "localeEncoding"
 c_localeEncoding :: IO CString

haskellChar :: String
#ifdef WORDS_BIGENDIAN
haskellChar | charSize == 2 = "UTF-16BE"
 | otherwise = "UTF-32BE"
#else
haskellChar | charSize == 2 = "UTF-16LE"
 | otherwise = "UTF-32LE"
#endif

char_shift :: Int
char_shift | charSize == 2 = 1
 | otherwise = 2

mkTextEncoding :: String -> IO TextEncoding
mkTextEncoding charset = do
 return (TextEncoding { 
		mkTextDecoder = newIConv charset haskellChar iconvDecode,
		mkTextEncoder = newIConv haskellChar charset iconvEncode})

newIConv :: String -> String
 -> (IConv -> Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
 -> IO (BufferCodec a b ())
newIConv from to fn =
 withCString from $ \ from_str ->
 withCString to $ \ to_str -> do
 iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ hs_iconv_open to_str from_str
 let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
 return BufferCodec{
 encode = fn iconvt,
 close = iclose,
 -- iconv doesn't supply a way to save/restore the state
 getState = return (),
 setState = const $ return ()
 }

iconvDecode :: IConv -> Buffer Word8 -> Buffer CharBufElem
	 -> IO (Buffer Word8, Buffer CharBufElem)
iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift

iconvEncode :: IConv -> Buffer CharBufElem -> Buffer Word8
	 -> IO (Buffer CharBufElem, Buffer Word8)
iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0

iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int 
 -> IO (Buffer a, Buffer b)
iconvRecode iconv_t
 input@Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ } iscale
 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os } oscale
 = do
 iconv_trace ("haskelChar=" ++ show haskellChar)
 iconv_trace ("iconvRecode before, input=" ++ show (summaryBuffer input))
 iconv_trace ("iconvRecode before, output=" ++ show (summaryBuffer output))
 withRawBuffer iraw $ \ piraw -> do
 withRawBuffer oraw $ \ poraw -> do
 with (piraw `plusPtr` (ir `shiftL` iscale)) $ \ p_inbuf -> do
 with (poraw `plusPtr` (ow `shiftL` oscale)) $ \ p_outbuf -> do
 with (fromIntegral ((iw-ir) `shiftL` iscale)) $ \ p_inleft -> do
 with (fromIntegral ((os-ow) `shiftL` oscale)) $ \ p_outleft -> do
 res <- hs_iconv iconv_t p_inbuf p_inleft p_outbuf p_outleft
 new_inleft <- peek p_inleft
 new_outleft <- peek p_outleft
 let 
	 new_inleft' = fromIntegral new_inleft `shiftR` iscale
	 new_outleft' = fromIntegral new_outleft `shiftR` oscale
	 new_input 
 | new_inleft == 0 = input { bufL = 0, bufR = 0 }
	 | otherwise = input { bufL = iw - new_inleft' }
	 new_output = output{ bufR = os - new_outleft' }
 iconv_trace ("iconv res=" ++ show res)
 iconv_trace ("iconvRecode after, input=" ++ show (summaryBuffer new_input))
 iconv_trace ("iconvRecode after, output=" ++ show (summaryBuffer new_output))
 if (res /= -1)
	then do -- all input translated
	 return (new_input, new_output)
	else do
 errno <- getErrno
 case errno of
	e | e == eINVAL 
 || (e == e2BIG || e == eILSEQ) && new_inleft' /= (iw-ir) -> do
 iconv_trace ("iconv ignoring error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
		-- Output overflow is relatively harmless, unless
		-- we made no progress at all. 
 --
 -- Similarly, we ignore EILSEQ unless we converted no
 -- characters. Sometimes iconv reports EILSEQ for a
 -- character in the input even when there is no room
 -- in the output; in this case we might be about to
 -- change the encoding anyway, so the following bytes
 -- could very well be in a different encoding.
 -- This also helps with pinpointing EILSEQ errors: we
 -- don't report it until the rest of the characters in
 -- the buffer have been drained.
 return (new_input, new_output)

	_other -> 
		throwErrno "iconvRecoder" 
			-- illegal sequence, or some other error

#endif /* !mingw32_HOST_OS */

AltStyle によって変換されたページ (->オリジナル) /