@@ -20,6 +20,12 @@ export encoding, encodings_list, Encoding, @enc_str
2020
2121abstract type  StringEncodingError end 
2222
23+ #  Contiguous 1-d byte arrays usable with C `unsigned char *` APIs (the `CodeUnits` members wrap immutable string data: read-only)
24+ const  ByteVector=  Union{Vector{UInt8},
25+  Base. FastContiguousSubArray{UInt8,1 ,<: Array{UInt8,1} },
26+  Base. CodeUnits{UInt8, String}, Base. CodeUnits{UInt8, SubString{String}}}
27+ const  ByteString =  Union{String,SubString{String}}
28+ 
2329#  The specified encodings, or their combination, are not supported by iconv
2430struct  InvalidEncodingError <:  StringEncodingError 
2531 args:: Tuple{String, String} 
@@ -31,7 +37,7 @@ message(::Type{InvalidEncodingError}) = "Conversion from <<1>> to <<2>> not supp
3137struct  InvalidSequenceError <:  StringEncodingError 
3238 args:: Tuple{String} 
3339end 
34- InvalidSequenceError (seq:: Vector {UInt8} ) =  InvalidSequenceError ((bytes2hex (seq),))
40+ InvalidSequenceError (seq:: AbstractVector {UInt8} ) =  InvalidSequenceError ((bytes2hex (seq),))
3541message (:: Type{InvalidSequenceError} ) =  " Byte sequence 0x<<1>> is invalid in source encoding or cannot be represented in target encoding" 
3642
3743struct  IConvError <:  StringEncodingError 
@@ -123,7 +129,7 @@ function finalize(s::Union{StringEncoder, StringDecoder})
123129 nothing 
124130end 
125131
126- function  iconv! (cd:: Ptr{Nothing} , inbuf:: Vector{UInt8} , outbuf:: Vector{UInt8} ,
132+ function  iconv! (cd:: Ptr{Nothing} , inbuf:: ByteVector , outbuf:: ByteVector ,
127133 inbufptr:: Ref{Ptr{UInt8}} , outbufptr:: Ref{Ptr{UInt8}} ,
128134 inbytesleft:: Ref{Csize_t} , outbytesleft:: Ref{Csize_t} )
129135 inbufptr[] =  pointer (inbuf)
@@ -499,14 +505,20 @@ end
499505# # Functions to encode/decode strings
500506
501507""" 
502-  decode([T,] a::Vector {UInt8}, enc) 
508+  decode([T,] a::AbstractVector {UInt8}, enc) 
503509
504510Convert an array of bytes `a` representing text in encoding `enc` to a string of type `T`. 
505511By default, a `String` is returned. 
506512
513+ To `decode` a `String` `s` whose bytes hold text in a non-UTF-8 encoding, call 
514+ `decode(codeunits(s), enc)` to operate on the underlying byte array. 
515+ 
507516`enc` can be specified either as a string or as an `Encoding` object. 
517+ The input data `a` can be a `Vector{UInt8}` of bytes, a contiguous 
518+ subarray (view) thereof, or the `codeunits` of a `String` or `SubString{String}`. 
519+ Other byte arrays can be copied to a `Vector{UInt8}` first, e.g. with `collect`. 
508520""" 
509- function  decode (:: Type{T} , a:: Vector{UInt8} , enc:: Encoding ) where  {T<: AbstractString }
521+ function  decode (:: Type{T} , a:: ByteVector , enc:: Encoding ) where  {T<: AbstractString }
510522 b =  IOBuffer (a)
511523 try 
512524 T (read (StringDecoder (b, enc, encoding (T))))
@@ -515,19 +527,19 @@ function decode(::Type{T}, a::Vector{UInt8}, enc::Encoding) where {T<:AbstractSt
515527 end 
516528end 
517529
518- decode (:: Type{T} , a:: Vector{UInt8} , enc:: AbstractString ) where  {T<: AbstractString } = 
530+ decode (:: Type{T} , a:: ByteVector , enc:: AbstractString ) where  {T<: AbstractString } = 
519531 decode (T, a, Encoding (enc))
520532
521- decode (a:: Vector{UInt8} , enc:: AbstractString ) =  decode (String, a, Encoding (enc))
522- decode (a:: Vector{UInt8} , enc:: Union{AbstractString, Encoding} ) =  decode (String, a, enc)
533+ decode (a:: ByteVector , enc:: Union{AbstractString, Encoding} ) =  decode (String, a, enc)
523534
524535""" 
525536 encode(s::AbstractString, enc) 
526537
527538Convert string `s` to an array of bytes representing text in encoding `enc`. 
528539`enc` can be specified either as a string or as an `Encoding` object. 
529540""" 
530- function  encode (s:: AbstractString , enc:: Encoding )
541+ encode (s:: AbstractString , enc:: Encoding ) =  encode (String (s), enc)
542+ function  encode (s:: ByteString , enc:: Encoding )
531543 b =  IOBuffer ()
532544 p =  StringEncoder (b, enc, encoding (typeof (s)))
533545 write (p, s)
0 commit comments