Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit dfd683d

Browse files
support data in more byte-array/string types (#54)
1 parent f37a33d commit dfd683d

File tree

4 files changed

+32
-9
lines changed

4 files changed

+32
-9
lines changed

‎Project.toml‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "StringEncodings"
22
uuid = "69024149-9ee7-55f6-a4c4-859efe599b68"
3-
version = "0.3.6"
3+
version = "0.3.7"
44

55
[deps]
66
Libiconv_jll = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"

‎src/StringEncodings.jl‎

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ export encoding, encodings_list, Encoding, @enc_str
2020

2121
abstract type StringEncodingError end
2222

23+
# contiguous 1-D byte arrays that can be passed to C APIs expecting `unsigned char *`
24+
const ByteVector= Union{Vector{UInt8},
25+
Base.FastContiguousSubArray{UInt8,1,<:Array{UInt8,1}},
26+
Base.CodeUnits{UInt8, String}, Base.CodeUnits{UInt8, SubString{String}}}
27+
const ByteString = Union{String,SubString{String}}
28+
2329
# Specified encodings or the combination are not supported by iconv
2430
struct InvalidEncodingError <: StringEncodingError
2531
args::Tuple{String, String}
@@ -31,7 +37,7 @@ message(::Type{InvalidEncodingError}) = "Conversion from <<1>> to <<2>> not supp
3137
struct InvalidSequenceError <: StringEncodingError
3238
args::Tuple{String}
3339
end
34-
InvalidSequenceError(seq::Vector{UInt8}) = InvalidSequenceError((bytes2hex(seq),))
40+
InvalidSequenceError(seq::AbstractVector{UInt8}) = InvalidSequenceError((bytes2hex(seq),))
3541
message(::Type{InvalidSequenceError}) = "Byte sequence 0x<<1>> is invalid in source encoding or cannot be represented in target encoding"
3642

3743
struct IConvError <: StringEncodingError
@@ -123,7 +129,7 @@ function finalize(s::Union{StringEncoder, StringDecoder})
123129
nothing
124130
end
125131

126-
function iconv!(cd::Ptr{Nothing}, inbuf::Vector{UInt8}, outbuf::Vector{UInt8},
132+
function iconv!(cd::Ptr{Nothing}, inbuf::ByteVector, outbuf::ByteVector,
127133
inbufptr::Ref{Ptr{UInt8}}, outbufptr::Ref{Ptr{UInt8}},
128134
inbytesleft::Ref{Csize_t}, outbytesleft::Ref{Csize_t})
129135
inbufptr[] = pointer(inbuf)
@@ -499,14 +505,20 @@ end
499505
## Functions to encode/decode strings
500506

501507
"""
502-
decode([T,] a::Vector{UInt8}, enc)
508+
decode([T,] a::AbstractVector{UInt8}, enc)
503509
504510
Convert an array of bytes `a` representing text in encoding `enc` to a string of type `T`.
505511
By default, a `String` is returned.
506512
513+
To `decode` a string `s::String` containing data in a non-UTF-8 encoding, use
514+
`decode(codeunits(s), enc)` to act on the underlying byte array.
515+
507516
`enc` can be specified either as a string or as an `Encoding` object.
517+
The input data `a` can be a `Vector{UInt8}` of bytes, a contiguous
518+
subarray thereof, or the `codeunits` of a `String` (or substring
519+
thereof).
508520
"""
509-
function decode(::Type{T}, a::Vector{UInt8}, enc::Encoding) where {T<:AbstractString}
521+
function decode(::Type{T}, a::ByteVector, enc::Encoding) where {T<:AbstractString}
510522
b = IOBuffer(a)
511523
try
512524
T(read(StringDecoder(b, enc, encoding(T))))
@@ -515,19 +527,19 @@ function decode(::Type{T}, a::Vector{UInt8}, enc::Encoding) where {T<:AbstractSt
515527
end
516528
end
517529

518-
decode(::Type{T}, a::Vector{UInt8}, enc::AbstractString) where {T<:AbstractString} =
530+
decode(::Type{T}, a::ByteVector, enc::AbstractString) where {T<:AbstractString} =
519531
decode(T, a, Encoding(enc))
520532

521-
decode(a::Vector{UInt8}, enc::AbstractString) = decode(String, a, Encoding(enc))
522-
decode(a::Vector{UInt8}, enc::Union{AbstractString, Encoding}) = decode(String, a, enc)
533+
decode(a::ByteVector, enc::Union{AbstractString, Encoding}) = decode(String, a, enc)
523534

524535
"""
525536
encode(s::AbstractString, enc)
526537
527538
Convert string `s` to an array of bytes representing text in encoding `enc`.
528539
`enc` can be specified either as a string or as an `Encoding` object.
529540
"""
530-
function encode(s::AbstractString, enc::Encoding)
541+
encode(s::AbstractString, enc::Encoding) = encode(String(s), enc)
542+
function encode(s::ByteString, enc::Encoding)
531543
b = IOBuffer()
532544
p = StringEncoder(b, enc, encoding(typeof(s)))
533545
write(p, s)

‎src/encodings.jl‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ print(io::IO, ::Encoding{enc}) where {enc} = print(io, enc)
2323

2424
## Get the encoding used by a string type
2525
encoding(::Type{String}) = enc"UTF-8"
26+
encoding(::Type{SubString{String}}) = enc"UTF-8"
2627

2728
encodings_list = ["1026", "1046", "1047", "10646-1:1993", "10646-1:1993/UCS4",
2829
"437", "500", "500V1", "850", "851", "852", "855", "856", "857",

‎test/runtests.jl‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,16 @@ end
270270
@test_throws ArgumentError readavailable(p)
271271
end
272272

273+
# make sure encode/decode support various string/array types
274+
@testset "Array/String types" begin
275+
s = "Bendaña"
276+
enc = "Windows-1252"
277+
se = "Benda\xf1a"
278+
@test encode(Test.GenericString(s), enc) == codeunits(se)
279+
@test encode(SubString(s, 1:6), enc) == encode(s[1:6], enc) == codeunits(se)[1:6]
280+
@test s == decode(codeunits(se), enc) == decode(collect(codeunits(se)), enc)
281+
@test s[1:6] == decode(@view(collect(codeunits(se))[1:6]), enc)
282+
end
273283

274284
## Test encodings support
275285
b = IOBuffer()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /