diff --git a/src/MAT_HDF5.jl b/src/MAT_HDF5.jl index 46da353..d25a9a8 100644 --- a/src/MAT_HDF5.jl +++ b/src/MAT_HDF5.jl @@ -872,14 +872,31 @@ function read(obj::Union{HDF5.Dataset,HDF5.Attribute}, ::Type{MatlabString}) data = reshape(data, sz[2:end]) end if ndims(data) == 1 - return String(convert(Vector{Char}, data)) + return convert_string(data) elseif ndims(data) == 2 - return datap = String[rstrip(String(convert(Vector{Char}, vec(data[i, :])))) for i = 1:size(data, 1)] + return String[convert_string(c) for c in eachrow(data)] else - return data + return stringify_eachrow(data) end end +function stringify_eachrow(data) + dims = size(data) + fixed_dims = [1, 3:length(dims)...] # all except 2 + output_dims = dims[fixed_dims] + output = Array{String}(undef, output_dims...) + for I in CartesianIndices(output_dims) + idx = ntuple(d -> d == 2 ? Colon() : I[d < 2 ? d : d-1], length(dims)) + slice = view(data, idx...) + output[I] = convert_string(slice) + end + return output +end + +function convert_string(v::AbstractArray{UInt16}) + return String(convert(Vector{Char}, v)) +end + ## Utilities for handling complex numbers function build_datatype_complex(T::Type) memtype = create_datatype(HDF5.API.H5T_COMPOUND, 2*sizeof(T)) diff --git a/src/MAT_v5.jl b/src/MAT_v5.jl index 5c6be1c..4919f3c 100644 --- a/src/MAT_v5.jl +++ b/src/MAT_v5.jl @@ -289,7 +289,7 @@ function read_string(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}) else data = Vector{String}(undef, dimensions[1]) for i = 1:dimensions[1] - data[i] = rstrip(String(chars[i:dimensions[1]:end])) + data[i] = String(chars[i:dimensions[1]:end]) end end elseif dtype <= 4 || dtype == 17 @@ -316,7 +316,7 @@ function read_string(f::IO, swap_bytes::Bool, dimensions::Vector{Int32}) elseif dimensions[1] == 1 data = String(take!(bufs[1])) else - data = String[rstrip(String(take!(buf))) for buf in bufs] + data = String[String(take!(buf)) for buf in bufs] end else error("Unsupported string type") @@ -372,7 +372,7 @@ function read_matrix(f::IO, swap_bytes::Bool, subsys::Subsystem) data = read_struct(f, swap_bytes, dimensions, class == mxOBJECT_CLASS, subsys) elseif class == mxSPARSE_CLASS data = read_sparse(f, swap_bytes, dimensions, flags) - elseif class == mxCHAR_CLASS && length(dimensions) <= 2 + elseif class == mxCHAR_CLASS data = read_string(f, swap_bytes, dimensions) elseif class == mxFUNCTION_CLASS data = read_matrix(f, swap_bytes, subsys) diff --git a/test/read.jl b/test/read.jl index 08dc429..11975b8 100644 --- a/test/read.jl +++ b/test/read.jl @@ -81,7 +81,7 @@ for _format in ["v6", "v7", "v7.3"] result = Dict( "simple_string" => "the quick brown fox", "accented_string" => "thé qüîck browñ fòx", - "concatenated_strings" => String["this is a string", "this is another string"], + "concatenated_strings" => String["this is a string ", "this is another string"], "cell_strings" => Any["this is a string" "this is another string"], "empty_string" => "" ) @@ -159,6 +159,16 @@ for _format in ["v6", "v7", "v7.3"] end +for _format in ["v7", "v7.3"] + result = Dict{String,Any}( + "s" => " aαβ ", # test α and β characters, not possible in v6 + "s2" => ["fòx", "aαβ", " ef", "ac "], + "s3" => reshape(["faò", "aeα", " xc", "fβ "], 2, 2), + "s4" => Any["fòx"; "aαβ"; " ef"; "ac ";;] + ) + check(joinpath(dirname(@__FILE__), _format, "char_arrays.mat"), result) +end + result = Dict( "index" => [8.8604784000000000e+04 9.8707212000000000e+04 1.0394035200000000e+05 1.1429712000000000e+05 1.5474923999999999e+05 1.5475435200000001e+05 1.5501074400000001e+05 1.5505315200000000e+05 1.5505718400000001e+05 1.5506168400000001e+05 1.5506524799999999e+05 5.4945741599999997e+05 5.6345896799999999e+05 5.9956981200000003e+05 7.0691623199999996e+05 7.9063657200000004e+05 8.4311938800000004e+05 9.2225131200000003e+05 1.1248994160000000e+06 1.2508148520000000e+06 1.4164141320000000e+06 1.4275988280000000e+06 1.4744331000000001e+06 1.4982212879999999e+06 1.5549058440000000e+06 1.5870300840000000e+06 1.6192005120000001e+06 1.6766071560000000e+06 1.9386816839999999e+06 1.9969427879999999e+06 2.0021861880000001e+06 2.3272494120000000e+06 2.5309351080000000e+06 2.6743788720000000e+06], "spikes" => [ diff --git a/test/v7.3/char_arrays.mat b/test/v7.3/char_arrays.mat new file mode 100644 index 0000000..5f46f60 Binary files /dev/null and b/test/v7.3/char_arrays.mat differ diff --git a/test/v7/char_arrays.mat b/test/v7/char_arrays.mat new file mode 100644 index 0000000..8b30772 Binary files /dev/null and b/test/v7/char_arrays.mat differ