11# This file is a part of StringEncodings.jl. License is MIT: http://julialang.org/license
22
33module StringEncodings
4- import Base: close, eof, flush, read, readall, write, show
4+ import Base: close, eachline, eof, flush, isreadable, iswritable,
5+ open, read, readline, readlines, show, write
56import Base. Libc: errno, strerror, E2BIG, EINVAL, EILSEQ
67import Compat: read
78
@@ -82,6 +83,7 @@ const BUFSIZE = 100
8283
8384type StringEncoder{S<: IO } <: IO
8485 ostream:: S
86+ closestream:: Bool
8587 cd:: Ptr{Void}
8688 inbuf:: Vector{UInt8}
8789 outbuf:: Vector{UInt8}
9395
9496type StringDecoder{S<: IO } <: IO
9597 istream:: S
98+ closestream:: Bool
9699 cd:: Ptr{Void}
97100 inbuf:: Vector{UInt8}
98101 outbuf:: Vector{UInt8}
178181 StringEncoder(ostream, to, from=enc"UTF-8")
179182
180183Returns a new write-only I/O stream, which converts any text in the encoding `from`
181- written to it into text in the encoding `to` written to ostream. Calling `close` on the
184+ written to it into text in the encoding `to` written to `ostream`. Calling `close` on the
182185stream is necessary to complete the encoding (but does not close `ostream`).
183186
184187`to` and `from` can be specified either as a string or as an `Encoding` object.
@@ -187,7 +190,7 @@ function StringEncoder(ostream::IO, to::Encoding, from::Encoding=enc"UTF-8")
187190 cd = iconv_open (ASCIIString (to), ASCIIString (from))
188191 inbuf = Vector {UInt8} (BUFSIZE)
189192 outbuf = Vector {UInt8} (BUFSIZE)
190- s = StringEncoder (ostream, cd, inbuf, outbuf,
193+ s = StringEncoder (ostream, false , cd, inbuf, outbuf,
191194 Ref {Ptr{UInt8}} (pointer (inbuf)), Ref {Ptr{UInt8}} (pointer (outbuf)),
192195 Ref {Csize_t} (0 ), Ref {Csize_t} (BUFSIZE))
193196 finalizer (s, finalize)
@@ -221,6 +224,9 @@ function close(s::StringEncoder)
221224 iconv_reset! (s)
222225 # Make sure C memory/resources are returned
223226 finalize (s)
227+ if s. closestream
228+ close (s. ostream)
229+ end
224230 # flush() wasn't able to empty input buffer, which cannot happen with correct data
225231 s. inbytesleft[] == 0 || throw (IncompleteSequenceError ())
226232end
238244 StringDecoder(istream, from, to=enc"UTF-8")
239245
240246Returns a new read-only I/O stream, which converts text in the encoding `from`
241- read from `istream` into text in the encoding `to`.
247+ read from `istream` into text in the encoding `to`. Calling `close` on the
248+ stream does not close `istream`.
242249
243250`to` and `from` can be specified either as a string or as an `Encoding` object.
244251
@@ -249,7 +256,7 @@ function StringDecoder(istream::IO, from::Encoding, to::Encoding=enc"UTF-8")
249256 cd = iconv_open (ASCIIString (to), ASCIIString (from))
250257 inbuf = Vector {UInt8} (BUFSIZE)
251258 outbuf = Vector {UInt8} (BUFSIZE)
252- s = StringDecoder (istream, cd, inbuf, outbuf,
259+ s = StringDecoder (istream, false , cd, inbuf, outbuf,
253260 Ref {Ptr{UInt8}} (pointer (inbuf)), Ref {Ptr{UInt8}} (pointer (outbuf)),
254261 Ref {Csize_t} (0 ), Ref {Csize_t} (BUFSIZE), 0 )
255262 finalizer (s, finalize)
@@ -293,6 +300,9 @@ function close(s::StringDecoder)
293300 iconv_reset! (s)
294301 # Make sure C memory/resources are returned
295302 finalize (s)
303+ if s. closestream
304+ close (s. istream)
305+ end
296306 # iconv_reset!() wasn't able to empty input buffer, which cannot happen with correct data
297307 s. inbytesleft[] == 0 || throw (IncompleteSequenceError ())
298308end
@@ -301,26 +311,105 @@ function read(s::StringDecoder, ::Type{UInt8})
301311 eof (s) ? throw (EOFError ()) : s. outbuf[s. skip+= 1 ]
302312end
303313
# Capability queries for the converter streams.

# A decoder is readable exactly when the stream it wraps is readable, and never writable.
function isreadable(s::StringDecoder)
    return isreadable(s.istream)
end
function iswritable(s::StringDecoder)
    return false
end

# An encoder is never readable, and writable exactly when the stream it wraps is writable.
function isreadable(s::StringEncoder)
    return false
end
function iswritable(s::StringEncoder)
    return iswritable(s.ostream)
end
319+
304320
305321# # Convenience I/O functions
# Wrap the already-opened stream `s` in a converter for encoding `enc`: a `StringEncoder`
# when `s` is writable, a `StringDecoder` otherwise. The returned wrapper has its
# `closestream` flag set, so that closing the wrapper also closes `s`.
#
# Throws an `ArgumentError` when `s` is both readable and writable. Because the wrapper
# is meant to take over responsibility for closing `s`, `s` is closed here before any
# error raised while wrapping is propagated; otherwise the handle would stay open
# with nobody left to close it.
function wrap_stream(s::IO, enc::Encoding)
    try
        if iswritable(s) && isreadable(s) # Should never happen
            throw(ArgumentError("cannot open encoded text files in read and write/append modes at the same time"))
        end
        # Use a separate binding: the wrapper has a different type than the raw stream.
        wrapped = iswritable(s) ? StringEncoder(s, enc) : StringDecoder(s, enc)
        wrapped.closestream = true
        return wrapped
    catch
        # Wrapping failed: release the underlying stream, then re-raise the original error.
        close(s)
        rethrow()
    end
end
330+
"""
    open(filename::AbstractString, enc::Encoding[, args...])

Open the text file `filename`, stored in encoding `enc`, and return an I/O stream that
converts between `enc` and UTF-8 on the fly: a `StringDecoder` when the file is opened
for reading, a `StringEncoder` when it is opened for writing.

Any extra `args` are forwarded unchanged to `open`, so this method can stand in for
every `open` variant that works with files.

Calling `close` on the returned stream also closes the underlying file handle; in write
mode, `close` is required to complete the encoding. Opening a file for both reading and
writing/appending is not supported.

The returned stream can be handed to functions working on strings without specifying
the encoding again.
"""
function open(fname::AbstractString, enc::Encoding, args...)
    file = open(fname, args...)
    return wrap_stream(file, enc)
end
347+
# Flag-based variant of `open(fname, enc, ...)`: the five `Bool` flags are forwarded
# unchanged to the underlying `open(fname, rd, wr, cr, tr, ff)` call.
# Throws an `ArgumentError` when `rd` is combined with `wr` or `ff`.
function open(fname::AbstractString, enc::Encoding,
              rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool)
    # Reject the combination before touching the file system.
    bothways = rd && (wr || ff)
    bothways && throw(ArgumentError("cannot open encoded text files in read and write/append modes at the same time"))
    file = open(fname, rd, wr, cr, tr, ff)
    return wrap_stream(file, enc)
end
355+
# Mode-string variant of `open(fname, enc, ...)`: `mode` is forwarded unchanged to
# `open(fname, mode)`.
# Throws an `ArgumentError` for the modes "r+", "w+" and "a+".
function open(fname::AbstractString, enc::Encoding, mode::AbstractString)
    for rejected in ("r+", "w+", "a+")
        if mode == rejected
            throw(ArgumentError("cannot open encoded text files in read and write/append modes at the same time"))
        end
    end
    file = open(fname, mode)
    return wrap_stream(file, enc)
end
362+
# `readstring` is used when Base provides it; otherwise fall back to `readall`
# (Julia 0.4). Both branches define the same pair of methods.
if isdefined(Base, :readstring)
    @doc """
        readstring(stream::IO, enc::Encoding)
        readstring(filename::AbstractString, enc::Encoding)

    `readstring` for text in character encoding `enc`: the input is passed through a
    `StringDecoder` before being read. The `filename` form opens the file, reads it
    through the `stream` form, and lets `open` close the file afterwards.
    """ ->
    Base.readstring(s::IO, enc::Encoding) = readstring(StringDecoder(s, enc))
    function Base.readstring(filename::AbstractString, enc::Encoding)
        return open(filename) do io
            readstring(io, enc)
        end
    end
else # Compatibility with Julia 0.4
    @doc """
        readall(stream::IO, enc::Encoding)
        readall(filename::AbstractString, enc::Encoding)

    `readall` for text in character encoding `enc`: the input is passed through a
    `StringDecoder` before being read. The `filename` form opens the file, reads it
    through the `stream` form, and lets `open` close the file afterwards.
    """ ->
    Base.readall(s::IO, enc::Encoding) = readall(StringDecoder(s, enc))
    function Base.readall(filename::AbstractString, enc::Encoding)
        return open(filename) do io
            readall(io, enc)
        end
    end
end
323382
"""
    readline(stream::IO, enc::Encoding)
    readline(filename::AbstractString, enc::Encoding)

`readline` for text in character encoding `enc`: the input is passed through a
`StringDecoder` before `readline` is applied. The `filename` form opens the file and
delegates to the `stream` form.
"""
function readline(s::IO, enc::Encoding)
    decoder = StringDecoder(s, enc)
    return readline(decoder)
end
function readline(filename::AbstractString, enc::Encoding)
    return open(filename) do io
        readline(io, enc)
    end
end
391+
"""
    readlines(stream::IO, enc::Encoding)
    readlines(filename::AbstractString, enc::Encoding)

`readlines` for text in character encoding `enc`: the input is passed through a
`StringDecoder` before `readlines` is applied. The `filename` form opens the file and
delegates to the `stream` form.
"""
function readlines(s::IO, enc::Encoding)
    decoder = StringDecoder(s, enc)
    return readlines(decoder)
end
function readlines(filename::AbstractString, enc::Encoding)
    return open(filename) do io
        readlines(io, enc)
    end
end
400+
"""
    eachline(stream::IO, enc::Encoding)
    eachline(filename::AbstractString, enc::Encoding)

`eachline` for text in character encoding `enc`. Decoding is performed on the fly.

The `filename` form opens the file with `open(filename, enc)` and hands `EachLine` a
callback that closes the resulting stream.
"""
function eachline(s::IO, enc::Encoding)
    decoder = StringDecoder(s, enc)
    return eachline(decoder)
end
function eachline(filename::AbstractString, enc::Encoding)
    decoded = open(filename, enc)
    # Second argument is the callback `EachLine` is given to close the stream
    # (presumably invoked once iteration is done; confirm against Base).
    return EachLine(decoded, () -> close(decoded))
end
412+
324413
325414# # Functions to encode/decode strings
326415
@@ -359,7 +448,7 @@ function encode(s::AbstractString, enc::Encoding)
359448 b = IOBuffer ()
360449 p = StringEncoder (b, enc, encoding (typeof (s)))
361450 write (p, s)
362- close (p)
451+ flush (p)
363452 takebuf_array (b)
364453end
365454
0 commit comments