@@ -24,6 +24,9 @@ dispbench(res) # Displays the results in a pretty format
2424isdefined (Main, :STRS_SETUP ) || include (" setup.jl" )
2525@static V6_COMPAT || (using Serialization)
2626
# When not running in V6 compatibility mode, give code point / code unit iterators a
# one-argument `iterate` method that simply starts the two-argument form at state 1.
# NOTE(review): if `CodeUnits` here is Base's own type, this definition is type piracy —
# confirm which module owns `CodePoints` and `CodeUnits`.
@static if !V6_COMPAT
    Base.iterate(it::Union{CodePoints,CodeUnits}) = iterate(it, 1)
end
28+
29+
2730const inppath = " textsamples"
2831const gutpath = " gutenberg"
2932const smppath = " samples"
@@ -227,6 +230,7 @@ function dispres(io, xres)
227230 end
228231 for i = 2 : length (res)
229232 rn = res[i]
233+ rn[1 ] == " UniStr" && continue
230234 pr "\( io)\n\% -12.12s(rn[1])\% 6.3f(sizes[i]/stats.len)"
231235 tn = rn[3 ]
232236 minres = min (length (t1), length (tn))
@@ -292,66 +296,34 @@ function douppercase(lines::Vector{<:AbstractString})
292296 cnt
293297end
294298
295- @inline function iteratechars (text:: AbstractString )
296- cnt = 0
297- for ch in text
298- cnt += is_digit (ch)
299- end
300- cnt
301- end
302-
303- @inline function iteratecps (text:: AbstractString )
304- cnt = 0
305- for ch in codepoints (text)
306- cnt += is_digit (ch)
307- end
308- cnt
309- end
310-
311- @inline function iteratecus (text:: AbstractString )
312- cnt = 0
313- for ch in codeunits (text)
314- cnt += is_digit (ch)
315- end
316- cnt
317- end
318-
"""
    countchars(lines)

Return the total number of characters in `lines`, obtained by iterating over every
string character by character and adding one per element visited.
"""
function countchars(lines::Vector{T}) where {T<:AbstractString}
    total = 0
    @inbounds for str in lines
        for c in str
            total += 1
        end
    end
    return total
end
326306
"""
    countcps(lines)

Return the total number of elements produced by `codepoints(text)` over every `text`
in `lines`, adding one per element visited.
"""
function countcps(lines::Vector{T}) where {T<:AbstractString}
    total = 0
    # NOTE(review): unlike `countchars` and `countcus`, this loop is not wrapped in
    # `@inbounds` — confirm whether that difference is intentional for the benchmark.
    for str in lines
        for cp in codepoints(str)
            total += 1
        end
    end
    return total
end
342314
"""
    countcus(lines)

Return the total number of code units in `lines`, obtained by iterating
`codeunits(text)` for every string and adding one per element visited.
"""
function countcus(lines::Vector{T}) where {T<:AbstractString}
    total = 0
    @inbounds for str in lines
        for unit in codeunits(str)
            total += 1
        end
    end
    return total
end
350322
"""
    countsize(lines)

Return the sum of `sizeof(text)` over every string in `lines`.
"""
function countsize(lines::Vector{<:AbstractString})
    total = 0
    for idx in eachindex(lines)
        total += sizeof(lines[idx])
    end
    return total
end
@@ -409,88 +381,35 @@ end
409381 cnt
410382end
411383
412- @inline function iteratefunchars (fun, text)
413- cnt = 0
414- for ch in text
415- cnt += fun (ch)
416- end
417- cnt
418- end
419-
"""
    checkcus(fun, lines)

Apply `fun` to every code unit of every string in `lines` and return the sum of the
results, starting from the integer `0`.
"""
function checkcus(fun, lines::Vector{<:AbstractString})
    total = 0
    for str in lines
        for unit in codeunits(str)
            total += fun(unit)
        end
    end
    return total
end
443391
"""
    checkcp(fun, lines)

Apply `fun` to every element obtained by iterating each string in `lines` and return
the sum of the results, starting from the integer `0`.
"""
function checkcp(fun, lines::Vector{<:AbstractString})
    total = 0
    # NOTE(review): despite the `cp` naming, the string itself is iterated here rather
    # than `codepoints(text)` — confirm that this is the intended element type.
    for str in lines
        for elem in str
            total += fun(elem)
        end
    end
    return total
end
451399
# Join all of `lines` into a single string and report its size via `sizeof`.
function checkjoin(lines)
    joined = join(lines)
    return sizeof(joined)
end
453401
454- @inline function sumchars (text)
455- t = 0
456- for ch in text
457- t += UInt32 (ch)
458- end
459- t
460- end
461- @inline function sumcp (text)
462- t = 0
463- for ch in codepoints (text)
464- t += UInt32 (ch)
465- end
466- t
467- end
468- @inline function sumcu (text)
469- t = 0
470- for ch in codeunits (text)
471- t += UInt32 (ch)
472- end
473- t
474- end
475-
"""
    sumcharvals(lines)

Return the sum of `UInt32(ch)` over every character `ch` of every string in `lines`,
starting from the integer `0`.
"""
function sumcharvals(lines::Vector{<:AbstractString})
    total = 0
    for str in lines
        for c in str
            total += UInt32(c)
        end
    end
    return total
end
"""
    sumcodeunits(lines)

Return the sum of `UInt32(cu)` over every code unit `cu` of every string in `lines`,
starting from the integer `0`.
"""
function sumcodeunits(lines::Vector{<:AbstractString})
    total = 0
    for str in lines
        for unit in codeunits(str)
            total += UInt32(unit)
        end
    end
    return total
end
@@ -653,16 +572,10 @@ checkrepeat80c(l) = checktext(repeat80c, l)
# Benchmark entry points: each one forwards the collection of lines to a shared driver
# (`checktext`, `checkstr` or `checkcp`) together with the operation to apply.

# Driven through `checktext`.
checkreverse(lines) = checktext(reverse, lines)

# Driven through `checkstr`, which receives the per-string function.
checknextind(lines) = checkstr(iteratenextind, lines)

validstr(lines) = checkstr(is_valid, lines)
asciistr(lines) = checkstr(is_ascii, lines)

# Driven through `checkcp`, which receives the per-element predicate.
checkvalid(lines) = checkcp(is_valid, lines)
checkascii(lines) = checkcp(is_ascii, lines)
checkcntrl(lines) = checkcp(is_control, lines)
@@ -754,17 +667,17 @@ const tests =
754667 (checksplit, " split\n line" ),
755668 (checkreverse, " reverse" ),
756669 (checkrepeat1, " repeat 1\n string" ),
757- (checkrepeat10, " repeat 10\n string" ),
670+ # (checkrepeat10, "repeat 10\nstring"),
758671 (searchstr, " search\n string" ),
759672 (searchchar, " search\n char" ),
760673 (searchreg, " search\n regex" ),
761674# (rsearchstr, "rsearch\nstring"),
762675# (rsearchchar, "rsearch\nchar"),
763- (checkrepeat1c, " repeat 1\n char" ),
764- (checkrepeat80c, " repeat 80\n char" ),
676+ # (checkrepeat1c, "repeat 1\nchar"),
677+ # (checkrepeat80c, "repeat 80\nchar"),
765678# (countsklength, "length\nSK"),
766679# (countoldlength, "length\nOld"),
767- (countchars, " iteration\n Char " ),
680+ (countchars, " iteration\n char " ),
768681 (sumcharvals, " sum\n char vals" ),
769682 (asciistr, " isascii\n string" ),
770683 (validstr, " isvalid\n string" ),
0 commit comments