Skip to content

Commit 2c2390e

Browse files
committed
Update benchmark tests
1 parent 1b4ea3d commit 2c2390e

File tree

2 files changed

+29
-116
lines changed

2 files changed

+29
-116
lines changed

REQUIRE

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ Unicode_Entities 0.2.8
88
Emoji_Entities 0.2.6
99
Format 0.7.2
1010
ModuleInterfaceTools 0.1.7
11-
StrAPI 0.1.8
12-
CharSetEncodings 0.1.8
11+
StrAPI 0.1.9
12+
CharSetEncodings 0.1.9
1313
ChrBase 0.1.10
1414
MurmurHash3 0.1.5
1515
StrLiterals 0.1.6
16-
StrBase 0.1.10
16+
StrBase 0.1.11
1717
StrRegex 0.1.7
1818
StrFormat 0.1.5
1919
StrEntities 0.1.3

test/bench.jl

Lines changed: 26 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ dispbench(res) # Displays the results in a pretty format
2424
isdefined(Main, :STRS_SETUP) || include("setup.jl")
2525
@static V6_COMPAT || (using Serialization)
2626

27+
@static V6_COMPAT || (Base.iterate(it::Union{CodePoints,CodeUnits}) = iterate(it, 1))
28+
29+
2730
const inppath = "textsamples"
2831
const gutpath = "gutenberg"
2932
const smppath = "samples"
@@ -227,6 +230,7 @@ function dispres(io, xres)
227230
end
228231
for i = 2:length(res)
229232
rn = res[i]
233+
rn[1] == "UniStr" && continue
230234
pr"\(io)\n\%-12.12s(rn[1])\%6.3f(sizes[i]/stats.len)"
231235
tn = rn[3]
232236
minres = min(length(t1), length(tn))
@@ -292,66 +296,34 @@ function douppercase(lines::Vector{<:AbstractString})
292296
cnt
293297
end
294298

295-
@inline function iteratechars(text::AbstractString)
296-
cnt = 0
297-
for ch in text
298-
cnt += is_digit(ch)
299-
end
300-
cnt
301-
end
302-
303-
@inline function iteratecps(text::AbstractString)
304-
cnt = 0
305-
for ch in codepoints(text)
306-
cnt += is_digit(ch)
307-
end
308-
cnt
309-
end
310-
311-
@inline function iteratecus(text::AbstractString)
312-
cnt = 0
313-
for ch in codeunits(text)
314-
cnt += is_digit(ch)
315-
end
316-
cnt
317-
end
318-
319299
function countchars(lines::Vector{T}) where {T<:AbstractString}
320300
cnt = 0
321-
for text in lines
322-
cnt += iteratechars(text)
301+
@inbounds for text in lines, ch in text
302+
cnt += 1
323303
end
324304
cnt
325305
end
326306

327307
function countcps(lines::Vector{T}) where {T<:AbstractString}
328308
cnt = 0
329-
for text in lines
330-
cnt += iteratecps(text)
331-
end
332-
cnt
333-
end
334-
335-
function countsize(lines::Vector{<:AbstractString})
336-
cnt = 0
337-
for text in lines
338-
cnt += sizeof(text)
309+
for text in lines, ch in codepoints(text)
310+
cnt += 1
339311
end
340312
cnt
341313
end
342314

343-
@inline function countcodeunits(text::AbstractString)
315+
function countcus(lines::Vector{T}) where {T<:AbstractString}
344316
cnt = 0
345-
@inbounds for cu in codeunits(text)
346-
cnt += cu
317+
@inbounds for text in lines, cu in codeunits(text)
318+
cnt += 1
347319
end
348320
cnt
349321
end
350322

351-
function countsize2(lines::Vector{<:AbstractString})
323+
function countsize(lines::Vector{<:AbstractString})
352324
cnt = 0
353325
for text in lines
354-
cnt += countcodeunits(text)
326+
cnt += sizeof(text)
355327
end
356328
cnt
357329
end
@@ -409,88 +381,35 @@ end
409381
cnt
410382
end
411383

412-
@inline function iteratefunchars(fun, text)
413-
cnt = 0
414-
for ch in text
415-
cnt += fun(ch)
416-
end
417-
cnt
418-
end
419-
420-
@inline function iteratefuncps(fun, text)
384+
function checkcus(fun, lines::Vector{<:AbstractString})
421385
cnt = 0
422-
for ch in codepoints(text)
423-
cnt += fun(ch)
424-
end
425-
cnt
426-
end
427-
428-
@inline function iteratefuncus(fun, text)
429-
cnt = 0
430-
for ch in codeunits(text)
431-
cnt += fun(ch)
432-
end
433-
cnt
434-
end
435-
436-
function checkchars(fun, lines::Vector{<:AbstractString})
437-
cnt = 0
438-
for text in lines
439-
cnt += iteratefunchars(fun, text)
386+
for text in lines, cu in codeunits(text)
387+
cnt += fun(cu)
440388
end
441389
cnt
442390
end
443391

444392
function checkcp(fun, lines::Vector{<:AbstractString})
445393
cnt = 0
446-
for text in lines
447-
cnt += iteratefuncps(fun, text)
394+
for text in lines, cp in text
395+
cnt += fun(cp)
448396
end
449397
cnt
450398
end
451399

452400
checkjoin(lines) = sizeof(join(lines))
453401

454-
@inline function sumchars(text)
455-
t = 0
456-
for ch in text
457-
t += UInt32(ch)
458-
end
459-
t
460-
end
461-
@inline function sumcp(text)
462-
t = 0
463-
for ch in codepoints(text)
464-
t += UInt32(ch)
465-
end
466-
t
467-
end
468-
@inline function sumcu(text)
469-
t = 0
470-
for ch in codeunits(text)
471-
t += UInt32(ch)
472-
end
473-
t
474-
end
475-
476402
function sumcharvals(lines::Vector{<:AbstractString})
477403
t = 0
478-
for text in lines
479-
t += sumchars(text)
480-
end
481-
t
482-
end
483-
function sumcodepnts(lines::Vector{<:AbstractString})
484-
t = 0
485-
for text in lines
486-
t += sumcp(text)
404+
for text in lines, ch in text
405+
t += UInt32(ch)
487406
end
488407
t
489408
end
490409
function sumcodeunits(lines::Vector{<:AbstractString})
491410
t = 0
492-
for text in lines
493-
t += sumcu(text)
411+
for text in lines, cu in codeunits(text)
412+
t += UInt32(cu)
494413
end
495414
t
496415
end
@@ -653,16 +572,10 @@ checkrepeat80c(l) = checktext(repeat80c, l)
653572
checkreverse(l) = checktext(reverse, l)
654573

655574
checknextind(l) = checkstr(iteratenextind, l)
656-
countchars(l) = checkstr(iteratechars, l)
657-
countcps(l) = checkstr(iteratecps, l)
658-
countcus(l) = checkstr(iteratecus, l)
659575

660576
validstr(l) = checkstr(is_valid, l)
661577
asciistr(l) = checkstr(is_ascii, l)
662578

663-
validchars(l) = checkchars(is_valid, l)
664-
asciichars(l) = checkchars(is_ascii, l)
665-
666579
checkvalid(l) = checkcp(is_valid, l)
667580
checkascii(l) = checkcp(is_ascii, l)
668581
checkcntrl(l) = checkcp(is_control, l)
@@ -754,17 +667,17 @@ const tests =
754667
(checksplit, "split\nline"),
755668
(checkreverse, "reverse"),
756669
(checkrepeat1, "repeat 1\nstring"),
757-
(checkrepeat10, "repeat 10\nstring"),
670+
# (checkrepeat10, "repeat 10\nstring"),
758671
(searchstr, "search\nstring"),
759672
(searchchar, "search\nchar"),
760673
(searchreg, "search\nregex"),
761674
# (rsearchstr, "rsearch\nstring"),
762675
# (rsearchchar, "rsearch\nchar"),
763-
(checkrepeat1c, "repeat 1\nchar"),
764-
(checkrepeat80c, "repeat 80\nchar"),
676+
# (checkrepeat1c, "repeat 1\nchar"),
677+
# (checkrepeat80c, "repeat 80\nchar"),
765678
# (countsklength, "length\nSK"),
766679
# (countoldlength, "length\nOld"),
767-
(countchars, "iteration\nChar"),
680+
(countchars, "iteration\nchar"),
768681
(sumcharvals, "sum\nchar vals"),
769682
(asciistr, "isascii\nstring"),
770683
(validstr, "isvalid\nstring"),

0 commit comments

Comments
 (0)