1+ using BenchmarkTools, Random
2+ using LinearAlgebra, RecursiveFactorization, VectorizationBase
3+ nc = min (Int (VectorizationBase. num_cores ()), Threads. nthreads ())
4+ BLAS. set_num_threads (nc)
5+ BenchmarkTools. DEFAULT_PARAMETERS. seconds = 0.5
6+
7+ function luflop (m, n = m; innerflop = 2 )
8+ sum (1 : min (m, n)) do k
9+ invflop = 1
10+ scaleflop = isempty ((k + 1 ): m) ? 0 : sum ((k + 1 ): m)
11+ updateflop = isempty ((k + 1 ): n) ? 0 :
12+ sum ((k + 1 ): n) do j
13+ isempty ((k + 1 ): m) ? 0 : sum ((k + 1 ): m) do i
14+ innerflop
15+ end
16+ end
17+ invflop + scaleflop + updateflop
18+ end
19+ end
20+
21+ bas_mflops = Float64[]
22+ rec_mflops = Float64[]
23+ rec4_mflops = Float64[]
24+ rec800_mflops = Float64[]
25+ ref_mflops = Float64[]
26+ ns = 4 : 8 : 500
27+ for n in ns
28+ @info " $n × $n "
29+ rng = MersenneTwister (123 )
30+ global A = rand (rng, n, n)
31+ bt = @belapsed LinearAlgebra. lu! (B) setup= (B = copy (A))
32+ push! (bas_mflops, luflop (n) / bt / 1e9 )
33+
34+ rt = @belapsed RecursiveFactorization. lu! (B) setup= (B = copy (A))
35+ push! (rec_mflops, luflop (n) / rt / 1e9 )
36+
37+ rt4 = @belapsed RecursiveFactorization. lu! (B; threshold = 4 ) setup= (B = copy (A))
38+ push! (rec4_mflops, luflop (n) / rt4 / 1e9 )
39+
40+ rt800 = @belapsed RecursiveFactorization. lu! (B; threshold = 800 ) setup= (B = copy (A))
41+ push! (rec800_mflops, luflop (n) / rt800 / 1e9 )
42+
43+ ref = @belapsed LinearAlgebra. generic_lufact! (B) setup= (B = copy (A))
44+ push! (ref_mflops, luflop (n) / ref / 1e9 )
45+ end
46+
47+ using DataFrames, VegaLite
48+ blaslib = if VERSION ≥ v " 1.7.0-beta2"
49+ config = BLAS. get_config (). loaded_libs
50+ occursin (" mkl_rt" , config[1 ]. libname) ? :MKL : :OpenBLAS
51+ else
52+ BLAS. vendor () === :mkl ? :MKL : :OpenBLAS
53+ end
54+ df = DataFrame (Size = ns,
55+ Reference = ref_mflops)
56+ setproperty! (df, blaslib, bas_mflops)
57+ setproperty! (df, Symbol (" RF with default threshold" ), rec_mflops)
58+ setproperty! (df, Symbol (" RF fully recursive" ), rec4_mflops)
59+ setproperty! (df, Symbol (" RF fully iterative" ), rec800_mflops)
60+ df = stack (df,
61+ [Symbol (" RF with default threshold" ),
62+ Symbol (" RF fully recursive" ),
63+ Symbol (" RF fully iterative" ),
64+ blaslib,
65+ :Reference ], variable_name = :Library , value_name = :GFLOPS )
66+ plt = df |> @vlplot (:line , color= {:Library , scale = {scheme = " category10" }},
67+ x= {:Size }, y= {:GFLOPS },
68+ width= 1000 , height= 600 )
69+ save (joinpath (homedir (), " Pictures" ,
70+ " lu_float64_$(VERSION ) _$(Sys. CPU_NAME) _$(nc) cores_$blaslib .png" ), plt)
71+
72+ #=
73+ using Plot
74+ plt = plot(ns, bas_mflops, legend=:bottomright, lab="OpenBLAS", title="LU Factorization Benchmark", marker=:auto, dpi=300)
75+ plot!(plt, ns, rec_mflops, lab="RecursiveFactorization", marker=:auto)
76+ plot!(plt, ns, ref_mflops, lab="Reference", marker=:auto)
77+ xaxis!(plt, "size (N x N)")
78+ yaxis!(plt, "GFLOPS")
79+ savefig("lubench.png")
80+ savefig("lubench.pdf")
81+ =#
0 commit comments