From 77002f4bb495013b57aaee8bd7a0656c2a8a5805 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Sun, 8 Jun 2025 15:04:19 +0200 Subject: [PATCH 01/16] Decompression for CuSparseMatrixCSC --- Project.toml | 5 +- ext/SparseMatrixColoringsCUDAExt.jl | 79 +++++++++++++++++++++++++++++ src/graph.jl | 4 +- src/matrices.jl | 2 +- src/result.jl | 35 +++++++++---- test/cuda.jl | 53 +++++++++++++++++++ test/utils.jl | 30 ++++++----- 7 files changed, 183 insertions(+), 25 deletions(-) create mode 100644 ext/SparseMatrixColoringsCUDAExt.jl create mode 100644 test/cuda.jl diff --git a/Project.toml b/Project.toml index 33e2f6d9..6656e125 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SparseMatrixColorings" uuid = "0a514795-09f3-496d-8182-132a7b665d35" authors = ["Guillaume Dalle", "Alexis Montoison"] -version = "0.4.20" +version = "0.4.21" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -12,15 +12,18 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CliqueTrees = "60701a23-6482-424a-84db-faee86b9b1f8" Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" [extensions] +SparseMatrixColoringsCUDAExt = "CUDA" SparseMatrixColoringsCliqueTreesExt = "CliqueTrees" SparseMatrixColoringsColorsExt = "Colors" [compat] ADTypes = "1.2.1" +CUDA = "5.8.2" CliqueTrees = "1" Colors = "0.12.11, 0.13" DocStringExtensions = "0.8,0.9" diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl new file mode 100644 index 00000000..d1c7dfd4 --- /dev/null +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -0,0 +1,79 @@ +module SparseMatrixColoringsCUDAExt + +import SparseMatrixColorings as SMC +using SparseArrays: SparseMatrixCSC, rowvals, nnz, nzrange +using CUDA: CuVector, CuMatrix +using CUDA.CUSPARSE: AbstractCuSparseMatrix, CuSparseMatrixCSC, CuSparseMatrixCSR + +SMC.matrix_versions(A::AbstractCuSparseMatrix) = (A,) + +## Compression (slow, through CPU) + +function SMC.compress( + A::AbstractCuSparseMatrix, result::SMC.AbstractColoringResult{structure,:column} +) where {structure} + return CuMatrix(SMC.compress(SparseMatrixCSC(A), result)) +end + +function SMC.compress( + A::AbstractCuSparseMatrix, result::SMC.AbstractColoringResult{structure,:row} +) where {structure} + return CuMatrix(SMC.compress(SparseMatrixCSC(A), result)) +end + +## CSC + +function SMC.ColumnColoringResult( + A::CuSparseMatrixCSC, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} +) where {T<:Integer} + A_cpu = SparseMatrixCSC(A) + result_cpu = SMC.ColumnColoringResult(A_cpu, bg, color) + compressed_indices = CuVector(result_cpu.compressed_indices) + return SMC.ColumnColoringResult(A, bg, color, result_cpu.group, compressed_indices) +end + +function SMC.RowColoringResult( + A::CuSparseMatrixCSC, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} +) where {T<:Integer} + A_cpu = SparseMatrixCSC(A) + result_cpu = SMC.RowColoringResult(A_cpu, bg, color) + compressed_indices = CuVector(result_cpu.compressed_indices) + return SMC.RowColoringResult(A, bg, color, result_cpu.group, compressed_indices) +end + +function SMC.StarSetColoringResult( + A::CuSparseMatrixCSC, + ag::SMC.AdjacencyGraph{T}, + color::Vector{<:Integer}, + star_set::SMC.StarSet{<:Integer}, +) where {T<:Integer} + A_cpu = SparseMatrixCSC(A) + result_cpu = SMC.StarSetColoringResult(A_cpu, ag, color, star_set) + compressed_indices = CuVector(result_cpu.compressed_indices) + return SMC.StarSetColoringResult(A, ag, color, result_cpu.group, compressed_indices) +end + +function SMC.decompress!( + A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.ColumnColoringResult{<:CuSparseMatrixCSC} +) + A.nzVal .= getindex.(Ref(B), result.compressed_indices) + return A +end + +function SMC.decompress!( + A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.RowColoringResult{<:CuSparseMatrixCSC} +) + A.nzVal .= getindex.(Ref(B), result.compressed_indices) + return A +end + +function SMC.decompress!( + A::CuSparseMatrixCSC, + B::CuMatrix, + result::SMC.StarSetColoringResult{<:CuSparseMatrixCSC}, +) + A.nzVal .= getindex.(Ref(B), result.compressed_indices) + return A +end + +end diff --git a/src/graph.jl b/src/graph.jl index 23331789..56d27d4c 100644 --- a/src/graph.jl +++ b/src/graph.jl @@ -100,7 +100,7 @@ end Return a [`SparsityPatternCSC`](@ref) corresponding to the matrix `[0 Aᵀ; A 0]`, with a minimum of allocations. """ function bidirectional_pattern(A::AbstractMatrix; symmetric_pattern::Bool) - bidirectional_pattern(SparsityPatternCSC(SparseMatrixCSC(A)); symmetric_pattern) + return bidirectional_pattern(SparsityPatternCSC(SparseMatrixCSC(A)); symmetric_pattern) end function bidirectional_pattern(S::SparsityPatternCSC{T}; symmetric_pattern::Bool) where {T} @@ -345,6 +345,8 @@ end Base.eltype(::BipartiteGraph{T}) where {T} = T +Base.transpose(bg::BipartiteGraph) = BipartiteGraph(bg.S2, bg.S1) + function BipartiteGraph(A::AbstractMatrix; symmetric_pattern::Bool=false) return BipartiteGraph(SparseMatrixCSC(A); symmetric_pattern) end diff --git a/src/matrices.jl b/src/matrices.jl index 795983c8..eaf79882 100644 --- a/src/matrices.jl +++ b/src/matrices.jl @@ -10,7 +10,7 @@ Return various versions of the same matrix: Used for internal testing. """ -function matrix_versions(A) +function matrix_versions(A::AbstractMatrix) A_dense = Matrix(A) A_sparse = sparse(A) versions = [ diff --git a/src/result.jl b/src/result.jl index ad2ba348..5e21d425 100644 --- a/src/result.jl +++ b/src/result.jl @@ -146,18 +146,23 @@ $TYPEDFIELDS - [`AbstractColoringResult`](@ref) """ struct ColumnColoringResult{ - M<:AbstractMatrix,T<:Integer,G<:BipartiteGraph{T},GT<:AbstractGroups{T} + M<:AbstractMatrix, + T<:Integer, + G<:BipartiteGraph{T}, + CT<:AbstractVector{T}, + GT<:AbstractGroups{T}, + VT<:AbstractVector{T}, } <: AbstractColoringResult{:nonsymmetric,:column,:direct} "matrix that was colored" A::M "bipartite graph that was used for coloring" bg::G "one integer color for each column or row (depending on `partition`)" - color::Vector{T} + color::CT "color groups for columns or rows (depending on `partition`)" group::GT - "flattened indices mapping the compressed matrix `B` to the uncompressed matrix `A` when `A isa SparseMatrixCSC`. They satisfy `nonzeros(A)[k] = vec(B)[compressed_indices[k]]`" - compressed_indices::Vector{T} + "flattened indices mapping the compressed matrix `B` to the uncompressed matrix `A`. When `A isa SparseMatrixCSC`, they satisfy `nonzeros(A)[k] = vec(B)[compressed_indices[k]]`." + compressed_indices::VT end function ColumnColoringResult( @@ -195,13 +200,18 @@ $TYPEDFIELDS - [`AbstractColoringResult`](@ref) """ struct RowColoringResult{ - M<:AbstractMatrix,T<:Integer,G<:BipartiteGraph{T},GT<:AbstractGroups{T} + M<:AbstractMatrix, + T<:Integer, + G<:BipartiteGraph{T}, + CT<:AbstractVector{T}, + GT<:AbstractGroups{T}, + VT<:AbstractVector{T}, } <: AbstractColoringResult{:nonsymmetric,:row,:direct} A::M bg::G - color::Vector{T} + color::CT group::GT - compressed_indices::Vector{T} + compressed_indices::VT end function RowColoringResult( @@ -239,13 +249,18 @@ $TYPEDFIELDS - [`AbstractColoringResult`](@ref) """ struct StarSetColoringResult{ - M<:AbstractMatrix,T<:Integer,G<:AdjacencyGraph{T},GT<:AbstractGroups{T} + M<:AbstractMatrix, + T<:Integer, + G<:AdjacencyGraph{T}, + CT<:AbstractVector{T}, + GT<:AbstractGroups{T}, + VT<:AbstractVector{T}, } <: AbstractColoringResult{:symmetric,:column,:direct} A::M ag::G - color::Vector{T} + color::CT group::GT - compressed_indices::Vector{T} + compressed_indices::VT end function StarSetColoringResult( diff --git a/test/cuda.jl b/test/cuda.jl new file mode 100644 index 00000000..280044db --- /dev/null +++ b/test/cuda.jl @@ -0,0 +1,53 @@ +using CUDA.CUSPARSE: CuSparseMatrixCSC, CuSparseMatrixCSR +using LinearAlgebra +using SparseArrays +using SparseMatrixColorings +using StableRNGs +using Test + +rng = StableRNG(63) + +asymmetric_params = vcat( + [(10, 20, p) for p in (0.0:0.2:0.5)], + [(20, 10, p) for p in (0.0:0.2:0.5)], + [(100, 200, p) for p in (0.01:0.02:0.05)], + [(200, 100, p) for p in (0.01:0.02:0.05)], +) + +symmetric_params = vcat( + [(10, p) for p in (0.0:0.2:0.5)], # + [(100, p) for p in (0.01:0.02:0.05)], +) + +@testset "Column coloring & decompression" begin + problem = ColoringProblem(; structure=:nonsymmetric, partition=:column) + algo = GreedyColoringAlgorithm(; decompression=:direct) + @testset for T in (CuSparseMatrixCSC,) + @testset "$((; m, n, p))" for (m, n, p) in asymmetric_params + A0 = T(sprand(rng, m, n, p)) + test_coloring_decompression(A0, problem, algo; gpu=true) + end + end +end; + +@testset "Row coloring & decompression" begin + problem = ColoringProblem(; structure=:nonsymmetric, partition=:row) + algo = GreedyColoringAlgorithm(; decompression=:direct) + @testset for T in (CuSparseMatrixCSC,) + @testset "$((; m, n, p))" for (m, n, p) in asymmetric_params + A0 = T(sprand(rng, m, n, p)) + test_coloring_decompression(A0, problem, algo; gpu=true) + end + end +end; + +@testset "Symmetric coloring & direct decompression" begin + problem = ColoringProblem(; structure=:symmetric, partition=:column) + algo = GreedyColoringAlgorithm(; postprocessing=false, decompression=:direct) + @testset for T in (CuSparseMatrixCSC,) + @testset "$((; n, p))" for (n, p) in symmetric_params + A0 = T(sparse(Symmetric(sprand(rng, n, n, p)))) + test_coloring_decompression(A0, problem, algo; gpu=true) + end + end +end; diff --git a/test/utils.jl b/test/utils.jl index fb3d42c5..bb80f95f 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -22,6 +22,7 @@ function test_coloring_decompression( B0=nothing, color0=nothing, test_fast=false, + gpu=false, ) where {structure,partition,decompression} color_vec = Vector{Int}[] @testset "$(typeof(A))" for A in matrix_versions(A0) @@ -60,6 +61,17 @@ function test_coloring_decompression( !isnothing(B0) && @test B == B0 end + @testset "Full decompression" begin + @test decompress(B, result) ≈ A0 + @test decompress(B, result) ≈ A0 # check result wasn't modified + @test decompress!(respectful_similar(A, eltype(B)), B, result) ≈ A0 + @test decompress!(respectful_similar(A, eltype(B)), B, result) ≈ A0 + end + + if gpu + continue + end + @testset "Recoverability" begin # TODO: find tests for recoverability for substitution decompression if decompression == :direct @@ -81,13 +93,6 @@ function test_coloring_decompression( end end - @testset "Full decompression" begin - @test decompress(B, result) ≈ A0 - @test decompress(B, result) ≈ A0 # check result wasn't modified - @test decompress!(respectful_similar(A, eltype(B)), B, result) ≈ A0 - @test decompress!(respectful_similar(A, eltype(B)), B, result) ≈ A0 - end - @testset "Single-color decompression" begin if decompression == :direct # TODO: implement for :substitution too A2 = respectful_similar(A, eltype(B)) @@ -194,11 +199,6 @@ function test_bicoloring_decompression( end end - if decompression == :direct - @testset "Recoverability" begin - @test structurally_biorthogonal(A0, row_color, column_color) - end - end @testset "Full decompression" begin @test decompress(Br, Bc, result) ≈ A0 @test decompress(Br, Bc, result) ≈ A0 # check result wasn't modified @@ -209,6 +209,12 @@ function test_bicoloring_decompression( respectful_similar(A, promote_eltype(Br, Bc)), Br, Bc, result ) ≈ A0 end + + if decompression == :direct + @testset "Recoverability" begin + @test structurally_biorthogonal(A0, row_color, column_color) + end + end end end From 649a3c8fe30ee1ba7743847b1b522dd0b7435997 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:44:26 +0200 Subject: [PATCH 02/16] Add test --- test/Project.toml | 1 + test/runtests.jl | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/test/Project.toml b/test/Project.toml index 00ee7348..ab595aba 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -6,6 +6,7 @@ BandedMatrices = "aae01518-5342-5314-be14-df237901396f" BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de" CliqueTrees = "60701a23-6482-424a-84db-faee86b9b1f8" Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" diff --git a/test/runtests.jl b/test/runtests.jl index 1ddb8413..b77ebddf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -84,4 +84,9 @@ include("utils.jl") include("allocations.jl") end end + @testset verbose = true "GPU" begin + @testset "CUDA" begin + include("cuda.jl") + end + end end From 52d9c6a619480be45a6e2ccdbb23a13cb827c394 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:45:04 +0200 Subject: [PATCH 03/16] CuRef --- ext/SparseMatrixColoringsCUDAExt.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index d1c7dfd4..25f7548e 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -2,7 +2,7 @@ module SparseMatrixColoringsCUDAExt import SparseMatrixColorings as SMC using SparseArrays: SparseMatrixCSC, rowvals, nnz, nzrange -using CUDA: CuVector, CuMatrix +using CUDA: CuVector, CuMatrix, CuRef using CUDA.CUSPARSE: AbstractCuSparseMatrix, CuSparseMatrixCSC, CuSparseMatrixCSR SMC.matrix_versions(A::AbstractCuSparseMatrix) = (A,) @@ -56,14 +56,14 @@ end function SMC.decompress!( A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.ColumnColoringResult{<:CuSparseMatrixCSC} ) - A.nzVal .= getindex.(Ref(B), result.compressed_indices) + A.nzVal .= getindex.(CuRef(B), result.compressed_indices) return A end function SMC.decompress!( A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.RowColoringResult{<:CuSparseMatrixCSC} ) - A.nzVal .= getindex.(Ref(B), result.compressed_indices) + A.nzVal .= getindex.(CuRef(B), result.compressed_indices) return A end @@ -72,7 +72,7 @@ function SMC.decompress!( B::CuMatrix, result::SMC.StarSetColoringResult{<:CuSparseMatrixCSC}, ) - A.nzVal .= getindex.(Ref(B), result.compressed_indices) + A.nzVal .= getindex.(CuRef(B), result.compressed_indices) return A end From 2d8fc5a91abd1df98ccb1029f0db39f9502dc498 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:56:04 +0200 Subject: [PATCH 04/16] Broken test --- test/runtests.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index b77ebddf..906e00ec 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -86,7 +86,12 @@ include("utils.jl") end @testset verbose = true "GPU" begin @testset "CUDA" begin - include("cuda.jl") + using CUDA + if CUDA.functional() + include("cuda.jl") + else + @test_broken CUDA.functional() + end end end end From 94d9ff449ce4282e3b682808b2ad4b26bbf46d87 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 10 Jun 2025 07:37:39 +0200 Subject: [PATCH 05/16] Self-hosted --- .github/workflows/Test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index 1f0046a1..322b2952 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -17,7 +17,10 @@ permissions: jobs: test: - runs-on: ubuntu-latest + runs-on: self-hosted + env: + CUDA_VISIBLE_DEVICES: 1 + JULIA_DEPOT_PATH: /scratch/github-actions/julia_depot_smc strategy: matrix: julia-version: ['1.10', '1'] From 4334c2ab2c7935400b6cf50b5d149f10735c2b2d Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 10 Jun 2025 07:42:34 +0200 Subject: [PATCH 06/16] Revert self-hosted --- .github/workflows/Test.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml index 322b2952..1f0046a1 100644 --- a/.github/workflows/Test.yml +++ b/.github/workflows/Test.yml @@ -17,10 +17,7 @@ permissions: jobs: test: - runs-on: self-hosted - env: - CUDA_VISIBLE_DEVICES: 1 - JULIA_DEPOT_PATH: /scratch/github-actions/julia_depot_smc + runs-on: ubuntu-latest strategy: matrix: julia-version: ['1.10', '1'] From 51a7b54cba038c59d1dd5673ea019d5069a2f52b Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 10 Jun 2025 07:48:04 +0200 Subject: [PATCH 07/16] Separate GPU test --- .github/workflows/Test-GPU.yml | 46 ++++++++++ test/runtests.jl | 149 ++++++++++++++++----------------- 2 files changed, 119 insertions(+), 76 deletions(-) create mode 100644 .github/workflows/Test-GPU.yml diff --git a/.github/workflows/Test-GPU.yml b/.github/workflows/Test-GPU.yml new file mode 100644 index 00000000..0d1d8d2c --- /dev/null +++ b/.github/workflows/Test-GPU.yml @@ -0,0 +1,46 @@ +name: Test-GPU + +on: + push: + branches: + - main + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +# needed to allow julia-actions/cache to delete old caches that it has created +permissions: + actions: write + contents: read + +jobs: + test: + runs-on: self-hosted + env: + CUDA_VISIBLE_DEVICES: 1 + JULIA_DEPOT_PATH: /scratch/github-actions/julia_depot_smc + JULIA_SMC_TEST_GROUP: "GPU" + strategy: + matrix: + julia-version: ['1.10', '1'] + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.julia-version }} + arch: x64 + - uses: julia-actions/julia-downgrade-compat@v1 + if: ${{ matrix.version == '1.10' }} + with: + skip: LinearAlgebra, Random, SparseArrays + - uses: julia-actions/cache@v2 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 + with: + files: lcov.info + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: false \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 906e00ec..81c4ecc9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,86 +11,83 @@ using Colors: Colors include("utils.jl") @testset verbose = true "SparseMatrixColorings" begin - @testset verbose = true "Code quality" begin - @testset "Aqua" begin - Aqua.test_all(SparseMatrixColorings; stale_deps=(; ignore=[:Requires],)) - end - @testset "JET" begin - JET.test_package(SparseMatrixColorings; target_defined_modules=true) - end - @testset "JuliaFormatter" begin - @test JuliaFormatter.format( - SparseMatrixColorings; verbose=false, overwrite=false - ) - end - @testset "Doctests" begin - Documenter.doctest(SparseMatrixColorings) - end - end - @testset verbose = true "Internals" begin - @testset "Graph" begin - include("graph.jl") - end - @testset "Forest" begin - include("forest.jl") - end - @testset "Order" begin - include("order.jl") - end - @testset "Check" begin - include("check.jl") - end - @testset "Matrices" begin - include("matrices.jl") - end - @testset "Constructors" begin - include("constructors.jl") - end - @testset "Result" begin - include("result.jl") - end - @testset "Constant coloring" begin - include("constant.jl") - end - @testset "ADTypes coloring algorithms" begin - include("adtypes.jl") - end - @testset "Visualization" begin - include("show_colors.jl") - end - end - @testset verbose = true "Correctness" begin - @testset "Small instances" begin - include("small.jl") - end - @testset "Random instances" begin - include("random.jl") - end - @testset "Structured matrices" begin - include("structured.jl") - end - @testset "Instances with known colorings" begin - include("theory.jl") + if get(ENV, "JULIA_SMC_TEST_GROUP", nothing) == "GPU" + @testset "CUDA" begin + using CUDA + include("cuda.jl") end - @testset "SuiteSparse" begin - include("suitesparse.jl") + else + @testset verbose = true "Code quality" begin + @testset "Aqua" begin + Aqua.test_all(SparseMatrixColorings; stale_deps=(; ignore=[:Requires],)) + end + @testset "JET" begin + JET.test_package(SparseMatrixColorings; target_defined_modules=true) + end + @testset "JuliaFormatter" begin + @test JuliaFormatter.format( + SparseMatrixColorings; verbose=false, overwrite=false + ) + end + @testset "Doctests" begin + Documenter.doctest(SparseMatrixColorings) + end end - end - @testset verbose = true "Performance" begin - @testset "Type stability" begin - include("type_stability.jl") + @testset verbose = true "Internals" begin + @testset "Graph" begin + include("graph.jl") + end + @testset "Forest" begin + include("forest.jl") + end + @testset "Order" begin + include("order.jl") + end + @testset "Check" begin + include("check.jl") + end + @testset "Matrices" begin + include("matrices.jl") + end + @testset "Constructors" begin + include("constructors.jl") + end + @testset "Result" begin + include("result.jl") + end + @testset "Constant coloring" begin + include("constant.jl") + end + @testset "ADTypes coloring algorithms" begin + include("adtypes.jl") + end + @testset "Visualization" begin + include("show_colors.jl") + end end - @testset "Allocations" begin - include("allocations.jl") + @testset verbose = true "Correctness" begin + @testset "Small instances" begin + include("small.jl") + end + @testset "Random instances" begin + include("random.jl") + end + @testset "Structured matrices" begin + include("structured.jl") + end + @testset "Instances with known colorings" begin + include("theory.jl") + end + @testset "SuiteSparse" begin + include("suitesparse.jl") + end end - end - @testset verbose = true "GPU" begin - @testset "CUDA" begin - using CUDA - if CUDA.functional() - include("cuda.jl") - else - @test_broken CUDA.functional() + @testset verbose = true "Performance" begin + @testset "Type stability" begin + include("type_stability.jl") + end + @testset "Allocations" begin + include("allocations.jl") end end end From a6110ecb72b0273a0c7f911e3301e790a6dd4e4c Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 10 Jun 2025 08:01:31 +0200 Subject: [PATCH 08/16] Remove CuRef --- ext/SparseMatrixColoringsCUDAExt.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index 25f7548e..ca82752c 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -2,7 +2,7 @@ module SparseMatrixColoringsCUDAExt import SparseMatrixColorings as SMC using SparseArrays: SparseMatrixCSC, rowvals, nnz, nzrange -using CUDA: CuVector, CuMatrix, CuRef +using CUDA: CuVector, CuMatrix using CUDA.CUSPARSE: AbstractCuSparseMatrix, CuSparseMatrixCSC, CuSparseMatrixCSR SMC.matrix_versions(A::AbstractCuSparseMatrix) = (A,) @@ -53,17 +53,19 @@ function SMC.StarSetColoringResult( return SMC.StarSetColoringResult(A, ag, color, result_cpu.group, compressed_indices) end +# TODO: write a kernel + function SMC.decompress!( A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.ColumnColoringResult{<:CuSparseMatrixCSC} ) - A.nzVal .= getindex.(CuRef(B), result.compressed_indices) + A.nzVal .= getindex.(Ref(B), result.compressed_indices) return A end function SMC.decompress!( A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.RowColoringResult{<:CuSparseMatrixCSC} ) - A.nzVal .= getindex.(CuRef(B), result.compressed_indices) + A.nzVal .= getindex.(Ref(B), result.compressed_indices) return A end @@ -72,7 +74,7 @@ function SMC.decompress!( B::CuMatrix, result::SMC.StarSetColoringResult{<:CuSparseMatrixCSC}, ) - A.nzVal .= getindex.(CuRef(B), result.compressed_indices) + A.nzVal .= getindex.(Ref(B), result.compressed_indices) return A end From f7daa2fa463bdd7056a2f2df15c1841be96def9b Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 10 Jun 2025 10:03:50 +0200 Subject: [PATCH 09/16] Write CUDA kernel --- ext/SparseMatrixColoringsCUDAExt.jl | 46 ++++++++++++++++------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index ca82752c..512fd69b 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -2,7 +2,8 @@ module SparseMatrixColoringsCUDAExt import SparseMatrixColorings as SMC using SparseArrays: SparseMatrixCSC, rowvals, nnz, nzrange -using CUDA: CuVector, CuMatrix +using CUDA: + @cuda, CuVector, CuMatrix, blockIdx, blockDim, gridDim, threadIdx, launch_configuration using CUDA.CUSPARSE: AbstractCuSparseMatrix, CuSparseMatrixCSC, CuSparseMatrixCSR SMC.matrix_versions(A::AbstractCuSparseMatrix) = (A,) @@ -53,29 +54,32 @@ function SMC.StarSetColoringResult( return SMC.StarSetColoringResult(A, ag, color, result_cpu.group, compressed_indices) end -# TODO: write a kernel - -function SMC.decompress!( - A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.ColumnColoringResult{<:CuSparseMatrixCSC} -) - A.nzVal .= getindex.(Ref(B), result.compressed_indices) - return A -end - -function SMC.decompress!( - A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.RowColoringResult{<:CuSparseMatrixCSC} +function update_nzval_from_matrix!( + nzVal::AbstractVector, B::AbstractMatrix, compressed_indices::AbstractVector{<:Integer} ) - A.nzVal .= getindex.(Ref(B), result.compressed_indices) - return A + index = (blockIdx().x - 1) * blockDim().x + threadIdx().x + stride = gridDim().x * blockDim().x + for k in index:stride:length(nzVal) + nzVal[k] = B[compressed_indices[k]] + end + return nothing end -function SMC.decompress!( - A::CuSparseMatrixCSC, - B::CuMatrix, - result::SMC.StarSetColoringResult{<:CuSparseMatrixCSC}, -) - A.nzVal .= getindex.(Ref(B), result.compressed_indices) - return A +for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) + # loop to avoid method ambiguity + @eval function SMC.decompress!( + A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSC} + ) + A.nnz == 0 && return A + kernel = @cuda launch = false update_nzval_from_matrix!( + A.nzVal, B, result.compressed_indices + ) + config = launch_configuration(kernel.fun) + threads = min(A.nnz, config.threads) + blocks = cld(A.nnz, threads) + kernel(A.nzVal, B, result.compressed_indices; threads, blocks) + return A + end end end From 92594c4f81a03cd10d2d48c11deaf6b83288898b Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 10 Jun 2025 10:56:54 +0200 Subject: [PATCH 10/16] Store CSR indices as additional info --- ext/SparseMatrixColoringsCUDAExt.jl | 90 ++++++++++++++++++++++++----- src/graph.jl | 2 - src/result.jl | 73 ++++++++++++++++++++--- test/cuda.jl | 14 +++-- 4 files changed, 147 insertions(+), 32 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index 512fd69b..523bd68f 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -22,24 +22,26 @@ function SMC.compress( return CuMatrix(SMC.compress(SparseMatrixCSC(A), result)) end -## CSC +## CSC Result function SMC.ColumnColoringResult( A::CuSparseMatrixCSC, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} ) where {T<:Integer} - A_cpu = SparseMatrixCSC(A) - result_cpu = SMC.ColumnColoringResult(A_cpu, bg, color) - compressed_indices = CuVector(result_cpu.compressed_indices) - return SMC.ColumnColoringResult(A, bg, color, result_cpu.group, compressed_indices) + group = SMC.group_by_color(T, color) + compressed_indices = SMC.column_csc_indices(bg, color) + additional_info = (; compressed_indices_gpu_csc=CuVector(compressed_indices)) + return SMC.ColumnColoringResult( + A, bg, color, group, compressed_indices, additional_info + ) end function SMC.RowColoringResult( A::CuSparseMatrixCSC, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} ) where {T<:Integer} - A_cpu = SparseMatrixCSC(A) - result_cpu = SMC.RowColoringResult(A_cpu, bg, color) - compressed_indices = CuVector(result_cpu.compressed_indices) - return SMC.RowColoringResult(A, bg, color, result_cpu.group, compressed_indices) + group = SMC.group_by_color(T, color) + compressed_indices = SMC.row_csc_indices(bg, color) + additional_info = (; compressed_indices_gpu_csc=CuVector(compressed_indices)) + return SMC.RowColoringResult(A, bg, color, group, compressed_indices, additional_info) end function SMC.StarSetColoringResult( @@ -48,12 +50,54 @@ function SMC.StarSetColoringResult( color::Vector{<:Integer}, star_set::SMC.StarSet{<:Integer}, ) where {T<:Integer} - A_cpu = SparseMatrixCSC(A) - result_cpu = SMC.StarSetColoringResult(A_cpu, ag, color, star_set) - compressed_indices = CuVector(result_cpu.compressed_indices) - return SMC.StarSetColoringResult(A, ag, color, result_cpu.group, compressed_indices) + group = SMC.group_by_color(T, color) + compressed_indices = SMC.star_csc_indices(ag, color, star_set) + additional_info = (; compressed_indices_gpu_csc=CuVector(compressed_indices)) + return SMC.StarSetColoringResult( + A, ag, color, group, compressed_indices, additional_info + ) +end + +## CSR Result + +function SMC.ColumnColoringResult( + A::CuSparseMatrixCSR, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.column_csc_indices(bg, color) + compressed_indices_csr = SMC.column_csr_indices(bg, color) + additional_info = (; compressed_indices_gpu_csr=CuVector(compressed_indices_csr)) + return SMC.ColumnColoringResult( + A, bg, color, group, compressed_indices, additional_info + ) +end + +function SMC.RowColoringResult( + A::CuSparseMatrixCSR, bg::SMC.BipartiteGraph{T}, color::Vector{<:Integer} +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.row_csc_indices(bg, color) + compressed_indices_csr = SMC.row_csr_indices(bg, color) + additional_info = (; compressed_indices_gpu_csr=CuVector(compressed_indices_csr)) + return SMC.RowColoringResult(A, bg, color, group, compressed_indices, additional_info) +end + +function SMC.StarSetColoringResult( + A::CuSparseMatrixCSR, + ag::SMC.AdjacencyGraph{T}, + color::Vector{<:Integer}, + star_set::SMC.StarSet{<:Integer}, +) where {T<:Integer} + group = SMC.group_by_color(T, color) + compressed_indices = SMC.star_csc_indices(ag, color, star_set) + additional_info = (; compressed_indices_gpu_csr=CuVector(compressed_indices)) + return SMC.StarSetColoringResult( + A, ag, color, group, compressed_indices, additional_info + ) end +## Decompression + function update_nzval_from_matrix!( nzVal::AbstractVector, B::AbstractMatrix, compressed_indices::AbstractVector{<:Integer} ) @@ -70,14 +114,30 @@ for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) @eval function SMC.decompress!( A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSC} ) + compressed_indices = result.additional_info.compressed_indices_gpu_csc + A.nnz == 0 && return A + kernel = @cuda launch = false update_nzval_from_matrix!( + A.nzVal, B, compressed_indices + ) + config = launch_configuration(kernel.fun) + threads = min(A.nnz, config.threads) + blocks = cld(A.nnz, threads) + kernel(A.nzVal, B, compressed_indices; threads, blocks) + return A + end + + @eval function SMC.decompress!( + A::CuSparseMatrixCSR, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSR} + ) + compressed_indices = result.additional_info.compressed_indices_gpu_csr A.nnz == 0 && return A kernel = @cuda launch = false update_nzval_from_matrix!( - A.nzVal, B, result.compressed_indices + A.nzVal, B, compressed_indices ) config = launch_configuration(kernel.fun) threads = min(A.nnz, config.threads) blocks = cld(A.nnz, threads) - kernel(A.nzVal, B, result.compressed_indices; threads, blocks) + kernel(A.nzVal, B, compressed_indices; threads, blocks) return A end end diff --git a/src/graph.jl b/src/graph.jl index 56d27d4c..9b53a36d 100644 --- a/src/graph.jl +++ b/src/graph.jl @@ -345,8 +345,6 @@ end Base.eltype(::BipartiteGraph{T}) where {T} = T -Base.transpose(bg::BipartiteGraph) = BipartiteGraph(bg.S2, bg.S1) - function BipartiteGraph(A::AbstractMatrix; symmetric_pattern::Bool=false) return BipartiteGraph(SparseMatrixCSC(A); symmetric_pattern) end diff --git a/src/result.jl b/src/result.jl index 5e21d425..b5b9cdd2 100644 --- a/src/result.jl +++ b/src/result.jl @@ -152,6 +152,7 @@ struct ColumnColoringResult{ CT<:AbstractVector{T}, GT<:AbstractGroups{T}, VT<:AbstractVector{T}, + A, } <: AbstractColoringResult{:nonsymmetric,:column,:direct} "matrix that was colored" A::M @@ -161,15 +162,22 @@ struct ColumnColoringResult{ color::CT "color groups for columns or rows (depending on `partition`)" group::GT - "flattened indices mapping the compressed matrix `B` to the uncompressed matrix `A`. When `A isa SparseMatrixCSC`, they satisfy `nonzeros(A)[k] = vec(B)[compressed_indices[k]]`." + "flattened indices mapping the compressed matrix `B` to the uncompressed matrix `A` when `A isa SparseMatrixCSC`. They satisfy `nonzeros(A)[k] = vec(B)[compressed_indices[k]]`" compressed_indices::VT + "optional data used for decompressing into specific matrix types" + additional_info::A end function ColumnColoringResult( A::AbstractMatrix, bg::BipartiteGraph{T}, color::Vector{<:Integer} ) where {T<:Integer} - S = bg.S2 group = group_by_color(T, color) + compressed_indices = column_csc_indices(bg, color) + return ColumnColoringResult(A, bg, color, group, compressed_indices, nothing) +end + +function column_csc_indices(bg::BipartiteGraph{T}, color::Vector{<:Integer}) where {T} + S = bg.S2 n = size(S, 1) rv = rowvals(S) compressed_indices = zeros(T, nnz(S)) @@ -181,7 +189,23 @@ function ColumnColoringResult( compressed_indices[k] = (c - 1) * n + i end end - return ColumnColoringResult(A, bg, color, group, compressed_indices) + return compressed_indices +end + +function column_csr_indices(bg::BipartiteGraph{T}, color::Vector{<:Integer}) where {T} + Sᵀ = bg.S1 # CSC storage of transpose(A) + n = size(Sᵀ, 2) + rv = rowvals(Sᵀ) + compressed_indices = zeros(T, nnz(Sᵀ)) + for i in axes(Sᵀ, 2) + for k in nzrange(Sᵀ, i) + j = rv[k] + c = color[j] + # A[i, j] = B[i, c] + compressed_indices[k] = (c - 1) * n + i + end + end + return compressed_indices end """ @@ -206,20 +230,27 @@ struct RowColoringResult{ CT<:AbstractVector{T}, GT<:AbstractGroups{T}, VT<:AbstractVector{T}, + A, } <: AbstractColoringResult{:nonsymmetric,:row,:direct} A::M bg::G color::CT group::GT compressed_indices::VT + additional_info::A end function RowColoringResult( A::AbstractMatrix, bg::BipartiteGraph{T}, color::Vector{<:Integer} ) where {T<:Integer} - S = bg.S2 group = group_by_color(T, color) - C = length(group) # ncolors + compressed_indices = row_csc_indices(bg, color) + return RowColoringResult(A, bg, color, group, compressed_indices, nothing) +end + +function row_csc_indices(bg::BipartiteGraph{T}, color::Vector{<:Integer}) where {T} + S = bg.S2 + C = maximum(color) # ncolors rv = rowvals(S) compressed_indices = zeros(T, nnz(S)) for j in axes(S, 2) @@ -230,7 +261,23 @@ function RowColoringResult( compressed_indices[k] = (j - 1) * C + c end end - return RowColoringResult(A, bg, color, group, compressed_indices) + return compressed_indices +end + +function row_csr_indices(bg::BipartiteGraph{T}, color::Vector{<:Integer}) where {T} + Sᵀ = bg.S1 # CSC storage of transpose(A) + C = maximum(color) # ncolors + rv = rowvals(Sᵀ) + compressed_indices = zeros(T, nnz(Sᵀ)) + for i in axes(Sᵀ, 2) + for k in nzrange(Sᵀ, i) + j = rv[k] + c = color[i] + # A[i, j] = B[c, j] + compressed_indices[k] = (j - 1) * C + c + end + end + return compressed_indices end """ @@ -255,12 +302,14 @@ struct StarSetColoringResult{ CT<:AbstractVector{T}, GT<:AbstractGroups{T}, VT<:AbstractVector{T}, + A, } <: AbstractColoringResult{:symmetric,:column,:direct} A::M ag::G color::CT group::GT compressed_indices::VT + additional_info::A end function StarSetColoringResult( @@ -269,11 +318,18 @@ function StarSetColoringResult( color::Vector{<:Integer}, star_set::StarSet{<:Integer}, ) where {T<:Integer} + group = group_by_color(T, color) + compressed_indices = star_csc_indices(ag, color, star_set) + return StarSetColoringResult(A, ag, color, group, compressed_indices, nothing) +end + +function star_csc_indices( + ag::AdjacencyGraph{T}, color::Vector{<:Integer}, star_set +) where {T} (; star, hub) = star_set S = pattern(ag) edge_to_index = edge_indices(ag) n = S.n - group = group_by_color(T, color) rvS = rowvals(S) compressed_indices = zeros(T, nnz(S)) # needs to be independent from the storage in the graph, in case the graph gets reused for j in axes(S, 2) @@ -302,8 +358,7 @@ function StarSetColoringResult( end end end - - return StarSetColoringResult(A, ag, color, group, compressed_indices) + return compressed_indices end """ diff --git a/test/cuda.jl b/test/cuda.jl index 280044db..514a764b 100644 --- a/test/cuda.jl +++ b/test/cuda.jl @@ -5,6 +5,8 @@ using SparseMatrixColorings using StableRNGs using Test +include("utils.jl") + rng = StableRNG(63) asymmetric_params = vcat( @@ -19,10 +21,10 @@ symmetric_params = vcat( [(100, p) for p in (0.01:0.02:0.05)], ) -@testset "Column coloring & decompression" begin +@testset verbose = true "Column coloring & decompression" begin problem = ColoringProblem(; structure=:nonsymmetric, partition=:column) algo = GreedyColoringAlgorithm(; decompression=:direct) - @testset for T in (CuSparseMatrixCSC,) + @testset for T in (CuSparseMatrixCSC, CuSparseMatrixCSR) @testset "$((; m, n, p))" for (m, n, p) in asymmetric_params A0 = T(sprand(rng, m, n, p)) test_coloring_decompression(A0, problem, algo; gpu=true) @@ -30,10 +32,10 @@ symmetric_params = vcat( end end; -@testset "Row coloring & decompression" begin +@testset verbose = true "Row coloring & decompression" begin problem = ColoringProblem(; structure=:nonsymmetric, partition=:row) algo = GreedyColoringAlgorithm(; decompression=:direct) - @testset for T in (CuSparseMatrixCSC,) + @testset for T in (CuSparseMatrixCSC, CuSparseMatrixCSR) @testset "$((; m, n, p))" for (m, n, p) in asymmetric_params A0 = T(sprand(rng, m, n, p)) test_coloring_decompression(A0, problem, algo; gpu=true) @@ -41,10 +43,10 @@ end; end end; -@testset "Symmetric coloring & direct decompression" begin +@testset verbose = true "Symmetric coloring & direct decompression" begin problem = ColoringProblem(; structure=:symmetric, partition=:column) algo = GreedyColoringAlgorithm(; postprocessing=false, decompression=:direct) - @testset for T in (CuSparseMatrixCSC,) + @testset for T in (CuSparseMatrixCSC, CuSparseMatrixCSR) @testset "$((; n, p))" for (n, p) in symmetric_params A0 = T(sparse(Symmetric(sprand(rng, n, n, p)))) test_coloring_decompression(A0, problem, algo; gpu=true) From 82e11a0005e373b7a53e6503ed90d00ac73ded4b Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 10 Jun 2025 14:51:28 +0200 Subject: [PATCH 11/16] Exclude kernel from coverage --- ext/SparseMatrixColoringsCUDAExt.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index 523bd68f..e362896b 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -98,6 +98,7 @@ end ## Decompression +# COV_EXCL_START function update_nzval_from_matrix!( nzVal::AbstractVector, B::AbstractMatrix, compressed_indices::AbstractVector{<:Integer} ) @@ -108,6 +109,7 @@ function update_nzval_from_matrix!( end return nothing end +# COV_EXCL_STOP for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) # loop to avoid method ambiguity From 8de25932bd0ef97445c11cc820845e101071b9a2 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 10 Jun 2025 18:57:08 +0200 Subject: [PATCH 12/16] Remove kernel --- ext/SparseMatrixColoringsCUDAExt.jl | 34 +++-------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index e362896b..36623a26 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -2,8 +2,7 @@ module SparseMatrixColoringsCUDAExt import SparseMatrixColorings as SMC using SparseArrays: SparseMatrixCSC, rowvals, nnz, nzrange -using CUDA: - @cuda, CuVector, CuMatrix, blockIdx, blockDim, gridDim, threadIdx, launch_configuration +using CUDA: CuVector, CuMatrix using CUDA.CUSPARSE: AbstractCuSparseMatrix, CuSparseMatrixCSC, CuSparseMatrixCSR SMC.matrix_versions(A::AbstractCuSparseMatrix) = (A,) @@ -98,33 +97,13 @@ end ## Decompression -# COV_EXCL_START -function update_nzval_from_matrix!( - nzVal::AbstractVector, B::AbstractMatrix, compressed_indices::AbstractVector{<:Integer} -) - index = (blockIdx().x - 1) * blockDim().x + threadIdx().x - stride = gridDim().x * blockDim().x - for k in index:stride:length(nzVal) - nzVal[k] = B[compressed_indices[k]] - end - return nothing -end -# COV_EXCL_STOP - for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) # loop to avoid method ambiguity @eval function SMC.decompress!( A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSC} ) compressed_indices = result.additional_info.compressed_indices_gpu_csc - A.nnz == 0 && return A - kernel = @cuda launch = false update_nzval_from_matrix!( - A.nzVal, B, compressed_indices - ) - config = launch_configuration(kernel.fun) - threads = min(A.nnz, config.threads) - blocks = cld(A.nnz, threads) - kernel(A.nzVal, B, compressed_indices; threads, blocks) + map!(Base.Fix1(getindex, B), A.nzVal, compressed_indices) return A end @@ -132,14 +111,7 @@ for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) A::CuSparseMatrixCSR, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSR} ) compressed_indices = result.additional_info.compressed_indices_gpu_csr - A.nnz == 0 && return A - kernel = @cuda launch = false update_nzval_from_matrix!( - A.nzVal, B, compressed_indices - ) - config = launch_configuration(kernel.fun) - threads = min(A.nnz, config.threads) - blocks = cld(A.nnz, threads) - kernel(A.nzVal, B, compressed_indices; threads, blocks) + map!(Base.Fix1(getindex, B), A.nzVal, compressed_indices) return A end end From 25b1dc7aa41fca8e11ec17c62222581829032a37 Mon Sep 17 00:00:00 2001 From: Alexis Montoison <35051714+amontoison@users.noreply.github.com> Date: Wed, 11 Jun 2025 00:07:42 -0500 Subject: [PATCH 13/16] Update .github/workflows/Test-GPU.yml --- .github/workflows/Test-GPU.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Test-GPU.yml b/.github/workflows/Test-GPU.yml index 0d1d8d2c..3165c49c 100644 --- a/.github/workflows/Test-GPU.yml +++ b/.github/workflows/Test-GPU.yml @@ -43,4 +43,4 @@ jobs: with: files: lcov.info token: ${{ secrets.CODECOV_TOKEN }} - fail_ci_if_error: false \ No newline at end of file + fail_ci_if_error: false From 998d4dbb71ae324470939f4f54e9cb1b0527539a Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Wed, 11 Jun 2025 23:19:02 +0200 Subject: [PATCH 14/16] Use `copyto!` of a `view` --- ext/SparseMatrixColoringsCUDAExt.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index 36623a26..1c94e4b6 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -103,7 +103,7 @@ for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSC} ) compressed_indices = result.additional_info.compressed_indices_gpu_csc - map!(Base.Fix1(getindex, B), A.nzVal, compressed_indices) + copyto!(A.nzVal, view(B, compressed_indices)) return A end @@ -111,7 +111,7 @@ for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) A::CuSparseMatrixCSR, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSR} ) compressed_indices = result.additional_info.compressed_indices_gpu_csr - map!(Base.Fix1(getindex, B), A.nzVal, compressed_indices) + copyto!(A.nzVal, view(B, compressed_indices)) return A end end From dd94814cb4934241627809c24b0a1ff5d65b4073 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Thu, 12 Jun 2025 00:07:22 +0200 Subject: [PATCH 15/16] Add error --- ext/SparseMatrixColoringsCUDAExt.jl | 24 ++++++++++++++++++++++-- src/decompression.jl | 8 ++++++++ test/cuda.jl | 7 +++++++ test/result.jl | 7 ++++++- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index 1c94e4b6..7b6eb647 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -100,16 +100,36 @@ end for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) # loop to avoid method ambiguity @eval function SMC.decompress!( - A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSC} + A::CuSparseMatrixCSC, + B::CuMatrix, + result::SMC.$R{<:CuSparseMatrixCSC}, + uplo::Symbol=:F, ) + if uplo != :F + throw( + SMC.UnsupportedDecompressionError( + "Single-triangle decompression is not supported on GPU matrices" + ), + ) + end compressed_indices = result.additional_info.compressed_indices_gpu_csc copyto!(A.nzVal, view(B, compressed_indices)) return A end @eval function SMC.decompress!( - A::CuSparseMatrixCSR, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSR} + A::CuSparseMatrixCSR, + B::CuMatrix, + result::SMC.$R{<:CuSparseMatrixCSR}, + uplo::Symbol=:F, ) + if uplo != :F + throw( + SMC.UnsupportedDecompressionError( + "Single-triangle decompression is not supported on GPU matrices" + ), + ) + end compressed_indices = result.additional_info.compressed_indices_gpu_csr copyto!(A.nzVal, view(B, compressed_indices)) return A diff --git a/src/decompression.jl b/src/decompression.jl index bc04394c..a3dd471a 100644 --- a/src/decompression.jl +++ b/src/decompression.jl @@ -106,6 +106,14 @@ function compress( return Br, Bc end +struct UnsupportedDecompressionError + msg::String +end + +function Base.showerror(io::IO, err::UnsupportedDecompressionError) + return print(io, "UnsupportedDecompressionError: $(err.msg)") +end + """ decompress(B::AbstractMatrix, result::AbstractColoringResult{_,:column/:row}) decompress(Br::AbstractMatrix, Bc::AbstractMatrix, result::AbstractColoringResult{_,:bidirectional}) diff --git a/test/cuda.jl b/test/cuda.jl index 514a764b..6549b168 100644 --- a/test/cuda.jl +++ b/test/cuda.jl @@ -2,6 +2,7 @@ using CUDA.CUSPARSE: CuSparseMatrixCSC, CuSparseMatrixCSR using LinearAlgebra using SparseArrays using SparseMatrixColorings +import SparseMatrixColorings as SMC using StableRNGs using Test @@ -51,5 +52,11 @@ end; A0 = T(sparse(Symmetric(sprand(rng, n, n, p)))) test_coloring_decompression(A0, problem, algo; gpu=true) end + A0 = T(sparse(Diagonal(ones(10)))) + result = coloring(A0, problem, algo) + B = compress(A0, result) + @test_throws SMC.UnsupportedDecompressionError decompress!( + similar(A0), B, result, :U + ) end end; diff --git a/test/result.jl b/test/result.jl index 661297f0..0bea3353 100644 --- a/test/result.jl +++ b/test/result.jl @@ -1,4 +1,4 @@ -using SparseMatrixColorings: group_by_color +using SparseMatrixColorings: group_by_color, UnsupportedDecompressionError using Test @testset "Group by color" begin @@ -31,3 +31,8 @@ end B = compress(A, coloring(A, problem, algo)) @test size(B, 1) == 0 end + +@testset "Errors" begin + e = SparseMatrixColorings.UnsupportedDecompressionError("hello") + @test sprint(showerror, e) == "UnsupportedDecompressionError: hello" +end From 5d9e198509ebac8be04ff3d2e01b0652b28ea5e3 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 16 Jun 2025 07:53:00 +0200 Subject: [PATCH 16/16] No uplo for row or column --- ext/SparseMatrixColoringsCUDAExt.jl | 62 ++++++++++++++++++----------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index 7b6eb647..2750964d 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -97,43 +97,59 @@ end ## Decompression -for R in (:ColumnColoringResult, :RowColoringResult, :StarSetColoringResult) +for R in (:ColumnColoringResult, :RowColoringResult) # loop to avoid method ambiguity @eval function SMC.decompress!( - A::CuSparseMatrixCSC, - B::CuMatrix, - result::SMC.$R{<:CuSparseMatrixCSC}, - uplo::Symbol=:F, + A::CuSparseMatrixCSC, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSC} ) - if uplo != :F - throw( - SMC.UnsupportedDecompressionError( - "Single-triangle decompression is not supported on GPU matrices" - ), - ) - end compressed_indices = result.additional_info.compressed_indices_gpu_csc copyto!(A.nzVal, view(B, compressed_indices)) return A end @eval function SMC.decompress!( - A::CuSparseMatrixCSR, - B::CuMatrix, - result::SMC.$R{<:CuSparseMatrixCSR}, - uplo::Symbol=:F, + A::CuSparseMatrixCSR, B::CuMatrix, result::SMC.$R{<:CuSparseMatrixCSR} ) - if uplo != :F - throw( - SMC.UnsupportedDecompressionError( - "Single-triangle decompression is not supported on GPU matrices" - ), - ) - end compressed_indices = result.additional_info.compressed_indices_gpu_csr copyto!(A.nzVal, view(B, compressed_indices)) return A end end +function SMC.decompress!( + A::CuSparseMatrixCSC, + B::CuMatrix, + result::SMC.StarSetColoringResult{<:CuSparseMatrixCSC}, + uplo::Symbol=:F, +) + if uplo != :F + throw( + SMC.UnsupportedDecompressionError( + "Single-triangle decompression is not supported on GPU matrices" + ), + ) + end + compressed_indices = result.additional_info.compressed_indices_gpu_csc + copyto!(A.nzVal, view(B, compressed_indices)) + return A +end + +function SMC.decompress!( + A::CuSparseMatrixCSR, + B::CuMatrix, + result::SMC.StarSetColoringResult{<:CuSparseMatrixCSR}, + uplo::Symbol=:F, +) + if uplo != :F + throw( + SMC.UnsupportedDecompressionError( + "Single-triangle decompression is not supported on GPU matrices" + ), + ) + end + compressed_indices = result.additional_info.compressed_indices_gpu_csr + copyto!(A.nzVal, view(B, compressed_indices)) + return A +end + end