@@ -488,42 +488,114 @@ def prox_vec(w, z, penalty, lipschitz):
488488 return w
489489
490490
491+ POWER_MAX_ITER = 20
492+ TOL = 1e-6 ** 2
493+
494+
491495@njit
492- def power_method (X_data , X_indptr , X_indices , n_iter ):
493- """Power method to compute largest eigenvalue of sparse matrix X.
494-
496+ def spectral_norm2 (X_data , X_indptr , X_indices , n_samples ):
497+ """Compute the squared spectral norm of sparse matrix ``X``.
498+
499+ Find the largest eigenvalue of ``X @ X.T`` using the power method.
500+
495501 Parameters
496502 ----------
497503 X_data : array, shape (n_elements,)
498- `data` attribute of the sparse CSC matrix X .
504+ `data` attribute of the sparse CSC matrix ``X`` .
499505
500506 X_indptr : array, shape (n_features + 1,)
501- `indptr` attribute of the sparse CSC matrix X .
507+ `indptr` attribute of the sparse CSC matrix ``X`` .
502508
503509 X_indices : array, shape (n_elements,)
504- `indices` attribute of the sparse CSC matrix X .
505-
506- n_iter : int
507- Number of iterations for the power method .
508-
510+ `indices` attribute of the sparse CSC matrix ``X`` .
511+
512+ n_samples : int
513+ number of rows of ``X`` .
514+
509515 Returns
510516 -------
511- rayleigh : float
512- Rayleigh quotient or spectral radius of X^TX
517+ eigenvalue : float
518+ The largest eigenvalue value of ``X.T @ X``, aka the squared spectral of ``X``.
519+
520+ References
521+ ----------
522+ .. [1] Alfio Quarteroni, Riccardo Sacco, Fausto Saleri "Numerical Mathematics",
523+ chapiter 5, page 192-195.
513524 """
525+ # init vec with norm(vec) == 1.
526+ eigenvector = np .full (n_samples , 1 / np .sqrt (n_samples ))
527+ eigenvalue = 1.
528+
529+ for _ in range (POWER_MAX_ITER ):
530+ vec = _XXT_dot_vec (X_data , X_indptr , X_indices , eigenvector , n_samples )
531+ norm_vec = norm (vec )
532+ eigenvalue = vec @ eigenvector
533+
534+ # norm(X @ X.T @ eigenvector - eigenvalue * eigenvector) <= tol
535+ if norm_vec ** 2 - eigenvalue ** 2 <= TOL :
536+ break
537+
538+ eigenvector = vec / norm_vec
539+
540+ return eigenvalue
541+
542+
543+ @njit
544+ def _XXT_dot_vec (X_data , X_indptr , X_indices , vec , n_samples ):
545+ # computes X @ X.T @ vec, with X csc encoded
546+ return _X_dot_vec (X_data , X_indptr , X_indices ,
547+ _XT_dot_vec (X_data , X_indptr , X_indices , vec ),
548+ n_samples )
549+
550+
551+ @njit
552+ def _X_dot_vec (X_data , X_indptr , X_indices , vec , n_samples ):
553+ # compute X @ vec, with X csc encoded
554+ result = np .zeros (n_samples )
555+
556+ # loop over features
557+ for j in range (len (X_indptr ) - 1 ):
558+ if vec [j ] == 0 :
559+ continue
560+
561+ col_j_rows_idx = slice (X_indptr [j ], X_indptr [j + 1 ])
562+ result [X_indices [col_j_rows_idx ]] += vec [j ] * X_data [col_j_rows_idx ]
563+
564+ return result
565+
566+
567+ @njit
568+ def _XT_dot_vec (X_data , X_indptr , X_indices , vec ):
569+ # compute X.T @ vec, with X csc encoded
514570 n_features = len (X_indptr ) - 1
515- b_k = np .random .rand (n_features )
516- for _ in range (n_iter ):
517- b_k1 = np .zeros (n_features )
518- for j in range (n_features ):
519- b_k1 [j ] =
520- b_k1 = A @ b_k # write the full loop
521- b_k1_nrm = norm (b_k1 )
522- b_k = b_k1 / b_k1_nrm
523- rayleigh = b_k1 @ b_k / (norm (b_k ) ** 2 )
524- return rayleigh
571+ result = np .zeros (n_features )
572+
573+ for j in range (n_features ):
574+ for idx in range (X_indptr [j ], X_indptr [j + 1 ]):
575+ result [j ] += X_data [idx ] * vec [X_indices [idx ]]
576+
577+ return result
578+
525579
580+ if __name__ == '__main__' :
581+ from scipy .sparse import csc_matrix , random
582+ import time
583+ n_samples , n_features = 500 , 600
584+ A = random (n_samples , n_features , density = 0.5 , format = 'csc' )
526585
586+ X = csc_matrix (A )
587+ X_dense = X .toarray ()
527588
589+ # cache numba compilation
590+ M = random (5 , 7 , density = 0.9 , format = 'csc' )
591+ spectral_norm2 (M .data , M .indptr , M .indices , 5 )
528592
593+ start = time .time ()
594+ spectral_norm2 (X .data , X .indptr , X .indices , n_samples )
595+ end = time .time ()
596+ print ("our: " , end - start )
529597
598+ start = time .time ()
599+ norm (X_dense , ord = 2 ) ** 2
600+ end = time .time ()
601+ print ("np: " , end - start )
0 commit comments