symrcm.cc

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2007-2012 Michael Weitzel
00004 
00005 This file is part of Octave.
00006 
00007 Octave is free software; you can redistribute it and/or modify it
00008 under the terms of the GNU General Public License as published by the
00009 Free Software Foundation; either version 3 of the License, or (at your
00010 option) any later version.
00011 
00012 Octave is distributed in the hope that it will be useful, but WITHOUT
00013 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00014 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00015 for more details.
00016 
00017 You should have received a copy of the GNU General Public License
00018 along with Octave; see the file COPYING.  If not, see
00019 <http://www.gnu.org/licenses/>.
00020 
00021 */
00022 
00023 /*
00024 An implementation of the Reverse Cuthill-McKee algorithm (symrcm)
00025 
00026 The implementation of this algorithm is based in the descriptions found in
00027 
00028 @INPROCEEDINGS{,
00029         author = {E. Cuthill and J. McKee},
00030         title = {Reducing the Bandwidth of Sparse Symmetric Matrices},
00031         booktitle = {Proceedings of the 24th ACM National Conference},
00032         publisher = {Brandon Press},
00033         pages = {157 -- 172},
00034         location = {New Jersey},
00035         year = {1969}
00036 }
00037 
00038 @BOOK{,
00039         author = {Alan George and Joseph W. H. Liu},
00040         title = {Computer Solution of Large Sparse Positive Definite Systems},
00041         publisher = {Prentice Hall Series in Computational Mathematics},
00042         ISBN = {0-13-165274-5},
00043         year = {1981}
00044 }
00045 
00046 The algorithm represents a heuristic approach to the NP-complete minimum
00047 bandwidth problem.
00048 
00049 Written by Michael Weitzel <michael.weitzel@@uni-siegen.de>
00050                            <weitzel@@ldknet.org>
00051 */
00052 
00053 #ifdef HAVE_CONFIG_H
00054 #include <config.h>
00055 #endif
00056 
00057 #include "ov.h"
00058 #include "defun-dld.h"
00059 #include "error.h"
00060 #include "gripes.h"
00061 #include "utils.h"
00062 #include "oct-locbuf.h"
00063 
00064 #include "ov-re-mat.h"
00065 #include "ov-re-sparse.h"
00066 #include "ov-cx-sparse.h"
00067 #include "oct-sparse.h"
00068 
00069 // A node struct for the Cuthill-McKee algorithm
00070 struct CMK_Node
00071 {
00072   // the node's id (matrix row index)
00073   octave_idx_type id;
00074   // the node's degree
00075   octave_idx_type deg;
00076   // minimal distance to the root of the spanning tree
00077   octave_idx_type dist;
00078 };
00079 
00080 // A simple queue.
00081 // Queues Q have a fixed maximum size N (rows,cols of the matrix) and are
00082 // stored in an array. qh and qt point to queue head and tail.
00083 
00084 // Enqueue operation (adds a node "o" at the tail)
00085 
00086 inline static void
00087 Q_enq (CMK_Node *Q, octave_idx_type N, octave_idx_type& qt, const CMK_Node& o)
00088 {
00089   Q[qt] = o;
00090   qt = (qt + 1) % (N + 1);
00091 }
00092 
00093 // Dequeue operation (removes a node from the head)
00094 
00095 inline static CMK_Node
00096 Q_deq (CMK_Node * Q, octave_idx_type N, octave_idx_type& qh)
00097 {
00098   CMK_Node r = Q[qh];
00099   qh = (qh + 1) % (N + 1);
00100   return r;
00101 }
00102 
00103 // Predicate (queue empty)
00104 #define Q_empty(Q, N, qh, qt)   ((qh) == (qt))
00105 
00106 // A simple, array-based binary heap (used as a priority queue for nodes)
00107 
00108 // the left descendant of entry i
00109 #define LEFT(i)         (((i) << 1) + 1)        // = (2*(i)+1)
00110 // the right descendant of entry i
00111 #define RIGHT(i)        (((i) << 1) + 2)        // = (2*(i)+2)
00112 // the parent of entry i
00113 #define PARENT(i)       (((i) - 1) >> 1)        // = floor(((i)-1)/2)
00114 
00115 // Builds a min-heap (the root contains the smallest element). A is an array
00116 // with the graph's nodes, i is a starting position, size is the length of A.
00117 
00118 static void
00119 H_heapify_min (CMK_Node *A, octave_idx_type i, octave_idx_type size)
00120 {
00121   octave_idx_type j = i;
00122   for (;;)
00123     {
00124       octave_idx_type l = LEFT(j);
00125       octave_idx_type r = RIGHT(j);
00126 
00127       octave_idx_type smallest;
00128       if (l < size && A[l].deg < A[j].deg)
00129         smallest = l;
00130       else
00131         smallest = j;
00132 
00133       if (r < size && A[r].deg < A[smallest].deg)
00134         smallest = r;
00135 
00136       if (smallest != j)
00137         {
00138           CMK_Node tmp = A[j];
00139           A[j] = A[smallest];
00140           A[smallest] = tmp;
00141           j = smallest;
00142         }
00143       else
00144         break;
00145     }
00146 }
00147 
00148 // Heap operation insert. Running time is O(log(n))
00149 
00150 static void
00151 H_insert (CMK_Node *H, octave_idx_type& h, const CMK_Node& o)
00152 {
00153   octave_idx_type i = h++;
00154 
00155   H[i] = o;
00156 
00157   if (i == 0)
00158     return;
00159   do
00160     {
00161       octave_idx_type p = PARENT(i);
00162       if (H[i].deg < H[p].deg)
00163         {
00164           CMK_Node tmp = H[i];
00165           H[i] = H[p];
00166           H[p] = tmp;
00167 
00168           i = p;
00169         }
00170       else
00171         break;
00172     }
00173   while (i > 0);
00174 }
00175 
00176 // Heap operation remove-min. Removes the smalles element in O(1) and
00177 // reorganizes the heap optionally in O(log(n))
00178 
00179 inline static CMK_Node
00180 H_remove_min (CMK_Node *H, octave_idx_type& h, int reorg/*=1*/)
00181 {
00182   CMK_Node r = H[0];
00183   H[0] = H[--h];
00184   if (reorg)
00185     H_heapify_min(H, 0, h);
00186   return r;
00187 }
00188 
00189 // Predicate (heap empty)
00190 #define H_empty(H, h)   ((h) == 0)
00191 
00192 // Helper function for the Cuthill-McKee algorithm. Tries to determine a
00193 // pseudo-peripheral node of the graph as starting node.
00194 
00195 static octave_idx_type
00196 find_starting_node (octave_idx_type N, const octave_idx_type *ridx,
00197                     const octave_idx_type *cidx, const octave_idx_type *ridx2,
00198                     const octave_idx_type *cidx2, octave_idx_type *D,
00199                     octave_idx_type start)
00200 {
00201   CMK_Node w;
00202 
00203   OCTAVE_LOCAL_BUFFER (CMK_Node, Q, N+1);
00204   boolNDArray btmp (dim_vector (1, N), false);
00205   bool *visit = btmp.fortran_vec ();
00206 
00207   octave_idx_type qh = 0;
00208   octave_idx_type qt = 0;
00209   CMK_Node x;
00210   x.id = start;
00211   x.deg = D[start];
00212   x.dist = 0;
00213   Q_enq (Q, N, qt, x);
00214   visit[start] = true;
00215 
00216   // distance level
00217   octave_idx_type level = 0;
00218   // current largest "eccentricity"
00219   octave_idx_type max_dist = 0;
00220 
00221   for (;;)
00222     {
00223       while (! Q_empty (Q, N, qh, qt))
00224         {
00225           CMK_Node v = Q_deq (Q, N, qh);
00226 
00227           if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg))
00228             x = v;
00229 
00230           octave_idx_type i = v.id;
00231 
00232           // add all unvisited neighbors to the queue
00233           octave_idx_type j1 = cidx[i];
00234           octave_idx_type j2 = cidx2[i];
00235           while (j1 < cidx[i+1] || j2 < cidx2[i+1])
00236             {
00237               OCTAVE_QUIT;
00238 
00239               if (j1 == cidx[i+1])
00240                 {
00241                   octave_idx_type r2 = ridx2[j2++];
00242                   if (! visit[r2])
00243                     {
00244                       // the distance of node j is dist(i)+1
00245                       w.id = r2;
00246                       w.deg = D[r2];
00247                       w.dist = v.dist+1;
00248                       Q_enq (Q, N, qt, w);
00249                       visit[r2] = true;
00250 
00251                       if (w.dist > level)
00252                         level = w.dist;
00253                     }
00254                 }
00255               else if (j2 == cidx2[i+1])
00256                 {
00257                   octave_idx_type r1 = ridx[j1++];
00258                   if (! visit[r1])
00259                     {
00260                       // the distance of node j is dist(i)+1
00261                       w.id = r1;
00262                       w.deg = D[r1];
00263                       w.dist = v.dist+1;
00264                       Q_enq (Q, N, qt, w);
00265                       visit[r1] = true;
00266 
00267                       if (w.dist > level)
00268                         level = w.dist;
00269                     }
00270                 }
00271               else
00272                 {
00273                   octave_idx_type r1 = ridx[j1];
00274                   octave_idx_type r2 = ridx2[j2];
00275                   if (r1 <= r2)
00276                     {
00277                       if (! visit[r1])
00278                         {
00279                           w.id = r1;
00280                           w.deg = D[r1];
00281                           w.dist = v.dist+1;
00282                           Q_enq (Q, N, qt, w);
00283                           visit[r1] = true;
00284 
00285                           if (w.dist > level)
00286                             level = w.dist;
00287                         }
00288                       j1++;
00289                       if (r1 == r2)
00290                         j2++;
00291                     }
00292                   else
00293                     {
00294                       if (! visit[r2])
00295                         {
00296                           w.id = r2;
00297                           w.deg = D[r2];
00298                           w.dist = v.dist+1;
00299                           Q_enq (Q, N, qt, w);
00300                           visit[r2] = true;
00301 
00302                           if (w.dist > level)
00303                             level = w.dist;
00304                         }
00305                       j2++;
00306                     }
00307                 }
00308             }
00309         } // finish of BFS
00310 
00311       if (max_dist < x.dist)
00312         {
00313           max_dist = x.dist;
00314 
00315           for (octave_idx_type i = 0; i < N; i++)
00316             visit[i] = false;
00317 
00318           visit[x.id] = true;
00319           x.dist = 0;
00320           qt = qh = 0;
00321           Q_enq (Q, N, qt, x);
00322         }
00323       else
00324         break;
00325     }
00326   return x.id;
00327 }
00328 
00329 // Calculates the node's degrees. This means counting the non-zero elements
00330 // in the symmetric matrix' rows. This works for non-symmetric matrices
00331 // as well.
00332 
00333 static octave_idx_type
00334 calc_degrees (octave_idx_type N, const octave_idx_type *ridx,
00335               const octave_idx_type *cidx, octave_idx_type *D)
00336 {
00337   octave_idx_type max_deg = 0;
00338 
00339   for (octave_idx_type i = 0; i < N; i++)
00340     D[i] = 0;
00341 
00342   for (octave_idx_type j = 0; j < N; j++)
00343     {
00344       for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++)
00345         {
00346           OCTAVE_QUIT;
00347           octave_idx_type k = ridx[i];
00348           // there is a non-zero element (k,j)
00349           D[k]++;
00350           if (D[k] > max_deg)
00351             max_deg = D[k];
00352           // if there is no element (j,k) there is one in
00353           // the symmetric matrix:
00354           if (k != j)
00355             {
00356               bool found = false;
00357               for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++)
00358                 {
00359                   OCTAVE_QUIT;
00360 
00361                   if (ridx[l] == j)
00362                     {
00363                       found = true;
00364                       break;
00365                     }
00366                   else if (ridx[l] > j)
00367                     break;
00368                 }
00369 
00370               if (! found)
00371                 {
00372                   // A(j,k) == 0
00373                   D[j]++;
00374                   if (D[j] > max_deg)
00375                     max_deg = D[j];
00376                 }
00377             }
00378         }
00379     }
00380   return max_deg;
00381 }
00382 
00383 // Transpose of the structure of a square sparse matrix
00384 
00385 static void
00386 transpose (octave_idx_type N, const octave_idx_type *ridx,
00387            const octave_idx_type *cidx, octave_idx_type *ridx2,
00388            octave_idx_type *cidx2)
00389 {
00390   octave_idx_type nz = cidx[N];
00391 
00392   OCTAVE_LOCAL_BUFFER (octave_idx_type, w, N + 1);
00393   for (octave_idx_type i = 0; i < N; i++)
00394     w[i] = 0;
00395   for (octave_idx_type i = 0; i < nz; i++)
00396     w[ridx[i]]++;
00397   nz = 0;
00398   for (octave_idx_type i = 0; i < N; i++)
00399     {
00400       OCTAVE_QUIT;
00401       cidx2[i] = nz;
00402       nz += w[i];
00403       w[i] = cidx2[i];
00404     }
00405   cidx2[N] = nz;
00406   w[N] = nz;
00407 
00408   for (octave_idx_type j = 0; j < N; j++)
00409     for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++)
00410       {
00411         OCTAVE_QUIT;
00412         octave_idx_type q = w [ridx[k]]++;
00413         ridx2[q] = j;
00414       }
00415 }
00416 
00417 // An implementation of the Cuthill-McKee algorithm.
00418 DEFUN_DLD (symrcm, args, ,
00419   "-*- texinfo -*-\n\
00420 @deftypefn {Loadable Function} {@var{p} =} symrcm (@var{S})\n\
00421 Return the symmetric reverse Cuthill-McKee permutation of @var{S}.\n\
00422 @var{p} is a permutation vector such that\n\
00423 @code{@var{S}(@var{p}, @var{p})} tends to have its diagonal elements\n\
00424 closer to the diagonal than @var{S}.  This is a good preordering for LU\n\
00425 or Cholesky@tie{}factorization of matrices that come from 'long, skinny'\n\
00426 problems.  It works for both symmetric and asymmetric @var{S}.\n\
00427 \n\
00428 The algorithm represents a heuristic approach to the NP-complete\n\
00429 bandwidth minimization problem.  The implementation is based in the\n\
00430 descriptions found in\n\
00431 \n\
00432 E. Cuthill, J. McKee. @cite{Reducing the Bandwidth of Sparse Symmetric\n\
00433 Matrices}. Proceedings of the 24th ACM National Conference, 157--172\n\
00434 1969, Brandon Press, New Jersey.\n\
00435 \n\
00436 A. George, J.W.H. Liu. @cite{Computer Solution of Large Sparse\n\
00437 Positive Definite Systems}, Prentice Hall Series in Computational\n\
00438 Mathematics, ISBN 0-13-165274-5, 1981.\n\
00439 \n\
00440 @seealso{colperm, colamd, symamd}\n\
00441 @end deftypefn")
00442 {
00443   octave_value retval;
00444   int nargin = args.length ();
00445 
00446   if (nargin != 1)
00447     {
00448       print_usage ();
00449       return retval;
00450     }
00451 
00452   octave_value arg = args(0);
00453 
00454   // the parameter of the matrix is converted into a sparse matrix
00455   //(if necessary)
00456   octave_idx_type *cidx;
00457   octave_idx_type *ridx;
00458   SparseMatrix Ar;
00459   SparseComplexMatrix Ac;
00460 
00461   if (arg.is_real_type ())
00462     {
00463       Ar = arg.sparse_matrix_value ();
00464       // Note cidx/ridx are const, so use xridx and xcidx...
00465       cidx = Ar.xcidx ();
00466       ridx = Ar.xridx ();
00467     }
00468   else
00469     {
00470       Ac = arg.sparse_complex_matrix_value ();
00471       cidx = Ac.xcidx ();
00472       ridx = Ac.xridx ();
00473     }
00474 
00475   if (error_state)
00476     return retval;
00477 
00478   octave_idx_type nr = arg.rows ();
00479   octave_idx_type nc = arg.columns ();
00480 
00481   if (nr != nc)
00482     {
00483       gripe_square_matrix_required ("symrcm");
00484       return retval;
00485     }
00486 
00487   if (nr == 0 && nc == 0)
00488     return octave_value (NDArray (dim_vector (1, 0)));
00489 
00490   // sizes of the heaps
00491   octave_idx_type s = 0;
00492 
00493   // head- and tail-indices for the queue
00494   octave_idx_type qt = 0, qh = 0;
00495   CMK_Node v, w;
00496   // dimension of the matrix
00497   octave_idx_type N = nr;
00498 
00499   OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx2, N + 1);
00500   OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]);
00501   transpose (N, ridx, cidx, ridx2, cidx2);
00502 
00503   // the permutation vector
00504   NDArray P (dim_vector (1, N));
00505 
00506   // compute the node degrees
00507   OCTAVE_LOCAL_BUFFER (octave_idx_type, D, N);
00508   octave_idx_type max_deg = calc_degrees (N, ridx, cidx, D);
00509 
00510   // if none of the nodes has a degree > 0 (a matrix of zeros)
00511   // the return value corresponds to the identity permutation
00512   if (max_deg == 0)
00513     {
00514       for (octave_idx_type i = 0; i < N; i++)
00515         P(i) = i;
00516       return octave_value (P);
00517     }
00518 
00519   // a heap for the a node's neighbors. The number of neighbors is
00520   // limited by the maximum degree max_deg:
00521   OCTAVE_LOCAL_BUFFER (CMK_Node, S, max_deg);
00522 
00523   // a queue for the BFS. The array is always one element larger than
00524   // the number of entries that are stored.
00525   OCTAVE_LOCAL_BUFFER (CMK_Node, Q, N+1);
00526 
00527   // a counter (for building the permutation)
00528   octave_idx_type c = -1;
00529 
00530   // upper bound for the bandwidth (=quality of solution)
00531   // initialize the bandwidth of the graph with 0. B contains the
00532   // the maximum of the theoretical lower limits of the subgraphs
00533   // bandwidths.
00534   octave_idx_type B = 0;
00535 
00536   // mark all nodes as unvisited; with the exception of the nodes
00537   // that have degree==0 and build a CC of the graph.
00538 
00539   boolNDArray btmp (dim_vector (1, N), false);
00540   bool *visit = btmp.fortran_vec ();
00541 
00542   do
00543     {
00544       // locate an unvisited starting node of the graph
00545       octave_idx_type i;
00546       for (i = 0; i < N; i++)
00547         if (! visit[i])
00548           break;
00549 
00550       // locate a probably better starting node
00551       v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i);
00552 
00553       // mark the node as visited and enqueue it (a starting node
00554       // for the BFS). Since the node will be a root of a spanning
00555       // tree, its dist is 0.
00556       v.deg = D[v.id];
00557       v.dist = 0;
00558       visit[v.id] = true;
00559       Q_enq (Q, N, qt, v);
00560 
00561       // lower bound for the bandwidth of a subgraph
00562       // keep a "level" in the spanning tree (= min. distance to the
00563       // root) for determining the bandwidth of the computed
00564       // permutation P
00565       octave_idx_type Bsub = 0;
00566       // min. dist. to the root is 0
00567       octave_idx_type level = 0;
00568       // the root is the first/only node on level 0
00569       octave_idx_type level_N = 1;
00570 
00571       while (! Q_empty (Q, N, qh, qt))
00572         {
00573           v = Q_deq (Q, N, qh);
00574           i = v.id;
00575 
00576           c++;
00577 
00578           // for computing the inverse permutation P where
00579           // A(inv(P),inv(P)) or P'*A*P is banded
00580           //         P(i) = c;
00581 
00582           // for computing permutation P where
00583           // A(P(i),P(j)) or P*A*P' is banded
00584           P(c) = i;
00585 
00586           // put all unvisited neighbors j of node i on the heap
00587           s = 0;
00588           octave_idx_type j1 = cidx[i];
00589           octave_idx_type j2 = cidx2[i];
00590 
00591           OCTAVE_QUIT;
00592           while (j1 < cidx[i+1] || j2 < cidx2[i+1])
00593             {
00594               OCTAVE_QUIT;
00595               if (j1 == cidx[i+1])
00596                 {
00597                   octave_idx_type r2 = ridx2[j2++];
00598                   if (! visit[r2])
00599                     {
00600                       // the distance of node j is dist(i)+1
00601                       w.id = r2;
00602                       w.deg = D[r2];
00603                       w.dist = v.dist+1;
00604                       H_insert(S, s, w);
00605                       visit[r2] = true;
00606                     }
00607                 }
00608               else if (j2 == cidx2[i+1])
00609                 {
00610                   octave_idx_type r1 = ridx[j1++];
00611                   if (! visit[r1])
00612                     {
00613                       w.id = r1;
00614                       w.deg = D[r1];
00615                       w.dist = v.dist+1;
00616                       H_insert(S, s, w);
00617                       visit[r1] = true;
00618                     }
00619                 }
00620               else
00621                 {
00622                   octave_idx_type r1 = ridx[j1];
00623                   octave_idx_type r2 = ridx2[j2];
00624                   if (r1 <= r2)
00625                     {
00626                       if (! visit[r1])
00627                         {
00628                           w.id = r1;
00629                           w.deg = D[r1];
00630                           w.dist = v.dist+1;
00631                           H_insert(S, s, w);
00632                           visit[r1] = true;
00633                         }
00634                       j1++;
00635                       if (r1 == r2)
00636                         j2++;
00637                     }
00638                   else
00639                     {
00640                       if (! visit[r2])
00641                         {
00642                           w.id = r2;
00643                           w.deg = D[r2];
00644                           w.dist = v.dist+1;
00645                           H_insert(S, s, w);
00646                           visit[r2] = true;
00647                         }
00648                       j2++;
00649                     }
00650                 }
00651             }
00652 
00653           // add the neighbors to the queue (sorted by node degree)
00654           while (! H_empty (S, s))
00655             {
00656               OCTAVE_QUIT;
00657 
00658               // locate a neighbor of i with minimal degree in O(log(N))
00659               v = H_remove_min(S, s, 1);
00660 
00661               // entered the BFS a new level?
00662               if (v.dist > level)
00663                 {
00664                   // adjustment of bandwith:
00665                   // "[...] the minimum bandwidth that
00666                   // can be obtained [...] is the
00667                   //  maximum number of nodes per level"
00668                   if (Bsub < level_N)
00669                     Bsub = level_N;
00670 
00671                   level = v.dist;
00672                   // v is the first node on the new level
00673                   level_N = 1;
00674                 }
00675               else
00676                 {
00677                   // there is no new level but another node on
00678                   // this level:
00679                   level_N++;
00680                 }
00681 
00682               // enqueue v in O(1)
00683               Q_enq (Q, N, qt, v);
00684             }
00685 
00686           // synchronize the bandwidth with level_N once again:
00687           if (Bsub < level_N)
00688             Bsub = level_N;
00689         }
00690       // finish of BFS. If there are still unvisited nodes in the graph
00691       // then it is split into CCs. The computed bandwidth is the maximum
00692       // of all subgraphs. Update:
00693       if (Bsub > B)
00694         B = Bsub;
00695     }
00696   // are there any nodes left?
00697   while (c+1 < N);
00698 
00699   // compute the reverse-ordering
00700   s = N / 2 - 1;
00701   for (octave_idx_type i = 0, j = N - 1; i <= s; i++, j--)
00702     {
00703       double tmp = P.elem(i);
00704       P.elem(i) = P.elem(j);
00705       P.elem(j) = tmp;
00706     }
00707 
00708   // increment all indices, since Octave is not C
00709   return octave_value (P+1);
00710 }
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines