GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
symrcm.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2007-2013 Michael Weitzel
4 
5 This file is part of Octave.
6 
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 /*
24 An implementation of the Reverse Cuthill-McKee algorithm (symrcm)
25 
26 The implementation of this algorithm is based in the descriptions found in
27 
28 @INPROCEEDINGS{,
29  author = {E. Cuthill and J. McKee},
30  title = {Reducing the Bandwidth of Sparse Symmetric Matrices},
31  booktitle = {Proceedings of the 24th ACM National Conference},
32  publisher = {Brandon Press},
33  pages = {157 -- 172},
34  location = {New Jersey},
35  year = {1969}
36 }
37 
38 @BOOK{,
39  author = {Alan George and Joseph W. H. Liu},
40  title = {Computer Solution of Large Sparse Positive Definite Systems},
41  publisher = {Prentice Hall Series in Computational Mathematics},
42  ISBN = {0-13-165274-5},
43  year = {1981}
44 }
45 
46 The algorithm represents a heuristic approach to the NP-complete minimum
47 bandwidth problem.
48 
49 Written by Michael Weitzel <michael.weitzel@@uni-siegen.de>
50  <weitzel@@ldknet.org>
51 */
52 
53 #ifdef HAVE_CONFIG_H
54 #include <config.h>
55 #endif
56 
57 #include "ov.h"
58 #include "defun-dld.h"
59 #include "error.h"
60 #include "gripes.h"
61 #include "utils.h"
62 #include "oct-locbuf.h"
63 
64 #include "ov-re-mat.h"
65 #include "ov-re-sparse.h"
66 #include "ov-cx-sparse.h"
67 #include "oct-sparse.h"
68 
69 // A node struct for the Cuthill-McKee algorithm
70 struct CMK_Node
71 {
72  // the node's id (matrix row index)
74  // the node's degree
76  // minimal distance to the root of the spanning tree
78 };
79 
80 // A simple queue.
81 // Queues Q have a fixed maximum size N (rows,cols of the matrix) and are
82 // stored in an array. qh and qt point to queue head and tail.
83 
84 // Enqueue operation (adds a node "o" at the tail)
85 
86 inline static void
88 {
89  Q[qt] = o;
90  qt = (qt + 1) % (N + 1);
91 }
92 
93 // Dequeue operation (removes a node from the head)
94 
95 inline static CMK_Node
97 {
98  CMK_Node r = Q[qh];
99  qh = (qh + 1) % (N + 1);
100  return r;
101 }
102 
103 // Predicate (queue empty)
104 #define Q_empty(Q, N, qh, qt) ((qh) == (qt))
105 
106 // A simple, array-based binary heap (used as a priority queue for nodes)
107 
108 // the left descendant of entry i
109 #define LEFT(i) (((i) << 1) + 1) // = (2*(i)+1)
110 // the right descendant of entry i
111 #define RIGHT(i) (((i) << 1) + 2) // = (2*(i)+2)
112 // the parent of entry i
113 #define PARENT(i) (((i) - 1) >> 1) // = floor(((i)-1)/2)
114 
115 // Builds a min-heap (the root contains the smallest element). A is an array
116 // with the graph's nodes, i is a starting position, size is the length of A.
117 
118 static void
120 {
121  octave_idx_type j = i;
122  for (;;)
123  {
124  octave_idx_type l = LEFT(j);
125  octave_idx_type r = RIGHT(j);
126 
127  octave_idx_type smallest;
128  if (l < size && A[l].deg < A[j].deg)
129  smallest = l;
130  else
131  smallest = j;
132 
133  if (r < size && A[r].deg < A[smallest].deg)
134  smallest = r;
135 
136  if (smallest != j)
137  {
138  std::swap (A[j], A[smallest]);
139  j = smallest;
140  }
141  else
142  break;
143  }
144 }
145 
146 // Heap operation insert. Running time is O(log(n))
147 
148 static void
150 {
151  octave_idx_type i = h++;
152 
153  H[i] = o;
154 
155  if (i == 0)
156  return;
157  do
158  {
159  octave_idx_type p = PARENT(i);
160  if (H[i].deg < H[p].deg)
161  {
162  std::swap (H[i], H[p]);
163 
164  i = p;
165  }
166  else
167  break;
168  }
169  while (i > 0);
170 }
171 
172 // Heap operation remove-min. Removes the smalles element in O(1) and
173 // reorganizes the heap optionally in O(log(n))
174 
175 inline static CMK_Node
176 H_remove_min (CMK_Node *H, octave_idx_type& h, int reorg/*=1*/)
177 {
178  CMK_Node r = H[0];
179  H[0] = H[--h];
180  if (reorg)
181  H_heapify_min (H, 0, h);
182  return r;
183 }
184 
185 // Predicate (heap empty)
186 #define H_empty(H, h) ((h) == 0)
187 
188 // Helper function for the Cuthill-McKee algorithm. Tries to determine a
189 // pseudo-peripheral node of the graph as starting node.
190 
191 static octave_idx_type
193  const octave_idx_type *cidx, const octave_idx_type *ridx2,
194  const octave_idx_type *cidx2, octave_idx_type *D,
195  octave_idx_type start)
196 {
197  CMK_Node w;
198 
200  boolNDArray btmp (dim_vector (1, N), false);
201  bool *visit = btmp.fortran_vec ();
202 
203  octave_idx_type qh = 0;
204  octave_idx_type qt = 0;
205  CMK_Node x;
206  x.id = start;
207  x.deg = D[start];
208  x.dist = 0;
209  Q_enq (Q, N, qt, x);
210  visit[start] = true;
211 
212  // distance level
213  octave_idx_type level = 0;
214  // current largest "eccentricity"
215  octave_idx_type max_dist = 0;
216 
217  for (;;)
218  {
219  while (! Q_empty (Q, N, qh, qt))
220  {
221  CMK_Node v = Q_deq (Q, N, qh);
222 
223  if (v.dist > x.dist || (v.id != x.id && v.deg > x.deg))
224  x = v;
225 
226  octave_idx_type i = v.id;
227 
228  // add all unvisited neighbors to the queue
229  octave_idx_type j1 = cidx[i];
230  octave_idx_type j2 = cidx2[i];
231  while (j1 < cidx[i+1] || j2 < cidx2[i+1])
232  {
233  OCTAVE_QUIT;
234 
235  if (j1 == cidx[i+1])
236  {
237  octave_idx_type r2 = ridx2[j2++];
238  if (! visit[r2])
239  {
240  // the distance of node j is dist(i)+1
241  w.id = r2;
242  w.deg = D[r2];
243  w.dist = v.dist+1;
244  Q_enq (Q, N, qt, w);
245  visit[r2] = true;
246 
247  if (w.dist > level)
248  level = w.dist;
249  }
250  }
251  else if (j2 == cidx2[i+1])
252  {
253  octave_idx_type r1 = ridx[j1++];
254  if (! visit[r1])
255  {
256  // the distance of node j is dist(i)+1
257  w.id = r1;
258  w.deg = D[r1];
259  w.dist = v.dist+1;
260  Q_enq (Q, N, qt, w);
261  visit[r1] = true;
262 
263  if (w.dist > level)
264  level = w.dist;
265  }
266  }
267  else
268  {
269  octave_idx_type r1 = ridx[j1];
270  octave_idx_type r2 = ridx2[j2];
271  if (r1 <= r2)
272  {
273  if (! visit[r1])
274  {
275  w.id = r1;
276  w.deg = D[r1];
277  w.dist = v.dist+1;
278  Q_enq (Q, N, qt, w);
279  visit[r1] = true;
280 
281  if (w.dist > level)
282  level = w.dist;
283  }
284  j1++;
285  if (r1 == r2)
286  j2++;
287  }
288  else
289  {
290  if (! visit[r2])
291  {
292  w.id = r2;
293  w.deg = D[r2];
294  w.dist = v.dist+1;
295  Q_enq (Q, N, qt, w);
296  visit[r2] = true;
297 
298  if (w.dist > level)
299  level = w.dist;
300  }
301  j2++;
302  }
303  }
304  }
305  } // finish of BFS
306 
307  if (max_dist < x.dist)
308  {
309  max_dist = x.dist;
310 
311  for (octave_idx_type i = 0; i < N; i++)
312  visit[i] = false;
313 
314  visit[x.id] = true;
315  x.dist = 0;
316  qt = qh = 0;
317  Q_enq (Q, N, qt, x);
318  }
319  else
320  break;
321  }
322  return x.id;
323 }
324 
325 // Calculates the node's degrees. This means counting the non-zero elements
326 // in the symmetric matrix' rows. This works for non-symmetric matrices
327 // as well.
328 
329 static octave_idx_type
331  const octave_idx_type *cidx, octave_idx_type *D)
332 {
333  octave_idx_type max_deg = 0;
334 
335  for (octave_idx_type i = 0; i < N; i++)
336  D[i] = 0;
337 
338  for (octave_idx_type j = 0; j < N; j++)
339  {
340  for (octave_idx_type i = cidx[j]; i < cidx[j+1]; i++)
341  {
342  OCTAVE_QUIT;
343  octave_idx_type k = ridx[i];
344  // there is a non-zero element (k,j)
345  D[k]++;
346  if (D[k] > max_deg)
347  max_deg = D[k];
348  // if there is no element (j,k) there is one in
349  // the symmetric matrix:
350  if (k != j)
351  {
352  bool found = false;
353  for (octave_idx_type l = cidx[k]; l < cidx[k + 1]; l++)
354  {
355  OCTAVE_QUIT;
356 
357  if (ridx[l] == j)
358  {
359  found = true;
360  break;
361  }
362  else if (ridx[l] > j)
363  break;
364  }
365 
366  if (! found)
367  {
368  // A(j,k) == 0
369  D[j]++;
370  if (D[j] > max_deg)
371  max_deg = D[j];
372  }
373  }
374  }
375  }
376  return max_deg;
377 }
378 
379 // Transpose of the structure of a square sparse matrix
380 
381 static void
383  const octave_idx_type *cidx, octave_idx_type *ridx2,
384  octave_idx_type *cidx2)
385 {
386  octave_idx_type nz = cidx[N];
387 
389  for (octave_idx_type i = 0; i < N; i++)
390  w[i] = 0;
391  for (octave_idx_type i = 0; i < nz; i++)
392  w[ridx[i]]++;
393  nz = 0;
394  for (octave_idx_type i = 0; i < N; i++)
395  {
396  OCTAVE_QUIT;
397  cidx2[i] = nz;
398  nz += w[i];
399  w[i] = cidx2[i];
400  }
401  cidx2[N] = nz;
402  w[N] = nz;
403 
404  for (octave_idx_type j = 0; j < N; j++)
405  for (octave_idx_type k = cidx[j]; k < cidx[j + 1]; k++)
406  {
407  OCTAVE_QUIT;
408  octave_idx_type q = w[ridx[k]]++;
409  ridx2[q] = j;
410  }
411 }
412 
413 // An implementation of the Cuthill-McKee algorithm.
414 DEFUN_DLD (symrcm, args, ,
415  "-*- texinfo -*-\n\
416 @deftypefn {Loadable Function} {@var{p} =} symrcm (@var{S})\n\
417 Return the symmetric reverse Cuthill-McKee permutation of @var{S}.\n\
418 @var{p} is a permutation vector such that\n\
419 @code{@var{S}(@var{p}, @var{p})} tends to have its diagonal elements\n\
420 closer to the diagonal than @var{S}. This is a good preordering for LU\n\
421 or Cholesky@tie{}factorization of matrices that come from ``long, skinny''\n\
422 problems. It works for both symmetric and asymmetric @var{S}.\n\
423 \n\
424 The algorithm represents a heuristic approach to the NP-complete\n\
425 bandwidth minimization problem. The implementation is based in the\n\
426 descriptions found in\n\
427 \n\
428 E. Cuthill, J. McKee. @cite{Reducing the Bandwidth of Sparse Symmetric\n\
429 Matrices}. Proceedings of the 24th ACM National Conference, 157--172\n\
430 1969, Brandon Press, New Jersey.\n\
431 \n\
432 A. George, J.W.H. Liu. @cite{Computer Solution of Large Sparse\n\
433 Positive Definite Systems}, Prentice Hall Series in Computational\n\
434 Mathematics, ISBN 0-13-165274-5, 1981.\n\
435 \n\
436 @seealso{colperm, colamd, symamd}\n\
437 @end deftypefn")
438 {
439  octave_value retval;
440  int nargin = args.length ();
441 
442  if (nargin != 1)
443  {
444  print_usage ();
445  return retval;
446  }
447 
448  octave_value arg = args(0);
449 
450  // the parameter of the matrix is converted into a sparse matrix
451  //(if necessary)
452  octave_idx_type *cidx;
453  octave_idx_type *ridx;
454  SparseMatrix Ar;
456 
457  if (arg.is_real_type ())
458  {
459  Ar = arg.sparse_matrix_value ();
460  // Note cidx/ridx are const, so use xridx and xcidx...
461  cidx = Ar.xcidx ();
462  ridx = Ar.xridx ();
463  }
464  else
465  {
466  Ac = arg.sparse_complex_matrix_value ();
467  cidx = Ac.xcidx ();
468  ridx = Ac.xridx ();
469  }
470 
471  if (error_state)
472  return retval;
473 
474  octave_idx_type nr = arg.rows ();
475  octave_idx_type nc = arg.columns ();
476 
477  if (nr != nc)
478  {
479  gripe_square_matrix_required ("symrcm");
480  return retval;
481  }
482 
483  if (nr == 0 && nc == 0)
484  return octave_value (NDArray (dim_vector (1, 0)));
485 
486  // sizes of the heaps
487  octave_idx_type s = 0;
488 
489  // head- and tail-indices for the queue
490  octave_idx_type qt = 0, qh = 0;
491  CMK_Node v, w;
492  // dimension of the matrix
493  octave_idx_type N = nr;
494 
495  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx2, N + 1);
496  OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx2, cidx[N]);
497  transpose (N, ridx, cidx, ridx2, cidx2);
498 
499  // the permutation vector
500  NDArray P (dim_vector (1, N));
501 
502  // compute the node degrees
504  octave_idx_type max_deg = calc_degrees (N, ridx, cidx, D);
505 
506  // if none of the nodes has a degree > 0 (a matrix of zeros)
507  // the return value corresponds to the identity permutation
508  if (max_deg == 0)
509  {
510  for (octave_idx_type i = 0; i < N; i++)
511  P(i) = i;
512  return octave_value (P);
513  }
514 
515  // a heap for the a node's neighbors. The number of neighbors is
516  // limited by the maximum degree max_deg:
517  OCTAVE_LOCAL_BUFFER (CMK_Node, S, max_deg);
518 
519  // a queue for the BFS. The array is always one element larger than
520  // the number of entries that are stored.
522 
523  // a counter (for building the permutation)
524  octave_idx_type c = -1;
525 
526  // upper bound for the bandwidth (=quality of solution)
527  // initialize the bandwidth of the graph with 0. B contains the
528  // the maximum of the theoretical lower limits of the subgraphs
529  // bandwidths.
530  octave_idx_type B = 0;
531 
532  // mark all nodes as unvisited; with the exception of the nodes
533  // that have degree==0 and build a CC of the graph.
534 
535  boolNDArray btmp (dim_vector (1, N), false);
536  bool *visit = btmp.fortran_vec ();
537 
538  do
539  {
540  // locate an unvisited starting node of the graph
541  octave_idx_type i;
542  for (i = 0; i < N; i++)
543  if (! visit[i])
544  break;
545 
546  // locate a probably better starting node
547  v.id = find_starting_node (N, ridx, cidx, ridx2, cidx2, D, i);
548 
549  // mark the node as visited and enqueue it (a starting node
550  // for the BFS). Since the node will be a root of a spanning
551  // tree, its dist is 0.
552  v.deg = D[v.id];
553  v.dist = 0;
554  visit[v.id] = true;
555  Q_enq (Q, N, qt, v);
556 
557  // lower bound for the bandwidth of a subgraph
558  // keep a "level" in the spanning tree (= min. distance to the
559  // root) for determining the bandwidth of the computed
560  // permutation P
561  octave_idx_type Bsub = 0;
562  // min. dist. to the root is 0
563  octave_idx_type level = 0;
564  // the root is the first/only node on level 0
565  octave_idx_type level_N = 1;
566 
567  while (! Q_empty (Q, N, qh, qt))
568  {
569  v = Q_deq (Q, N, qh);
570  i = v.id;
571 
572  c++;
573 
574  // for computing the inverse permutation P where
575  // A(inv(P),inv(P)) or P'*A*P is banded
576  // P(i) = c;
577 
578  // for computing permutation P where
579  // A(P(i),P(j)) or P*A*P' is banded
580  P(c) = i;
581 
582  // put all unvisited neighbors j of node i on the heap
583  s = 0;
584  octave_idx_type j1 = cidx[i];
585  octave_idx_type j2 = cidx2[i];
586 
587  OCTAVE_QUIT;
588  while (j1 < cidx[i+1] || j2 < cidx2[i+1])
589  {
590  OCTAVE_QUIT;
591  if (j1 == cidx[i+1])
592  {
593  octave_idx_type r2 = ridx2[j2++];
594  if (! visit[r2])
595  {
596  // the distance of node j is dist(i)+1
597  w.id = r2;
598  w.deg = D[r2];
599  w.dist = v.dist+1;
600  H_insert (S, s, w);
601  visit[r2] = true;
602  }
603  }
604  else if (j2 == cidx2[i+1])
605  {
606  octave_idx_type r1 = ridx[j1++];
607  if (! visit[r1])
608  {
609  w.id = r1;
610  w.deg = D[r1];
611  w.dist = v.dist+1;
612  H_insert (S, s, w);
613  visit[r1] = true;
614  }
615  }
616  else
617  {
618  octave_idx_type r1 = ridx[j1];
619  octave_idx_type r2 = ridx2[j2];
620  if (r1 <= r2)
621  {
622  if (! visit[r1])
623  {
624  w.id = r1;
625  w.deg = D[r1];
626  w.dist = v.dist+1;
627  H_insert (S, s, w);
628  visit[r1] = true;
629  }
630  j1++;
631  if (r1 == r2)
632  j2++;
633  }
634  else
635  {
636  if (! visit[r2])
637  {
638  w.id = r2;
639  w.deg = D[r2];
640  w.dist = v.dist+1;
641  H_insert (S, s, w);
642  visit[r2] = true;
643  }
644  j2++;
645  }
646  }
647  }
648 
649  // add the neighbors to the queue (sorted by node degree)
650  while (! H_empty (S, s))
651  {
652  OCTAVE_QUIT;
653 
654  // locate a neighbor of i with minimal degree in O(log(N))
655  v = H_remove_min (S, s, 1);
656 
657  // entered the BFS a new level?
658  if (v.dist > level)
659  {
660  // adjustment of bandwith:
661  // "[...] the minimum bandwidth that
662  // can be obtained [...] is the
663  // maximum number of nodes per level"
664  if (Bsub < level_N)
665  Bsub = level_N;
666 
667  level = v.dist;
668  // v is the first node on the new level
669  level_N = 1;
670  }
671  else
672  {
673  // there is no new level but another node on
674  // this level:
675  level_N++;
676  }
677 
678  // enqueue v in O(1)
679  Q_enq (Q, N, qt, v);
680  }
681 
682  // synchronize the bandwidth with level_N once again:
683  if (Bsub < level_N)
684  Bsub = level_N;
685  }
686  // finish of BFS. If there are still unvisited nodes in the graph
687  // then it is split into CCs. The computed bandwidth is the maximum
688  // of all subgraphs. Update:
689  if (Bsub > B)
690  B = Bsub;
691  }
692  // are there any nodes left?
693  while (c+1 < N);
694 
695  // compute the reverse-ordering
696  s = N / 2 - 1;
697  for (octave_idx_type i = 0, j = N - 1; i <= s; i++, j--)
698  std::swap (P.elem (i), P.elem (j));
699 
700  // increment all indices, since Octave is not C
701  return octave_value (P+1);
702 }