GNU Octave  4.4.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
__ilu__.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2014-2018 Eduardo Ramos Fernández <eduradical951@gmail.com>
4 Copyright (C) 2013-2018 Kai T. Ohlhus <k.ohlhus@gmail.com>
5 
6 This file is part of Octave.
7 
8 Octave is free software: you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <https://www.gnu.org/licenses/>.
21 
22 */
23 
24 #if defined (HAVE_CONFIG_H)
25 # include "config.h"
26 #endif
27 
28 #include "oct-locbuf.h"
29 #include "oct-norm.h"
30 
31 #include "defun.h"
32 #include "error.h"
33 
34 #include "builtin-defun-decls.h"
35 
36 // This function implements the IKJ and JKI variants of Gaussian elimination to
37 // perform the ILU0 decomposition. The behavior is controlled by milu
38 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
39 // advantage of CCS format of the input matrix. If milu = 'row' the input
40 // matrix has to be transposed to obtain the equivalent CRS structure so we can
41 // work efficiently with rows. In this case IKJ version is used.
42 template <typename octave_matrix_t, typename T>
43 void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
44 {
45  const octave_idx_type n = sm.cols ();
46  octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
47  T r;
48  T tl = 0;
49 
50  enum {OFF, ROW, COL};
51  char opt;
52  if (milu == "row")
53  {
54  opt = ROW;
55  sm = sm.transpose ();
56  }
57  else if (milu == "col")
58  opt = COL;
59  else
60  opt = OFF;
61 
62  // Input matrix pointers
63  octave_idx_type *cidx = sm.cidx ();
64  octave_idx_type *ridx = sm.ridx ();
65  T *data = sm.data ();
66 
67  // Working arrays
70 
71  // Initialize working arrays
72  for (i = 0; i < n; i++)
73  iw[i] = -1;
74 
75  // Loop over all columns
76  for (k = 0; k < n; k++)
77  {
78  j1 = cidx[k];
79  j2 = cidx[k+1];
80 
81  if (j1 == j2)
82  error ("ilu: A has a zero on the diagonal");
83 
84  for (j = j1; j < j2; j++)
85  iw[ridx[j]] = j;
86 
87  r = 0;
88  j = j1;
89  jrow = ridx[j1];
90  while ((jrow < k) && (j < j2))
91  {
92  if (opt == ROW)
93  {
94  tl = data[j] / data[uptr[jrow]];
95  data[j] = tl;
96  }
97  for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
98  {
99  jw = iw[ridx[jj]];
100  if (jw != -1)
101  if (opt == ROW)
102  data[jw] -= tl * data[jj];
103  else
104  data[jw] -= data[j] * data[jj];
105 
106  else
107  // That is for the milu='row'
108  if (opt == ROW)
109  r += tl * data[jj];
110  else if (opt == COL)
111  r += data[j] * data[jj];
112  }
113  j++;
114  jrow = ridx[j];
115  }
116  uptr[k] = j;
117  if (opt != OFF)
118  data[uptr[k]] -= r;
119 
120  if (opt != ROW)
121  for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
122  data[jj] /= data[uptr[k]];
123 
124  if (k != jrow)
125  error ("ilu: A has a zero on the diagonal");
126 
127  if (data[j] == T(0))
128  error ("ilu: encountered a pivot equal to 0");
129 
130  for (i = j1; i < j2; i++)
131  iw[ridx[i]] = -1;
132  }
133 
134  if (opt == ROW)
135  sm = sm.transpose ();
136 }
137 
138 DEFUN (__ilu0__, args, ,
139  doc: /* -*- texinfo -*-
140 @deftypefn {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})
141 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @var{milu})
142 Undocumented internal function.
143 @end deftypefn */)
144 {
145  if (args.length () != 2)
146  print_usage ();
147 
149 
150  std::string milu = args(1).string_value ();
151 
152  // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
153  // its structure does not change during the algorithm. The same input
154  // matrix is used to build the output matrix due to that fact.
155  octave_value_list arg_list;
156  if (! args(0).iscomplex ())
157  {
158  SparseMatrix sm = args(0).sparse_matrix_value ();
159  SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
160 
161  ilu_0 <SparseMatrix, double> (sm, milu);
162 
163  retval(0) = speye + Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
164  retval(1) = Ftriu (ovl (sm))(0).sparse_matrix_value ();
165  }
166  else
167  {
168  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
169  SparseMatrix speye (DiagMatrix (sm.cols (), sm.cols (), 1.0));
170 
171  ilu_0 <SparseComplexMatrix, Complex> (sm, milu);
172 
173  retval(0) = speye +
174  Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
175  retval(1) = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
176  }
177 
178  return retval;
179 }
180 
181 template <typename octave_matrix_t, typename T>
182 void ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
183  octave_matrix_t& L, octave_matrix_t& U, T *cols_norm,
184  T *rows_norm, const T droptol = 0,
185  const std::string milu = "off")
186 {
187  // Map the strings into chars for faster comparing inside loops
188  char opt;
189  enum {OFF, ROW, COL};
190  if (milu == "row")
191  opt = ROW;
192  else if (milu == "col")
193  opt = COL;
194  else
195  opt = OFF;
196 
197  octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
198  max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
199 
200  const octave_idx_type n = sm_u.cols ();
201  sm_u = sm_u.transpose ();
202 
203  max_len_u = sm_u.nnz ();
204  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
205  max_len_l = sm_l.nnz ();
206  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
207 
208  // Extract pointers to the arrays for faster access inside loops
209  octave_idx_type *cidx_in_u = sm_u.cidx ();
210  octave_idx_type *ridx_in_u = sm_u.ridx ();
211  T *data_in_u = sm_u.data ();
212  octave_idx_type *cidx_in_l = sm_l.cidx ();
213  octave_idx_type *ridx_in_l = sm_l.ridx ();
214  T *data_in_l = sm_l.data ();
215 
216  // L output arrays
217  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
218  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
219  Array <T> data_out_l (dim_vector (max_len_l, 1));
220  T *data_l = data_out_l.fortran_vec ();
221 
222  // U output arrays
223  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
224  octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
225  Array <T> data_out_u (dim_vector (max_len_u, 1));
226  T *data_u = data_out_u.fortran_vec ();
227 
228  // Working arrays
229  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
230  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
231  OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
232  OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
233  OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
234  OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
237  OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
238 
239  T zero = T (0);
240 
241  // Initialize working arrays
242  cidx_u[0] = cidx_in_u[0];
243  cidx_l[0] = cidx_in_l[0];
244  for (i = 0; i < n; i++)
245  {
246  w_data_u[i] = zero;
247  w_data_l[i] = zero;
248  cr_sum[i] = zero;
249  }
250 
251  total_len_u = 0;
252  total_len_l = 0;
253  cols_list_len = 0;
254  rows_list_len = 0;
255 
256  // Loop over all columns
257  for (k = 0; k < n; k++)
258  {
259  // Load the working column and working row
260  for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
261  w_data_l[ridx_in_l[i]] = data_in_l[i];
262 
263  for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
264  w_data_u[ridx_in_u[i]] = data_in_u[i];
265 
266  // Update U working row
267  for (j = 0; j < rows_list_len; j++)
268  {
269  if ((Ufirst[rows_list[j]] != -1))
270  for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
271  {
272  jrow = ridx_u[jj];
273  w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
274  }
275  }
276  // Update L working column
277  for (j = 0; j < cols_list_len; j++)
278  {
279  if (Lfirst[cols_list[j]] != -1)
280  for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
281  {
282  jrow = ridx_l[jj];
283  w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
284  }
285  }
286 
287  if ((max_len_u - total_len_u) < n)
288  {
289  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
290  data_out_u.resize (dim_vector (max_len_u, 1));
291  data_u = data_out_u.fortran_vec ();
292  ridx_out_u.resize (dim_vector (max_len_u, 1));
293  ridx_u = ridx_out_u.fortran_vec ();
294  }
295 
296  if ((max_len_l - total_len_l) < n)
297  {
298  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
299  data_out_l.resize (dim_vector (max_len_l, 1));
300  data_l = data_out_l.fortran_vec ();
301  ridx_out_l.resize (dim_vector (max_len_l, 1));
302  ridx_l = ridx_out_l.fortran_vec ();
303  }
304 
305  // Expand the working row into the U output data structures
306  w_len_l = 0;
307  data_u[total_len_u] = w_data_u[k];
308  ridx_u[total_len_u] = k;
309  w_len_u = 1;
310  for (i = k + 1; i < n; i++)
311  {
312  if (w_data_u[i] != zero)
313  {
314  if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
315  {
316  if (opt == ROW)
317  cr_sum[k] += w_data_u[i];
318  else if (opt == COL)
319  cr_sum[i] += w_data_u[i];
320  }
321  else
322  {
323  data_u[total_len_u + w_len_u] = w_data_u[i];
324  ridx_u[total_len_u + w_len_u] = i;
325  w_len_u++;
326  }
327  }
328 
329  // Expand the working column into the L output data structures
330  if (w_data_l[i] != zero)
331  {
332  if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
333  {
334  if (opt == COL)
335  cr_sum[k] += w_data_l[i];
336  else if (opt == ROW)
337  cr_sum[i] += w_data_l[i];
338  }
339  else
340  {
341  data_l[total_len_l + w_len_l] = w_data_l[i];
342  ridx_l[total_len_l + w_len_l] = i;
343  w_len_l++;
344  }
345  }
346  w_data_u[i] = zero;
347  w_data_l[i] = zero;
348  }
349 
350  // Compensate row and column sums --> milu option
351  if (opt == COL || opt == ROW)
352  data_u[total_len_u] += cr_sum[k];
353 
354  // Check if the pivot is zero
355  if (data_u[total_len_u] == zero)
356  error ("ilu: encountered a pivot equal to 0");
357 
358  // Scale the elements in L by the pivot
359  for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
360  data_l[i] /= data_u[total_len_u];
361 
362  total_len_u += w_len_u;
363  total_len_l += w_len_l;
364  // Check if there are too many elements to be indexed with
365  // octave_idx_type type due to fill-in during the process.
366  if (total_len_l < 0 || total_len_u < 0)
367  error ("ilu: integer overflow. Too many fill-in elements in L or U");
368 
369  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
370  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
371 
372  // The tricky part of the algorithm. The arrays pointing to the first
373  // working element of each column in the next iteration (Lfirst) or
374  // the first working element of each row (Ufirst) are updated. Also the
375  // arrays working as lists cols_list and rows_list are filled with
376  // indices pointing to Ufirst and Lfirst respectively.
377  // FIXME: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
378  // have to be considered in a certain column or row in next iteration,
379  // can be removed. It feels safer to me using such an indicator.
380  if (k < (n - 1))
381  {
382  if (w_len_u > 0)
383  Ufirst[k] = cidx_u[k];
384  else
385  Ufirst[k] = -1;
386  if (w_len_l > 0)
387  Lfirst[k] = cidx_l[k];
388  else
389  Lfirst[k] = -1;
390  cols_list_len = 0;
391  rows_list_len = 0;
392  for (i = 0; i <= k; i++)
393  {
394  if (Ufirst[i] != -1)
395  {
396  jj = ridx_u[Ufirst[i]];
397  if (jj < (k + 1))
398  {
399  if (Ufirst[i] < (cidx_u[i+1]))
400  {
401  Ufirst[i]++;
402  if (Ufirst[i] == cidx_u[i+1])
403  Ufirst[i] = -1;
404  else
405  jj = ridx_u[Ufirst[i]];
406  }
407  }
408  if (jj == (k + 1))
409  {
410  cols_list[cols_list_len] = i;
411  cols_list_len++;
412  }
413  }
414 
415  if (Lfirst[i] != -1)
416  {
417  jj = ridx_l[Lfirst[i]];
418  if (jj < (k + 1))
419  if (Lfirst[i] < (cidx_l[i+1]))
420  {
421  Lfirst[i]++;
422  if (Lfirst[i] == cidx_l[i+1])
423  Lfirst[i] = -1;
424  else
425  jj = ridx_l[Lfirst[i]];
426  }
427  if (jj == (k + 1))
428  {
429  rows_list[rows_list_len] = i;
430  rows_list_len++;
431  }
432  }
433  }
434  }
435  }
436 
437  // Build the output matrices
438  L = octave_matrix_t (n, n, total_len_l);
439  U = octave_matrix_t (n, n, total_len_u);
440 
441  // FIXME: Can these loops be replaced by std::copy?
442  for (i = 0; i <= n; i++)
443  L.cidx (i) = cidx_l[i];
444 
445  for (i = 0; i < total_len_l; i++)
446  {
447  L.ridx (i) = ridx_l[i];
448  L.data (i) = data_l[i];
449  }
450 
451  for (i = 0; i <= n; i++)
452  U.cidx (i) = cidx_u[i];
453 
454  for (i = 0; i < total_len_u; i++)
455  {
456  U.ridx (i) = ridx_u[i];
457  U.data (i) = data_u[i];
458  }
459 
460  U = U.transpose ();
461 }
462 
463 DEFUN (__iluc__, args, ,
464  doc: /* -*- texinfo -*-
465 @deftypefn {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})
466 Undocumented internal function.
467 @end deftypefn */)
468 {
469  if (args.length () != 3)
470  print_usage ();
471 
472  double droptol = args(1).double_value ();
473  std::string milu = args(2).string_value ();
474 
475  if (! args(0).iscomplex ())
476  {
477  SparseMatrix sm = args(0).sparse_matrix_value ();
478  SparseMatrix sm_u = Ftriu (ovl (sm))(0).sparse_matrix_value ();
479  SparseMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_matrix_value ();
480  SparseMatrix U, L;
481 
482  ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
483  xcolnorms (sm).fortran_vec (),
484  xrownorms (sm).fortran_vec (),
485  droptol, milu);
486 
487  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
488 
489  return ovl (L + speye, U);
490  }
491  else
492  {
493  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
494  SparseComplexMatrix sm_u = Ftriu (ovl (sm))(0).sparse_complex_matrix_value ();
495  SparseComplexMatrix sm_l = Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ();
496  SparseComplexMatrix U, L;
497  Array<Complex> cols_norm = xcolnorms (sm);
498  Array<Complex> rows_norm = xrownorms (sm);
499 
500  ilu_crout <SparseComplexMatrix, Complex> (sm_l, sm_u, L, U,
501  cols_norm.fortran_vec (),
502  rows_norm.fortran_vec (),
503  Complex (droptol), milu);
504 
505  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
506 
507  return ovl (L + speye, U);
508  }
509 }
510 
511 // This function implements the IKJ and JKI variants of gaussian elimination
512 // to perform the ILUTP decomposition. The behavior is controlled by milu
513 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
514 // advantage of CCS format of the input matrix. Row pivoting is performed.
515 // If milu = 'row' the input matrix has to be transposed to obtain the
516 // equivalent CRS structure so we can work efficiently with rows. In that
517 // case IKJ version is used and column pivoting is performed.
518 
519 template <typename octave_matrix_t, typename T>
520 void ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
521  octave_idx_type nnz_u, octave_idx_type nnz_l, T *cols_norm,
522  Array <octave_idx_type>& perm_vec, const T droptol = T(0),
523  const T thresh = T(0), const std::string milu = "off",
524  const double udiag = 0)
525 {
526  char opt;
527  enum {OFF, ROW, COL};
528  if (milu == "row")
529  opt = ROW;
530  else if (milu == "col")
531  opt = COL;
532  else
533  opt = OFF;
534 
535  const octave_idx_type n = sm.cols ();
536 
537  // This is necessary for the JKI (milu = "row") variant.
538  if (opt == ROW)
539  sm = sm.transpose ();
540 
541  // Extract pointers to the arrays for faster access inside loops
542  octave_idx_type *cidx_in = sm.cidx ();
543  octave_idx_type *ridx_in = sm.ridx ();
544  T *data_in = sm.data ();
545  octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
546  max_ind, max_len_l, max_len_u;
547  T zero = T(0);
548 
549  T tl = zero, aux, maximum;
550 
551  max_len_u = nnz_u;
552  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
553  max_len_l = nnz_l;
554  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
555 
556  // Extract pointers to the arrays for faster access inside loops
557  Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
558  octave_idx_type *cidx_l = cidx_out_l.fortran_vec ();
559  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
560  octave_idx_type *ridx_l = ridx_out_l.fortran_vec ();
561  Array <T> data_out_l (dim_vector (max_len_l, 1));
562  T *data_l = data_out_l.fortran_vec ();
563 
564  // Data for U
565  Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
566  octave_idx_type *cidx_u = cidx_out_u.fortran_vec ();
567  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
568  octave_idx_type *ridx_u = ridx_out_u.fortran_vec ();
569  Array <T> data_out_u (dim_vector (max_len_u, 1));
570  T *data_u = data_out_u.fortran_vec ();
571 
572  // Working arrays and permutation arrays
573  octave_idx_type w_len_u, w_len_l;
574  T total_sum, partial_col_sum = zero, partial_row_sum = zero;
575  std::set <octave_idx_type> iw_l;
576  std::set <octave_idx_type> iw_u;
577  std::set <octave_idx_type>::iterator it, it2;
578  OCTAVE_LOCAL_BUFFER (T, w_data, n);
580  octave_idx_type *perm = perm_vec.fortran_vec ();
582 
583  // Initialize working and permutation arrays
584  cidx_l[0] = cidx_in[0];
585  cidx_u[0] = cidx_in[0];
586  for (i = 0; i < n; i++)
587  {
588  w_data[i] = 0;
589  perm[i] = i;
590  iperm[i] = i;
591  }
592  total_len_u = 0;
593  total_len_l = 0;
594 
595  // Loop over all columns
596  for (k = 0; k < n; k++)
597  {
598  for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
599  {
600  p_perm = iperm[ridx_in[j]];
601  w_data[iperm[ridx_in[j]]] = data_in[j];
602  if (p_perm > k)
603  iw_l.insert (ridx_in[j]);
604  else
605  iw_u.insert (p_perm);
606  }
607 
608  it = iw_u.begin ();
609  jrow = *it;
610  total_sum = zero;
611  while ((jrow < k) && (it != iw_u.end ()))
612  {
613  if (opt == COL)
614  partial_col_sum = w_data[jrow];
615  if (w_data[jrow] != zero)
616  {
617  if (opt == ROW)
618  {
619  partial_row_sum = w_data[jrow];
620  tl = w_data[jrow] / data_u[uptr[jrow]];
621  }
622  for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
623  {
624  p_perm = iperm[ridx_l[jj]];
625  aux = w_data[p_perm];
626  if (opt == ROW)
627  {
628  w_data[p_perm] -= tl * data_l[jj];
629  partial_row_sum += tl * data_l[jj];
630  }
631  else
632  {
633  tl = data_l[jj] * w_data[jrow];
634  w_data[p_perm] -= tl;
635  if (opt == COL)
636  partial_col_sum += tl;
637  }
638 
639  if ((aux == zero) && (w_data[p_perm] != zero))
640  {
641  if (p_perm > k)
642  iw_l.insert (ridx_l[jj]);
643  else
644  iw_u.insert (p_perm);
645  }
646  }
647 
648  // Drop element from the U part in IKJ
649  // and L part in JKI variant (milu = [col|off])
650  if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
651  && (w_data[jrow] != zero))
652  {
653  if (opt == COL)
654  total_sum += partial_col_sum;
655  else if (opt == ROW)
656  total_sum += partial_row_sum;
657  w_data[jrow] = zero;
658  it2 = it;
659  it++;
660  iw_u.erase (it2);
661  jrow = *it;
662  continue;
663  }
664  else
665  // This is the element scaled by the pivot
666  // in the actual iteration
667  if (opt == ROW)
668  w_data[jrow] = tl;
669  }
670  jrow = *(++it);
671  }
672 
673  // Search for the pivot and update iw_l and iw_u if the pivot is not the
674  // diagonal element
675  if ((thresh > zero) && (k < (n - 1)))
676  {
677  maximum = std::abs (w_data[k]) / thresh;
678  max_ind = perm[k];
679  for (it = iw_l.begin (); it != iw_l.end (); ++it)
680  {
681  p_perm = iperm[*it];
682  if (std::abs (w_data[p_perm]) > maximum)
683  {
684  maximum = std::abs (w_data[p_perm]);
685  max_ind = *it;
686  it2 = it;
687  }
688  }
689  // If the pivot is not the diagonal element update all
690  p_perm = iperm[max_ind];
691  if (max_ind != perm[k])
692  {
693  iw_l.erase (it2);
694  if (w_data[k] != zero)
695  iw_l.insert (perm[k]);
696  else
697  iw_u.insert (k);
698  // Swap data and update permutation vectors
699  aux = w_data[k];
700  iperm[perm[p_perm]] = k;
701  iperm[perm[k]] = p_perm;
702  c = perm[k];
703  perm[k] = perm[p_perm];
704  perm[p_perm] = c;
705  w_data[k] = w_data[p_perm];
706  w_data[p_perm] = aux;
707  }
708 
709  }
710 
711  // Drop elements in the L part in the IKJ version,
712  // and from the U part in the JKI version.
713  it = iw_l.begin ();
714  while (it != iw_l.end ())
715  {
716  p_perm = iperm[*it];
717  if (droptol > zero)
718  if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
719  {
720  if (opt != OFF)
721  total_sum += w_data[p_perm];
722  w_data[p_perm] = zero;
723  it2 = it;
724  it++;
725  iw_l.erase (it2);
726  continue;
727  }
728 
729  it++;
730  }
731 
732  // If milu == [row|col] summation is preserved.
733  // Compensate diagonal element.
734  if (opt != OFF)
735  {
736  if ((total_sum > zero) && (w_data[k] == zero))
737  iw_u.insert (k);
738  w_data[k] += total_sum;
739  }
740 
741  // Check if the pivot is zero and if udiag is active.
742  // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
743  // will not preserve the row sum for that column/row.
744  if (w_data[k] == zero)
745  {
746  if (udiag != 1)
747  error ("ilu: encountered a pivot equal to 0");
748 
749  w_data[k] = droptol;
750  iw_u.insert (k);
751  }
752 
753  // Scale the elements on the L part for IKJ version (milu = [col|off])
754  if (opt != ROW)
755  for (it = iw_l.begin (); it != iw_l.end (); ++it)
756  {
757  p_perm = iperm[*it];
758  w_data[p_perm] = w_data[p_perm] / w_data[k];
759  }
760 
761  if ((max_len_u - total_len_u) < n)
762  {
763  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
764  data_out_u.resize (dim_vector (max_len_u, 1));
765  data_u = data_out_u.fortran_vec ();
766  ridx_out_u.resize (dim_vector (max_len_u, 1));
767  ridx_u = ridx_out_u.fortran_vec ();
768  }
769 
770  if ((max_len_l - total_len_l) < n)
771  {
772  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
773  data_out_l.resize (dim_vector (max_len_l, 1));
774  data_l = data_out_l.fortran_vec ();
775  ridx_out_l.resize (dim_vector (max_len_l, 1));
776  ridx_l = ridx_out_l.fortran_vec ();
777  }
778 
779  // Expand working vector into U.
780  w_len_u = 0;
781  for (it = iw_u.begin (); it != iw_u.end (); ++it)
782  {
783  if (w_data[*it] != zero)
784  {
785  data_u[total_len_u + w_len_u] = w_data[*it];
786  ridx_u[total_len_u + w_len_u] = *it;
787  w_len_u++;
788  }
789  w_data[*it] = 0;
790  }
791 
792  // Expand working vector into L.
793  w_len_l = 0;
794  for (it = iw_l.begin (); it != iw_l.end (); ++it)
795  {
796  p_perm = iperm[*it];
797  if (w_data[p_perm] != zero)
798  {
799  data_l[total_len_l + w_len_l] = w_data[p_perm];
800  ridx_l[total_len_l + w_len_l] = *it;
801  w_len_l++;
802  }
803  w_data[p_perm] = 0;
804  }
805  total_len_u += w_len_u;
806  total_len_l += w_len_l;
807 
808  // Check if there are too many elements to be indexed with
809  // octave_idx_type type due to fill-in during the process.
810  if (total_len_l < 0 || total_len_u < 0)
811  error ("ilu: Integer overflow. Too many fill-in elements in L or U");
812 
813  if (opt == ROW)
814  uptr[k] = total_len_u - 1;
815 
816  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
817  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
818 
819  iw_l.clear ();
820  iw_u.clear ();
821  }
822 
823  octave_matrix_t *L_ptr;
824  octave_matrix_t *U_ptr;
825  octave_matrix_t diag (n, n, n);
826 
827  // L and U are interchanged if milu = 'row'. It is a matter
828  // of nomenclature to re-use code with both IKJ and JKI
829  // versions of the algorithm.
830  if (opt == ROW)
831  {
832  L_ptr = &U;
833  U_ptr = &L;
834  L = octave_matrix_t (n, n, total_len_u - n);
835  U = octave_matrix_t (n, n, total_len_l);
836  }
837  else
838  {
839  L_ptr = &L;
840  U_ptr = &U;
841  L = octave_matrix_t (n, n, total_len_l);
842  U = octave_matrix_t (n, n, total_len_u);
843  }
844 
845  for (i = 0; i <= n; i++)
846  {
847  L_ptr->cidx (i) = cidx_l[i];
848  U_ptr->cidx (i) = cidx_u[i];
849  if (opt == ROW)
850  U_ptr->cidx (i) -= i;
851  }
852 
853  for (i = 0; i < n; i++)
854  {
855  if (opt == ROW)
856  diag.elem (i,i) = data_u[uptr[i]];
857  j = cidx_l[i];
858 
859  while (j < cidx_l[i+1])
860  {
861  L_ptr->ridx (j) = ridx_l[j];
862  L_ptr->data (j) = data_l[j];
863  j++;
864  }
865  j = cidx_u[i];
866 
867  while (j < cidx_u[i+1])
868  {
869  c = j;
870  if (opt == ROW)
871  {
872  // The diagonal is removed from L if milu = 'row'.
873  // That is because is convenient to have it inside
874  // the L part to carry out the process.
875  if (ridx_u[j] == i)
876  {
877  j++;
878  continue;
879  }
880  else
881  c -= i;
882  }
883  U_ptr->data (c) = data_u[j];
884  U_ptr->ridx (c) = ridx_u[j];
885  j++;
886  }
887  }
888 
889  if (opt == ROW)
890  {
891  U = U.transpose ();
892  // The diagonal, conveniently permuted is added to U
893  U += diag.index (idx_vector::colon, perm_vec);
894  L = L.transpose ();
895  }
896 }
897 
898 DEFUN (__ilutp__, args, nargout,
899  doc: /* -*- texinfo -*-
900 @deftypefn {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})
901 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@dots{})
902 Undocumented internal function.
903 @end deftypefn */)
904 {
905  if (args.length () != 5)
906  print_usage ();
907 
909  double droptol = args(1).double_value ();
910  double thresh = args(2).double_value ();
911  std::string milu = args(3).string_value ();
912  double udiag = args(4).double_value ();
913 
914  octave_value_list arg_list;
915  octave_idx_type nnz_u, nnz_l;
916  if (! args(0).iscomplex ())
917  {
918  SparseMatrix sm = args(0).sparse_matrix_value ();
919  SparseMatrix U, L;
920  nnz_u = (Ftriu (ovl (sm))(0).sparse_matrix_value ()).nnz ();
921  nnz_l = (Ftril (ovl (sm, -1))(0).sparse_matrix_value ()).nnz ();
922  Array <double> rc_norm;
923  if (milu == "row")
924  rc_norm = xrownorms (sm);
925  else
926  rc_norm = xcolnorms (sm);
927  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
928 
929  ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
930  rc_norm.fortran_vec (),
931  perm, droptol, thresh, milu, udiag);
932 
933  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
934  if (milu == "row")
935  {
936  retval(0) = L + speye;
937  if (nargout == 3)
938  {
939  retval(1) = U.index (idx_vector::colon, perm);
940  retval(2) = speye.index (idx_vector::colon, perm);
941  }
942  else
943  retval(1) = U;
944  }
945  else
946  {
947  retval(1) = U;
948  if (nargout == 3)
949  {
950  retval(0) = L.index (perm, idx_vector::colon) + speye;
951  retval(2) = speye.index (perm, idx_vector::colon);
952  }
953  else
954  retval(0) = L + speye.index (idx_vector::colon, perm);
955  }
956  }
957  else
958  {
959  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
960  SparseComplexMatrix U, L;
961  nnz_u = (Ftriu (ovl (sm))(0).sparse_complex_matrix_value ()).nnz ();
962  nnz_l = (Ftril (ovl (sm, -1))(0).sparse_complex_matrix_value ()).nnz ();
963  Array <Complex> rc_norm;
964  if (milu == "row")
965  rc_norm = xrownorms (sm);
966  else
967  rc_norm = xcolnorms (sm);
968  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
969 
970  ilu_tp <SparseComplexMatrix, Complex>
971  (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm,
972  Complex (droptol), Complex (thresh), milu, udiag);
973 
974  SparseMatrix speye (DiagMatrix (L.cols (), L.cols (), 1.0));
975  if (milu == "row")
976  {
977  retval(0) = L + speye;
978  if (nargout == 3)
979  {
980  retval(1) = U.index (idx_vector::colon, perm);
981  retval(2) = speye.index (idx_vector::colon, perm);
982  }
983  else if (nargout == 2)
984  retval(1) = U;
985  }
986  else
987  {
988  retval(1) = U;
989  if (nargout == 3)
990  {
991  retval(0) = L.index (perm, idx_vector::colon) + speye;
992  retval(2) = speye.index (perm, idx_vector::colon);
993  }
994  else
995  retval(0) = L + speye.index (idx_vector::colon, perm);
996  }
997  }
998 
999  return retval;
1000 }
1001 
1002 /*
1003 ## No test needed for internal helper function.
1004 %!assert (1)
1005 */
OCTAVE_API ColumnVector xrownorms(const Matrix &m, double p)
Definition: oct-norm.cc:593
static const idx_vector colon
Definition: idx-vector.h:498
OCTINTERP_API void print_usage(void)
Definition: defun.cc:54
OCTAVE_EXPORT octave_value_list or N dimensional array whose elements are all equal to the IEEE symbol zero divided by zero($0/0$)
for large enough k
Definition: lu.cc:617
const T * fortran_vec(void) const
Definition: Array.h:584
#define DEFUN(name, args_name, nargout_name, doc)
Macro to define a builtin function.
Definition: defun.h:53
void error(const char *fmt,...)
Definition: error.cc:578
static T abs(T x)
Definition: pr-output.cc:1696
OCTAVE_EXPORT octave_value_list Ftril(const octave_value_list &args, int) and setting all other elements to zero. The optional second argument specifies how many diagonals above or below the main diagonal should also be set to zero. The default value of ar
Definition: tril.cc:374
nd example oindent opens the file binary numeric values will be read assuming they are stored in IEEE format with the least significant bit and then converted to the native representation Opening a file that is already open simply opens it again and returns a separate file id It is not an error to open a file several though writing to the same file through several different file ids may produce unexpected results The possible values of text mode reading and writing automatically converts linefeeds to the appropriate line end character for the you may append a you must also open the file in binary mode The parameter conversions are currently only supported for and permissions will be set to and then everything is written in a single operation This is very efficient and improves performance c
Definition: file-io.cc:587
OCTAVE_EXPORT octave_value_list return the number of command line arguments passed to Octave If called with the optional argument the function xample nargout(@histc)
Definition: ov-usr-fcn.cc:997
OCTAVE_API RowVector xcolnorms(const Matrix &m, double p)
Definition: oct-norm.cc:593
octave_value retval
Definition: data.cc:6246
octave_idx_type cols(void) const
Definition: Sparse.h:259
OCTAVE_EXPORT octave_value_list isa nd deftypefn *return ovl(args(0).isinteger())
void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition: __ilu__.cc:43
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:41
for i
Definition: data.cc:5264
std::complex< double > Complex
Definition: oct-cmplx.h:31
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:888