GNU Octave  4.2.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
__ilu__.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2014-2017 Eduardo Ramos Fernández <eduradical951@gmail.com>
4 Copyright (C) 2013-2016 Kai T. Ohlhus <k.ohlhus@gmail.com>
5 
6 This file is part of Octave.
7 
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21 
22 */
23 
24 #if defined (HAVE_CONFIG_H)
25 # include "config.h"
26 #endif
27 
28 #include "oct-locbuf.h"
29 
30 #include "defun.h"
31 #include "error.h"
32 #include "parse.h"
33 
34 // This function implements the IKJ and JKI variants of Gaussian elimination to
35 // perform the ILUTP decomposition. The behavior is controlled by milu
36 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
37 // advantage of CCS format of the input matrix. If milu = 'row' the input
38 // matrix has to be transposed to obtain the equivalent CRS structure so we can
39 // work efficiently with rows. In this case IKJ version is used.
40 template <typename octave_matrix_t, typename T>
41 void ilu_0 (octave_matrix_t& sm, const std::string milu = "off")
42 {
43  const octave_idx_type n = sm.cols ();
44  octave_idx_type j1, j2, jrow, jw, i, j, k, jj;
45  T r;
46  T tl = 0;
47 
48  enum {OFF, ROW, COL};
49  char opt;
50  if (milu == "row")
51  {
52  opt = ROW;
53  sm = sm.transpose ();
54  }
55  else if (milu == "col")
56  opt = COL;
57  else
58  opt = OFF;
59 
60  // Input matrix pointers
61  octave_idx_type* cidx = sm.cidx ();
62  octave_idx_type* ridx = sm.ridx ();
63  T* data = sm.data ();
64 
65  // Working arrays
68 
69  // Initialize working arrays
70  for (i = 0; i < n; i++)
71  iw[i] = -1;
72 
73  // Loop over all columns
74  for (k = 0; k < n; k++)
75  {
76  j1 = cidx[k];
77  j2 = cidx[k+1];
78 
79  if (j1 == j2)
80  error ("ilu: A has a zero on the diagonal");
81 
82  for (j = j1; j < j2; j++)
83  iw[ridx[j]] = j;
84 
85  r = 0;
86  j = j1;
87  jrow = ridx[j1];
88  while ((jrow < k) && (j < j2))
89  {
90  if (opt == ROW)
91  {
92  tl = data[j] / data[uptr[jrow]];
93  data[j] = tl;
94  }
95  for (jj = uptr[jrow] + 1; jj < cidx[jrow+1]; jj++)
96  {
97  jw = iw[ridx[jj]];
98  if (jw != -1)
99  if (opt == ROW)
100  data[jw] -= tl * data[jj];
101  else
102  data[jw] -= data[j] * data[jj];
103 
104  else
105  // That is for the milu='row'
106  if (opt == ROW)
107  r += tl * data[jj];
108  else if (opt == COL)
109  r += data[j] * data[jj];
110  }
111  j++;
112  jrow = ridx[j];
113  }
114  uptr[k] = j;
115  if (opt != OFF)
116  data[uptr[k]] -= r;
117 
118  if (opt != ROW)
119  for (jj = uptr[k] + 1; jj < cidx[k+1]; jj++)
120  data[jj] /= data[uptr[k]];
121 
122  if (k != jrow)
123  error ("ilu: A has a zero on the diagonal");
124 
125  if (data[j] == T(0))
126  error ("ilu: encountered a pivot equal to 0");
127 
128  for (i = j1; i < j2; i++)
129  iw[ridx[i]] = -1;
130  }
131 
132  if (opt == ROW)
133  sm = sm.transpose ();
134 }
135 
136 DEFUN (__ilu0__, args, ,
137  doc: /* -*- texinfo -*-
138 @deftypefn {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A})
139 @deftypefnx {} {[@var{L}, @var{U}] =} __ilu0__ (@var{A}, @var{milu})
140 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilu0__ (@var{A}, @dots{})
141 Undocumented internal function.
142 @end deftypefn */)
143 {
144  int nargin = args.length ();
145 
146  if (nargin < 1 || nargin > 2)
147  print_usage ();
148 
150 
151  std::string milu;
152 
153  // In ILU0 algorithm the zero-pattern of the input matrix is preserved so
154  // its structure does not change during the algorithm. The same input
155  // matrix is used to build the output matrix due to that fact.
156  octave_value_list arg_list;
157  if (! args(0).is_complex_type ())
158  {
159  SparseMatrix sm = args(0).sparse_matrix_value ();
160  ilu_0 <SparseMatrix, double> (sm, milu);
161 
162  arg_list.append (sm);
163  retval(1) = feval ("triu", arg_list)(0).sparse_matrix_value ();
164  SparseMatrix eye =
165  feval ("speye", ovl (sm.cols ()))(0).sparse_matrix_value ();
166  arg_list.append (-1);
167  retval(0) = eye +
168  feval ("tril", arg_list)(0).sparse_matrix_value ();
169  }
170  else
171  {
172  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
173  ilu_0 <SparseComplexMatrix, Complex> (sm, milu);
174 
175  arg_list.append (sm);
176  retval(1) = feval ("triu", arg_list)(0).sparse_complex_matrix_value ();
177  SparseComplexMatrix eye =
178  feval ("speye", ovl (sm.cols ()))(0).sparse_complex_matrix_value ();
179  arg_list.append (-1);
180  retval(0) = eye +
181  feval ("tril", arg_list)(0).sparse_complex_matrix_value ();
182  }
183 
184  return retval;
185 }
186 
187 template <typename octave_matrix_t, typename T>
188 void ilu_crout (octave_matrix_t& sm_l, octave_matrix_t& sm_u,
189  octave_matrix_t& L, octave_matrix_t& U, T* cols_norm,
190  T* rows_norm, const T droptol = 0,
191  const std::string milu = "off")
192 {
193  // Map the strings into chars for faster comparing inside loops
194  char opt;
195  enum {OFF, ROW, COL};
196  if (milu == "row")
197  opt = ROW;
198  else if (milu == "col")
199  opt = COL;
200  else
201  opt = OFF;
202 
203  octave_idx_type jrow, i, j, k, jj, total_len_l, total_len_u, max_len_u,
204  max_len_l, w_len_u, w_len_l, cols_list_len, rows_list_len;
205 
206  const octave_idx_type n = sm_u.cols ();
207  sm_u = sm_u.transpose ();
208 
209  max_len_u = sm_u.nnz ();
210  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
211  max_len_l = sm_l.nnz ();
212  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
213 
214  // Extract pointers to the arrays for faster access inside loops
215  octave_idx_type* cidx_in_u = sm_u.cidx ();
216  octave_idx_type* ridx_in_u = sm_u.ridx ();
217  T* data_in_u = sm_u.data ();
218  octave_idx_type* cidx_in_l = sm_l.cidx ();
219  octave_idx_type* ridx_in_l = sm_l.ridx ();
220  T* data_in_l = sm_l.data ();
221 
222  // L output arrays
223  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
224  octave_idx_type* ridx_l = ridx_out_l.fortran_vec ();
225  Array <T> data_out_l (dim_vector (max_len_l, 1));
226  T* data_l = data_out_l.fortran_vec ();
227 
228  // U output arrays
229  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
230  octave_idx_type* ridx_u = ridx_out_u.fortran_vec ();
231  Array <T> data_out_u (dim_vector (max_len_u, 1));
232  T* data_u = data_out_u.fortran_vec ();
233 
234  // Working arrays
235  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_l, n + 1);
236  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx_u, n + 1);
237  OCTAVE_LOCAL_BUFFER (octave_idx_type, cols_list, n);
238  OCTAVE_LOCAL_BUFFER (octave_idx_type, rows_list, n);
239  OCTAVE_LOCAL_BUFFER (T, w_data_l, n);
240  OCTAVE_LOCAL_BUFFER (T, w_data_u, n);
243  OCTAVE_LOCAL_BUFFER (T, cr_sum, n);
244 
245  T zero = T (0);
246 
247  // Initialize working arrays
248  cidx_u[0] = cidx_in_u[0];
249  cidx_l[0] = cidx_in_l[0];
250  for (i = 0; i < n; i++)
251  {
252  w_data_u[i] = zero;
253  w_data_l[i] = zero;
254  cr_sum[i] = zero;
255  }
256 
257  total_len_u = 0;
258  total_len_l = 0;
259  cols_list_len = 0;
260  rows_list_len = 0;
261 
262  // Loop over all columns
263  for (k = 0; k < n; k++)
264  {
265  // Load the working column and working row
266  for (i = cidx_in_l[k]; i < cidx_in_l[k+1]; i++)
267  w_data_l[ridx_in_l[i]] = data_in_l[i];
268 
269  for (i = cidx_in_u[k]; i < cidx_in_u[k+1]; i++)
270  w_data_u[ridx_in_u[i]] = data_in_u[i];
271 
272  // Update U working row
273  for (j = 0; j < rows_list_len; j++)
274  {
275  if ((Ufirst[rows_list[j]] != -1))
276  for (jj = Ufirst[rows_list[j]]; jj < cidx_u[rows_list[j]+1]; jj++)
277  {
278  jrow = ridx_u[jj];
279  w_data_u[jrow] -= data_u[jj] * data_l[Lfirst[rows_list[j]]];
280  }
281  }
282  // Update L working column
283  for (j = 0; j < cols_list_len; j++)
284  {
285  if (Lfirst[cols_list[j]] != -1)
286  for (jj = Lfirst[cols_list[j]]; jj < cidx_l[cols_list[j]+1]; jj++)
287  {
288  jrow = ridx_l[jj];
289  w_data_l[jrow] -= data_l[jj] * data_u[Ufirst[cols_list[j]]];
290  }
291  }
292 
293  if ((max_len_u - total_len_u) < n)
294  {
295  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
296  data_out_u.resize (dim_vector (max_len_u, 1));
297  data_u = data_out_u.fortran_vec ();
298  ridx_out_u.resize (dim_vector (max_len_u, 1));
299  ridx_u = ridx_out_u.fortran_vec ();
300  }
301 
302  if ((max_len_l - total_len_l) < n)
303  {
304  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
305  data_out_l.resize (dim_vector (max_len_l, 1));
306  data_l = data_out_l.fortran_vec ();
307  ridx_out_l.resize (dim_vector (max_len_l, 1));
308  ridx_l = ridx_out_l.fortran_vec ();
309  }
310 
311  // Expand the working row into the U output data structures
312  w_len_l = 0;
313  data_u[total_len_u] = w_data_u[k];
314  ridx_u[total_len_u] = k;
315  w_len_u = 1;
316  for (i = k + 1; i < n; i++)
317  {
318  if (w_data_u[i] != zero)
319  {
320  if (std::abs (w_data_u[i]) < (droptol * rows_norm[k]))
321  {
322  if (opt == ROW)
323  cr_sum[k] += w_data_u[i];
324  else if (opt == COL)
325  cr_sum[i] += w_data_u[i];
326  }
327  else
328  {
329  data_u[total_len_u + w_len_u] = w_data_u[i];
330  ridx_u[total_len_u + w_len_u] = i;
331  w_len_u++;
332  }
333  }
334 
335  // Expand the working column into the L output data structures
336  if (w_data_l[i] != zero)
337  {
338  if (std::abs (w_data_l[i]) < (droptol * cols_norm[k]))
339  {
340  if (opt == COL)
341  cr_sum[k] += w_data_l[i];
342  else if (opt == ROW)
343  cr_sum[i] += w_data_l[i];
344  }
345  else
346  {
347  data_l[total_len_l + w_len_l] = w_data_l[i];
348  ridx_l[total_len_l + w_len_l] = i;
349  w_len_l++;
350  }
351  }
352  w_data_u[i] = zero;
353  w_data_l[i] = zero;
354  }
355 
356  // Compensate row and column sums --> milu option
357  if (opt == COL || opt == ROW)
358  data_u[total_len_u] += cr_sum[k];
359 
360  // Check if the pivot is zero
361  if (data_u[total_len_u] == zero)
362  error ("ilu: encountered a pivot equal to 0");
363 
364  // Scale the elements in L by the pivot
365  for (i = total_len_l ; i < (total_len_l + w_len_l); i++)
366  data_l[i] /= data_u[total_len_u];
367 
368  total_len_u += w_len_u;
369  total_len_l += w_len_l;
370  // Check if there are too many elements to be indexed with
371  // octave_idx_type type due to fill-in during the process.
372  if (total_len_l < 0 || total_len_u < 0)
373  error ("ilu: integer overflow. Too many fill-in elements in L or U");
374 
375  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
376  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
377 
378  // The tricky part of the algorithm. The arrays pointing to the first
379  // working element of each column in the next iteration (Lfirst) or
380  // the first working element of each row (Ufirst) are updated. Also the
381  // arrays working as lists cols_list and rows_list are filled with
382  // indices pointing to Ufirst and Lfirst respectively.
383  // FIXME: Maybe the -1 indicating in Ufirst and Lfirst, that no elements
384  // have to be considered in a certain column or row in next iteration,
385  // can be removed. It feels safer to me using such an indicator.
386  if (k < (n - 1))
387  {
388  if (w_len_u > 0)
389  Ufirst[k] = cidx_u[k];
390  else
391  Ufirst[k] = -1;
392  if (w_len_l > 0)
393  Lfirst[k] = cidx_l[k];
394  else
395  Lfirst[k] = -1;
396  cols_list_len = 0;
397  rows_list_len = 0;
398  for (i = 0; i <= k; i++)
399  {
400  if (Ufirst[i] != -1)
401  {
402  jj = ridx_u[Ufirst[i]];
403  if (jj < (k + 1))
404  {
405  if (Ufirst[i] < (cidx_u[i+1]))
406  {
407  Ufirst[i]++;
408  if (Ufirst[i] == cidx_u[i+1])
409  Ufirst[i] = -1;
410  else
411  jj = ridx_u[Ufirst[i]];
412  }
413  }
414  if (jj == (k + 1))
415  {
416  cols_list[cols_list_len] = i;
417  cols_list_len++;
418  }
419  }
420 
421  if (Lfirst[i] != -1)
422  {
423  jj = ridx_l[Lfirst[i]];
424  if (jj < (k + 1))
425  if (Lfirst[i] < (cidx_l[i+1]))
426  {
427  Lfirst[i]++;
428  if (Lfirst[i] == cidx_l[i+1])
429  Lfirst[i] = -1;
430  else
431  jj = ridx_l[Lfirst[i]];
432  }
433  if (jj == (k + 1))
434  {
435  rows_list[rows_list_len] = i;
436  rows_list_len++;
437  }
438  }
439  }
440  }
441  }
442 
443  // Build the output matrices
444  L = octave_matrix_t (n, n, total_len_l);
445  U = octave_matrix_t (n, n, total_len_u);
446 
447  // FIXME: Can these loops be replaced by std::copy?
448  for (i = 0; i <= n; i++)
449  L.cidx (i) = cidx_l[i];
450 
451  for (i = 0; i < total_len_l; i++)
452  {
453  L.ridx (i) = ridx_l[i];
454  L.data (i) = data_l[i];
455  }
456 
457  for (i = 0; i <= n; i++)
458  U.cidx (i) = cidx_u[i];
459 
460  for (i = 0; i < total_len_u; i++)
461  {
462  U.ridx (i) = ridx_u[i];
463  U.data (i) = data_u[i];
464  }
465 
466  U = U.transpose ();
467 }
468 
469 DEFUN (__iluc__, args, ,
470  doc: /* -*- texinfo -*-
471 @deftypefn {} {[@var{L}, @var{U}] =} __iluc__ (@var{A})
472 @deftypefnx {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol})
473 @deftypefnx {} {[@var{L}, @var{U}] =} __iluc__ (@var{A}, @var{droptol}, @var{milu})
474 Undocumented internal function.
475 @end deftypefn */)
476 {
477  int nargin = args.length ();
478 
479  if (nargin < 1 || nargin > 3)
480  print_usage ();
481 
482  std::string milu = "off";
483  double droptol = 0;
484 
485  // Don't repeat input validation of arguments done in ilu.m
486  if (nargin >= 2)
487  droptol = args(1).double_value ();
488 
489  if (nargin == 3)
490  milu = args(2).string_value ();
491 
492  octave_value_list arg_list;
493  if (! args(0).is_complex_type ())
494  {
495  Array<double> cols_norm, rows_norm;
496  arg_list.append (args(0).sparse_matrix_value ());
497  SparseMatrix sm_u = feval ("triu", arg_list)(0).sparse_matrix_value ();
498  arg_list.append (-1);
499  SparseMatrix sm_l = feval ("tril", arg_list)(0).sparse_matrix_value ();
500  arg_list(1) = "rows";
501  rows_norm = feval ("norm", arg_list)(0).vector_value ();
502  arg_list(1) = "cols";
503  cols_norm = feval ("norm", arg_list)(0).vector_value ();
504  arg_list.clear ();
505  SparseMatrix U, L;
506  ilu_crout <SparseMatrix, double> (sm_l, sm_u, L, U,
507  cols_norm.fortran_vec (),
508  rows_norm.fortran_vec (),
509  droptol, milu);
510  arg_list.append (octave_value (L.cols ()));
511  SparseMatrix eye = feval ("speye", arg_list)(0).sparse_matrix_value ();
512  return ovl (L + eye, U);
513  }
514  else
515  {
516  Array<Complex> cols_norm, rows_norm;
517  arg_list.append (args(0).sparse_complex_matrix_value ());
518  SparseComplexMatrix sm_u =
519  feval ("triu", arg_list)(0).sparse_complex_matrix_value ();
520  arg_list.append (-1);
521  SparseComplexMatrix sm_l =
522  feval ("tril", arg_list)(0).sparse_complex_matrix_value ();
523  arg_list(1) = "rows";
524  rows_norm = feval ("norm", arg_list)(0).complex_vector_value ();
525  arg_list(1) = "cols";
526  cols_norm = feval ("norm", arg_list)(0).complex_vector_value ();
527  arg_list.clear ();
528  SparseComplexMatrix U, L;
529  ilu_crout <SparseComplexMatrix, Complex>
530  (sm_l, sm_u, L, U, cols_norm.fortran_vec (),
531  rows_norm.fortran_vec (), Complex (droptol), milu);
532 
533  arg_list.append (octave_value (L.cols ()));
534  SparseComplexMatrix eye =
535  feval ("speye", arg_list)(0).sparse_complex_matrix_value ();
536  return ovl (L + eye, U);
537  }
538 }
539 
540 // This function implements the IKJ and JKI variants of gaussian elimination
541 // to perform the ILUTP decomposition. The behavior is controlled by milu
542 // parameter. If milu = ['off'|'col'] the JKI version is performed taking
543 // advantage of CCS format of the input matrix. Row pivoting is performed.
544 // If milu = 'row' the input matrix has to be transposed to obtain the
545 // equivalent CRS structure so we can work efficiently with rows. In that
546 // case IKJ version is used and column pivoting is performed.
547 
548 template <typename octave_matrix_t, typename T>
549 void ilu_tp (octave_matrix_t& sm, octave_matrix_t& L, octave_matrix_t& U,
550  octave_idx_type nnz_u, octave_idx_type nnz_l, T* cols_norm,
551  Array <octave_idx_type>& perm_vec, const T droptol = T(0),
552  const T thresh = T(0), const std::string milu = "off",
553  const double udiag = 0)
554 {
555  char opt;
556  enum {OFF, ROW, COL};
557  if (milu == "row")
558  opt = ROW;
559  else if (milu == "col")
560  opt = COL;
561  else
562  opt = OFF;
563 
564  const octave_idx_type n = sm.cols ();
565 
566  // This is necessary for the JKI (milu = "row") variant.
567  if (opt == ROW)
568  sm = sm.transpose ();
569 
570  // Extract pointers to the arrays for faster access inside loops
571  octave_idx_type* cidx_in = sm.cidx ();
572  octave_idx_type* ridx_in = sm.ridx ();
573  T* data_in = sm.data ();
574  octave_idx_type jrow, i, j, k, jj, c, total_len_l, total_len_u, p_perm,
575  max_ind, max_len_l, max_len_u;
576  T zero = T(0);
577 
578  T tl = zero, aux, maximum;
579 
580  max_len_u = nnz_u;
581  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
582  max_len_l = nnz_l;
583  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
584 
585  // Extract pointers to the arrays for faster access inside loops
586  Array <octave_idx_type> cidx_out_l (dim_vector (n + 1, 1));
587  octave_idx_type* cidx_l = cidx_out_l.fortran_vec ();
588  Array <octave_idx_type> ridx_out_l (dim_vector (max_len_l, 1));
589  octave_idx_type* ridx_l = ridx_out_l.fortran_vec ();
590  Array <T> data_out_l (dim_vector (max_len_l, 1));
591  T* data_l = data_out_l.fortran_vec ();
592 
593  // Data for U
594  Array <octave_idx_type> cidx_out_u (dim_vector (n + 1, 1));
595  octave_idx_type* cidx_u = cidx_out_u.fortran_vec ();
596  Array <octave_idx_type> ridx_out_u (dim_vector (max_len_u, 1));
597  octave_idx_type* ridx_u = ridx_out_u.fortran_vec ();
598  Array <T> data_out_u (dim_vector (max_len_u, 1));
599  T* data_u = data_out_u.fortran_vec ();
600 
601  // Working arrays and permutation arrays
602  octave_idx_type w_len_u, w_len_l;
603  T total_sum, partial_col_sum = zero, partial_row_sum = zero;
604  std::set <octave_idx_type> iw_l;
605  std::set <octave_idx_type> iw_u;
606  std::set <octave_idx_type>::iterator it, it2;
607  OCTAVE_LOCAL_BUFFER (T, w_data, n);
609  octave_idx_type* perm = perm_vec.fortran_vec ();
611 
612  // Initialize working and permutation arrays
613  cidx_l[0] = cidx_in[0];
614  cidx_u[0] = cidx_in[0];
615  for (i = 0; i < n; i++)
616  {
617  w_data[i] = 0;
618  perm[i] = i;
619  iperm[i] = i;
620  }
621  total_len_u = 0;
622  total_len_l = 0;
623 
624  // Loop over all columns
625  for (k = 0; k < n; k++)
626  {
627  for (j = cidx_in[k]; j < cidx_in[k+1]; j++)
628  {
629  p_perm = iperm[ridx_in[j]];
630  w_data[iperm[ridx_in[j]]] = data_in[j];
631  if (p_perm > k)
632  iw_l.insert (ridx_in[j]);
633  else
634  iw_u.insert (p_perm);
635  }
636 
637  it = iw_u.begin ();
638  jrow = *it;
639  total_sum = zero;
640  while ((jrow < k) && (it != iw_u.end ()))
641  {
642  if (opt == COL)
643  partial_col_sum = w_data[jrow];
644  if (w_data[jrow] != zero)
645  {
646  if (opt == ROW)
647  {
648  partial_row_sum = w_data[jrow];
649  tl = w_data[jrow] / data_u[uptr[jrow]];
650  }
651  for (jj = cidx_l[jrow]; jj < cidx_l[jrow+1]; jj++)
652  {
653  p_perm = iperm[ridx_l[jj]];
654  aux = w_data[p_perm];
655  if (opt == ROW)
656  {
657  w_data[p_perm] -= tl * data_l[jj];
658  partial_row_sum += tl * data_l[jj];
659  }
660  else
661  {
662  tl = data_l[jj] * w_data[jrow];
663  w_data[p_perm] -= tl;
664  if (opt == COL)
665  partial_col_sum += tl;
666  }
667 
668  if ((aux == zero) && (w_data[p_perm] != zero))
669  {
670  if (p_perm > k)
671  iw_l.insert (ridx_l[jj]);
672  else
673  iw_u.insert (p_perm);
674  }
675  }
676 
677  // Drop element from the U part in IKJ
678  // and L part in JKI variant (milu = [col|off])
679  if ((std::abs (w_data[jrow]) < (droptol * cols_norm[k]))
680  && (w_data[jrow] != zero))
681  {
682  if (opt == COL)
683  total_sum += partial_col_sum;
684  else if (opt == ROW)
685  total_sum += partial_row_sum;
686  w_data[jrow] = zero;
687  it2 = it;
688  it++;
689  iw_u.erase (it2);
690  jrow = *it;
691  continue;
692  }
693  else
694  // This is the element scaled by the pivot
695  // in the actual iteration
696  if (opt == ROW)
697  w_data[jrow] = tl;
698  }
699  jrow = *(++it);
700  }
701 
702  // Search for the pivot and update iw_l and iw_u if the pivot is not the
703  // diagonal element
704  if ((thresh > zero) && (k < (n - 1)))
705  {
706  maximum = std::abs (w_data[k]) / thresh;
707  max_ind = perm[k];
708  for (it = iw_l.begin (); it != iw_l.end (); ++it)
709  {
710  p_perm = iperm[*it];
711  if (std::abs (w_data[p_perm]) > maximum)
712  {
713  maximum = std::abs (w_data[p_perm]);
714  max_ind = *it;
715  it2 = it;
716  }
717  }
718  // If the pivot is not the diagonal element update all
719  p_perm = iperm[max_ind];
720  if (max_ind != perm[k])
721  {
722  iw_l.erase (it2);
723  if (w_data[k] != zero)
724  iw_l.insert (perm[k]);
725  else
726  iw_u.insert (k);
727  // Swap data and update permutation vectors
728  aux = w_data[k];
729  iperm[perm[p_perm]] = k;
730  iperm[perm[k]] = p_perm;
731  c = perm[k];
732  perm[k] = perm[p_perm];
733  perm[p_perm] = c;
734  w_data[k] = w_data[p_perm];
735  w_data[p_perm] = aux;
736  }
737 
738  }
739 
740  // Drop elements in the L part in the IKJ version,
741  // and from the U part in the JKI version.
742  it = iw_l.begin ();
743  while (it != iw_l.end ())
744  {
745  p_perm = iperm[*it];
746  if (droptol > zero)
747  if (std::abs (w_data[p_perm]) < (droptol * cols_norm[k]))
748  {
749  if (opt != OFF)
750  total_sum += w_data[p_perm];
751  w_data[p_perm] = zero;
752  it2 = it;
753  it++;
754  iw_l.erase (it2);
755  continue;
756  }
757 
758  it++;
759  }
760 
761  // If milu == [row|col] summation is preserved.
762  // Compensate diagonal element.
763  if (opt != OFF)
764  {
765  if ((total_sum > zero) && (w_data[k] == zero))
766  iw_u.insert (k);
767  w_data[k] += total_sum;
768  }
769 
770  // Check if the pivot is zero and if udiag is active.
771  // NOTE: If the pivot == 0 and udiag is active, then if milu = [col|row]
772  // will not preserve the row sum for that column/row.
773  if (w_data[k] == zero)
774  {
775  if (udiag != 1)
776  error ("ilu: encountered a pivot equal to 0");
777 
778  w_data[k] = droptol;
779  iw_u.insert (k);
780  }
781 
782  // Scale the elements on the L part for IKJ version (milu = [col|off])
783  if (opt != ROW)
784  for (it = iw_l.begin (); it != iw_l.end (); ++it)
785  {
786  p_perm = iperm[*it];
787  w_data[p_perm] = w_data[p_perm] / w_data[k];
788  }
789 
790  if ((max_len_u - total_len_u) < n)
791  {
792  max_len_u += (0.1 * max_len_u) > n ? 0.1 * max_len_u : n;
793  data_out_u.resize (dim_vector (max_len_u, 1));
794  data_u = data_out_u.fortran_vec ();
795  ridx_out_u.resize (dim_vector (max_len_u, 1));
796  ridx_u = ridx_out_u.fortran_vec ();
797  }
798 
799  if ((max_len_l - total_len_l) < n)
800  {
801  max_len_l += (0.1 * max_len_l) > n ? 0.1 * max_len_l : n;
802  data_out_l.resize (dim_vector (max_len_l, 1));
803  data_l = data_out_l.fortran_vec ();
804  ridx_out_l.resize (dim_vector (max_len_l, 1));
805  ridx_l = ridx_out_l.fortran_vec ();
806  }
807 
808  // Expand working vector into U.
809  w_len_u = 0;
810  for (it = iw_u.begin (); it != iw_u.end (); ++it)
811  {
812  if (w_data[*it] != zero)
813  {
814  data_u[total_len_u + w_len_u] = w_data[*it];
815  ridx_u[total_len_u + w_len_u] = *it;
816  w_len_u++;
817  }
818  w_data[*it] = 0;
819  }
820 
821  // Expand working vector into L.
822  w_len_l = 0;
823  for (it = iw_l.begin (); it != iw_l.end (); ++it)
824  {
825  p_perm = iperm[*it];
826  if (w_data[p_perm] != zero)
827  {
828  data_l[total_len_l + w_len_l] = w_data[p_perm];
829  ridx_l[total_len_l + w_len_l] = *it;
830  w_len_l++;
831  }
832  w_data[p_perm] = 0;
833  }
834  total_len_u += w_len_u;
835  total_len_l += w_len_l;
836 
837  // Check if there are too many elements to be indexed with
838  // octave_idx_type type due to fill-in during the process.
839  if (total_len_l < 0 || total_len_u < 0)
840  error ("ilu: Integer overflow. Too many fill-in elements in L or U");
841 
842  if (opt == ROW)
843  uptr[k] = total_len_u - 1;
844 
845  cidx_u[k+1] = cidx_u[k] - cidx_u[0] + w_len_u;
846  cidx_l[k+1] = cidx_l[k] - cidx_l[0] + w_len_l;
847 
848  iw_l.clear ();
849  iw_u.clear ();
850  }
851 
852  octave_matrix_t *L_ptr;
853  octave_matrix_t *U_ptr;
854  octave_matrix_t diag (n, n, n);
855 
856  // L and U are interchanged if milu = 'row'. It is a matter
857  // of nomenclature to re-use code with both IKJ and JKI
858  // versions of the algorithm.
859  if (opt == ROW)
860  {
861  L_ptr = &U;
862  U_ptr = &L;
863  L = octave_matrix_t (n, n, total_len_u - n);
864  U = octave_matrix_t (n, n, total_len_l);
865  }
866  else
867  {
868  L_ptr = &L;
869  U_ptr = &U;
870  L = octave_matrix_t (n, n, total_len_l);
871  U = octave_matrix_t (n, n, total_len_u);
872  }
873 
874  for (i = 0; i <= n; i++)
875  {
876  L_ptr->cidx (i) = cidx_l[i];
877  U_ptr->cidx (i) = cidx_u[i];
878  if (opt == ROW)
879  U_ptr->cidx (i) -= i;
880  }
881 
882  for (i = 0; i < n; i++)
883  {
884  if (opt == ROW)
885  diag.elem (i,i) = data_u[uptr[i]];
886  j = cidx_l[i];
887 
888  while (j < cidx_l[i+1])
889  {
890  L_ptr->ridx (j) = ridx_l[j];
891  L_ptr->data (j) = data_l[j];
892  j++;
893  }
894  j = cidx_u[i];
895 
896  while (j < cidx_u[i+1])
897  {
898  c = j;
899  if (opt == ROW)
900  {
901  // The diagonal is removed from L if milu = 'row'.
902  // That is because is convenient to have it inside
903  // the L part to carry out the process.
904  if (ridx_u[j] == i)
905  {
906  j++;
907  continue;
908  }
909  else
910  c -= i;
911  }
912  U_ptr->data (c) = data_u[j];
913  U_ptr->ridx (c) = ridx_u[j];
914  j++;
915  }
916  }
917 
918  if (opt == ROW)
919  {
920  U = U.transpose ();
921  // The diagonal, conveniently permuted is added to U
922  U += diag.index (idx_vector::colon, perm_vec);
923  L = L.transpose ();
924  }
925 }
926 
927 DEFUN (__ilutp__, args, nargout,
928  doc: /* -*- texinfo -*-
929 @deftypefn {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A})
930 @deftypefnx {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol})
931 @deftypefnx {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh})
932 @deftypefnx {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu})
933 @deftypefnx {} {[@var{L}, @var{U}] =} __ilutp__ (@var{A}, @var{droptol}, @var{thresh}, @var{milu}, @var{udiag})
934 @deftypefnx {} {[@var{L}, @var{U}, @var{P}] =} __ilutp__ (@var{A}, @dots{})
935 Undocumented internal function.
936 @end deftypefn */)
937 {
938  int nargin = args.length ();
939 
940  if (nargin < 1 || nargin > 5)
941  print_usage ();
942 
944  std::string milu = "";
945  double droptol = 0;
946  double thresh = 1;
947  double udiag = 0;
948 
949  // Don't repeat input validation of arguments done in ilu.m
950  if (nargin >= 2)
951  droptol = args(1).double_value ();
952 
953  if (nargin >= 3)
954  thresh = args(2).double_value ();
955 
956  if (nargin >= 4)
957  milu = args(3).string_value ();
958 
959  if (nargin == 5)
960  udiag = args(4).double_value ();
961 
962  octave_value_list arg_list;
963  octave_idx_type nnz_u, nnz_l;
964  if (! args(0).is_complex_type ())
965  {
966  Array <double> rc_norm;
967  SparseMatrix sm = args(0).sparse_matrix_value ();
968  arg_list.append (sm);
969  nnz_u = (feval ("triu", arg_list)(0).sparse_matrix_value ()).nnz ();
970  arg_list.append (-1);
971  nnz_l = (feval ("tril", arg_list)(0).sparse_matrix_value ()).nnz ();
972  if (milu == "row")
973  arg_list (1) = "rows";
974  else
975  arg_list (1) = "cols";
976  rc_norm = feval ("norm", arg_list)(0).vector_value ();
977  arg_list.clear ();
978  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
979  SparseMatrix U, L;
980  ilu_tp <SparseMatrix, double> (sm, L, U, nnz_u, nnz_l,
981  rc_norm.fortran_vec (),
982  perm, droptol, thresh, milu, udiag);
983  arg_list.append (octave_value (L.cols ()));
984  SparseMatrix eye =
985  feval ("speye", arg_list)(0).sparse_matrix_value ();
986  if (milu == "row")
987  {
988  if (nargout == 3)
989  {
990  retval(2) = eye.index (idx_vector::colon, perm);
991  retval(1) = U.index (idx_vector::colon, perm);
992  }
993  else if (nargout == 2)
994  retval(1) = U;
995  retval(0) = L + eye;
996  }
997  else
998  {
999  if (nargout == 3)
1000  {
1001  retval(2) = eye.index (perm, idx_vector::colon);
1002  retval(1) = U;
1003  retval(0) = L.index (perm, idx_vector::colon) + eye;
1004  }
1005  else
1006  {
1007  retval(1) = U;
1008  retval(0) = L + eye.index (perm, idx_vector::colon);
1009  }
1010  }
1011  }
1012  else
1013  {
1014  Array <Complex> rc_norm;
1015  SparseComplexMatrix sm = args(0).sparse_complex_matrix_value ();
1016  arg_list.append (sm);
1017  nnz_u =
1018  feval ("triu", arg_list)(0).sparse_complex_matrix_value ().nnz ();
1019  arg_list.append (-1);
1020  nnz_l =
1021  feval ("tril", arg_list)(0).sparse_complex_matrix_value ().nnz ();
1022  if (milu == "row")
1023  arg_list(1) = "rows";
1024  else
1025  arg_list(1) = "cols";
1026  rc_norm = feval ("norm", arg_list)(0).complex_vector_value ();
1027  arg_list.clear ();
1028  Array <octave_idx_type> perm (dim_vector (sm.cols (), 1));
1029  SparseComplexMatrix U, L;
1030  ilu_tp <SparseComplexMatrix, Complex>
1031  (sm, L, U, nnz_u, nnz_l, rc_norm.fortran_vec (), perm,
1032  Complex (droptol), Complex (thresh), milu, udiag);
1033 
1034  arg_list.append (octave_value (L.cols ()));
1035  SparseComplexMatrix eye =
1036  feval ("speye", arg_list)(0).sparse_complex_matrix_value ();
1037  if (milu == "row")
1038  {
1039  if (nargout == 3)
1040  {
1041  retval(2) = eye.index (idx_vector::colon, perm);
1042  retval(1) = U.index (idx_vector::colon, perm);
1043  }
1044  else if (nargout == 2)
1045  retval(1) = U;
1046  retval(0) = L + eye;
1047  }
1048  else
1049  {
1050  if (nargout == 3)
1051  {
1052  retval(2) = eye.index (perm, idx_vector::colon);
1053  retval(1) = U;
1054  retval(0) = L.index (perm, idx_vector::colon) + eye;
1055  }
1056  else
1057  {
1058  retval(1) = U;
1059  retval(0) = L + eye.index (perm, idx_vector::colon);
1060  }
1061  }
1062  }
1063 
1064  return retval;
1065 }
1066 
1067 /*
1068 ## No test needed for internal helper function.
1069 %!assert (1)
1070 */
octave_idx_type cols(void) const
Definition: Sparse.h:272
void clear(void)
Definition: ovl.h:152
static const idx_vector colon
Definition: idx-vector.h:482
OCTAVE_EXPORT octave_value_list isa nd deftypefn *return ovl(args(0).is_integer_type())
OCTINTERP_API void print_usage(void)
Definition: defun.cc:52
octave_value_list & append(const octave_value &val)
Definition: ovl.cc:85
for large enough k
Definition: lu.cc:606
#define DEFUN(name, args_name, nargout_name, doc)
Definition: defun.h:46
void error(const char *fmt,...)
Definition: error.cc:570
is greater than zero
Definition: load-path.cc:2339
Sparse< T > index(const idx_vector &i, bool resize_ok=false) const
Definition: Sparse.cc:1378
JNIEnv void * args
Definition: ov-java.cc:67
OCTAVE_EXPORT octave_value_list return the number of command line arguments passed to Octave If called with the optional argument the function xample nargout(@histc)
Definition: ov-usr-fcn.cc:935
int nargin
Definition: graphics.cc:10115
octave_value retval
Definition: data.cc:6294
the sparsity preserving column transformation such that that defines the pivoting threshold can be given in which case it defines the c
Definition: lu.cc:138
feval(ar{f}, 1) esult
Definition: oct-parse.cc:8829
=val(i)}if ode{val(i)}occurs in table i
Definition: lookup.cc:239
OCTAVE_EXPORT octave_value_list return the value of the option it must match the dimension of the state and the relative tolerance must also be a vector of the same length tem it must match the dimension of the state and the absolute tolerance must also be a vector of the same length The local error test applied at each integration step is xample roup abs(local error in x(i))<
void ilu_0(octave_matrix_t &sm, const std::string milu="off")
Definition: __ilu__.cc:41
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:200
std::complex< double > Complex
Definition: oct-cmplx.h:31
const T * fortran_vec(void) const
Definition: Array.h:584
Vector representing the dimensions (size) of an Array.
Definition: dim-vector.h:87
If this string is the system will ring the terminal sometimes it is useful to be able to print the original representation of the string
Definition: utils.cc:854