GNU Octave  4.2.1
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
oct-sort.cc
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2003-2017 David Bateman
4 Copyright (C) 2008-2009 Jaroslav Hajek
5 Copyright (C) 2009-2010 VZLU Prague
6 
7 This file is part of Octave.
8 
9 Octave is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 Octave is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Octave; see the file COPYING. If not, see
21 <http://www.gnu.org/licenses/>.
22 
23 Code stolen in large part from Python's, listobject.c, which itself had
24 no license header. However, thanks to Tim Peters for the parts of the
25 code I ripped-off.
26 
27 As required in the Python license the short description of the changes
28 made are
29 
30 * convert the sorting code in listobject.cc into a generic class,
31  replacing PyObject* with the type of the class T.
32 
33 * replaced usages of malloc, free, memcpy and memmove by standard C++
34  new [], delete [] and std::copy and std::copy_backward. Note that replacing
35  memmove by std::copy is possible if the destination starts before the source.
36  If not, std::copy_backward needs to be used.
37 
38 * templatize comparison operator in most methods, provide possible dispatch
39 
40 * duplicate methods to avoid by-the-way indexed sorting
41 
42 * add methods for verifying sortedness of array
43 
44 * row sorting via breadth-first tree subsorting
45 
46 * binary lookup and sequential binary lookup optimized for dense downsampling.
47 
48 * NOTE: the memory management routines rely on the fact that delete [] silently
49  ignores null pointers. Don't gripe about the missing checks - they're there.
50 
51 
52 The Python license is
53 
54  PSF LICENSE AGREEMENT FOR PYTHON 2.3
55  --------------------------------------
56 
57  1. This LICENSE AGREEMENT is between the Python Software Foundation
58  ("PSF"), and the Individual or Organization ("Licensee") accessing and
59  otherwise using Python 2.3 software in source or binary form and its
60  associated documentation.
61 
62  2. Subject to the terms and conditions of this License Agreement, PSF
63  hereby grants Licensee a nonexclusive, royalty-free, world-wide
64  license to reproduce, analyze, test, perform and/or display publicly,
65  prepare derivative works, distribute, and otherwise use Python 2.3
66  alone or in any derivative version, provided, however, that PSF's
67  License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
68  2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
69  retained in Python 2.3 alone or in any derivative version prepared by
70  Licensee.
71 
72  3. In the event Licensee prepares a derivative work that is based on
73  or incorporates Python 2.3 or any part thereof, and wants to make
74  the derivative work available to others as provided herein, then
75  Licensee hereby agrees to include in any such work a brief summary of
76  the changes made to Python 2.3.
77 
78  4. PSF is making Python 2.3 available to Licensee on an "AS IS"
79  basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
80  IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
81  DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
82  FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
83  INFRINGE ANY THIRD PARTY RIGHTS.
84 
85  5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
86  2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
87  A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
88  OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
89 
90  6. This License Agreement will automatically terminate upon a material
91  breach of its terms and conditions.
92 
93  7. Nothing in this License Agreement shall be deemed to create any
94  relationship of agency, partnership, or joint venture between PSF and
95  Licensee. This License Agreement does not grant permission to use PSF
96  trademarks or trade name in a trademark sense to endorse or promote
97  products or services of Licensee, or any third party.
98 
99  8. By copying, installing or otherwise using Python 2.3, Licensee
100  agrees to be bound by the terms and conditions of this License
101  Agreement.
102 */
103 
104 // This file should not include config.h. It is only included in other
105 // C++ source files that should have included config.h before including
106 // this file.
107 
108 #include <cassert>
109 #include <algorithm>
110 #include <functional>
111 #include <cstring>
112 #include <stack>
113 
114 #include "lo-mappers.h"
115 #include "quit.h"
116 #include "oct-sort.h"
117 #include "oct-locbuf.h"
118 
119 template <typename T>
121  compare (ascending_compare), ms (0)
122 { }
123 
124 template <typename T>
125 octave_sort<T>::octave_sort (compare_fcn_type comp)
126  : compare (comp), ms (0)
127 { }
128 
129 template <typename T>
131 {
132  delete ms;
133 }
134 
135 template <typename T>
136 void
138 {
139  if (mode == ASCENDING)
140  compare = ascending_compare;
141  else if (mode == DESCENDING)
142  compare = descending_compare;
143  else
144  compare = 0;
145 }
146 
147 template <typename T>
148 template <typename Comp>
149 void
151  octave_idx_type start, Comp comp)
152 {
153  if (start == 0)
154  ++start;
155 
156  for (; start < nel; ++start)
157  {
158  /* set l to where *start belongs */
159  octave_idx_type l = 0;
161  T pivot = data[start];
162  /* Invariants:
163  * pivot >= all in [lo, l).
164  * pivot < all in [r, start).
165  * The second is vacuously true at the start.
166  */
167  do
168  {
169  octave_idx_type p = l + ((r - l) >> 1);
170  if (comp (pivot, data[p]))
171  r = p;
172  else
173  l = p+1;
174  }
175  while (l < r);
176  /* The invariants still hold, so pivot >= all in [lo, l) and
177  pivot < all in [l, start), so pivot belongs at l. Note
178  that if there are elements equal to pivot, l points to the
179  first slot after them -- that's why this sort is stable.
180  Slide over to make room.
181  Caution: using memmove is much slower under MSVC 5;
182  we're not usually moving many slots. */
183  // NOTE: using swap and going upwards appears to be faster.
184  for (octave_idx_type p = l; p < start; p++)
185  std::swap (pivot, data[p]);
186  data[start] = pivot;
187  }
188 
189  return;
190 }
191 
192 template <typename T>
193 template <typename Comp>
194 void
196  octave_idx_type start, Comp comp)
197 {
198  if (start == 0)
199  ++start;
200 
201  for (; start < nel; ++start)
202  {
203  /* set l to where *start belongs */
204  octave_idx_type l = 0;
206  T pivot = data[start];
207  /* Invariants:
208  * pivot >= all in [lo, l).
209  * pivot < all in [r, start).
210  * The second is vacuously true at the start.
211  */
212  do
213  {
214  octave_idx_type p = l + ((r - l) >> 1);
215  if (comp (pivot, data[p]))
216  r = p;
217  else
218  l = p+1;
219  }
220  while (l < r);
221  /* The invariants still hold, so pivot >= all in [lo, l) and
222  pivot < all in [l, start), so pivot belongs at l. Note
223  that if there are elements equal to pivot, l points to the
224  first slot after them -- that's why this sort is stable.
225  Slide over to make room.
226  Caution: using memmove is much slower under MSVC 5;
227  we're not usually moving many slots. */
228  // NOTE: using swap and going upwards appears to be faster.
229  for (octave_idx_type p = l; p < start; p++)
230  std::swap (pivot, data[p]);
231  data[start] = pivot;
232  octave_idx_type ipivot = idx[start];
233  for (octave_idx_type p = l; p < start; p++)
234  std::swap (ipivot, idx[p]);
235  idx[start] = ipivot;
236  }
237 
238  return;
239 }
240 
241 /*
242 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
243 is required on entry. "A run" is the longest ascending sequence, with
244 
245  lo[0] <= lo[1] <= lo[2] <= ...
246 
247 or the longest descending sequence, with
248 
249  lo[0] > lo[1] > lo[2] > ...
250 
251 DESCENDING is set to false in the former case, or to true in the latter.
252 For its intended use in a stable mergesort, the strictness of the defn of
253 "descending" is needed so that the caller can safely reverse a descending
254 sequence without violating stability (strict > ensures there are no equal
255 elements to get out of order).
256 
257 Returns -1 in case of error.
258 */
259 template <typename T>
260 template <typename Comp>
262 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending,
263  Comp comp)
264 {
265  octave_idx_type n;
266  T *hi = lo + nel;
267 
268  descending = false;
269  ++lo;
270  if (lo == hi)
271  return 1;
272 
273  n = 2;
274 
275  if (comp (*lo, *(lo-1)))
276  {
277  descending = true;
278  for (lo = lo+1; lo < hi; ++lo, ++n)
279  {
280  if (comp (*lo, *(lo-1)))
281  ;
282  else
283  break;
284  }
285  }
286  else
287  {
288  for (lo = lo+1; lo < hi; ++lo, ++n)
289  {
290  if (comp (*lo, *(lo-1)))
291  break;
292  }
293  }
294 
295  return n;
296 }
297 
298 /*
299 Locate the proper position of key in a sorted vector; if the vector contains
300 an element equal to key, return the position immediately to the left of
301 the leftmost equal element. [gallop_right() does the same except returns
302 the position to the right of the rightmost equal element (if any).]
303 
304 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0.
305 
306 "hint" is an index at which to begin the search, 0 <= hint < n. The closer
307 hint is to the final result, the faster this runs.
308 
309 The return value is the int k in 0..n such that
310 
311  a[k-1] < key <= a[k]
312 
313 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW,
314 key belongs at index k; or, IOW, the first k elements of a should precede
315 key, and the last n-k should follow key.
316 
317 Returns -1 on error. See listsort.txt for info on the method.
318 */
319 template <typename T>
320 template <typename Comp>
323  octave_idx_type hint,
324  Comp comp)
325 {
326  octave_idx_type ofs;
327  octave_idx_type lastofs;
329 
330  a += hint;
331  lastofs = 0;
332  ofs = 1;
333  if (comp (*a, key))
334  {
335  /* a[hint] < key -- gallop right, until
336  * a[hint + lastofs] < key <= a[hint + ofs]
337  */
338  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
339  while (ofs < maxofs)
340  {
341  if (comp (a[ofs], key))
342  {
343  lastofs = ofs;
344  ofs = (ofs << 1) + 1;
345  if (ofs <= 0) /* int overflow */
346  ofs = maxofs;
347  }
348  else /* key <= a[hint + ofs] */
349  break;
350  }
351  if (ofs > maxofs)
352  ofs = maxofs;
353  /* Translate back to offsets relative to &a[0]. */
354  lastofs += hint;
355  ofs += hint;
356  }
357  else
358  {
359  /* key <= a[hint] -- gallop left, until
360  * a[hint - ofs] < key <= a[hint - lastofs]
361  */
362  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
363  while (ofs < maxofs)
364  {
365  if (comp (*(a-ofs), key))
366  break;
367  /* key <= a[hint - ofs] */
368  lastofs = ofs;
369  ofs = (ofs << 1) + 1;
370  if (ofs <= 0) /* int overflow */
371  ofs = maxofs;
372  }
373  if (ofs > maxofs)
374  ofs = maxofs;
375  /* Translate back to positive offsets relative to &a[0]. */
376  k = lastofs;
377  lastofs = hint - ofs;
378  ofs = hint - k;
379  }
380  a -= hint;
381 
382  /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
383  * right of lastofs but no farther right than ofs. Do a binary
384  * search, with invariant a[lastofs-1] < key <= a[ofs].
385  */
386  ++lastofs;
387  while (lastofs < ofs)
388  {
389  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
390 
391  if (comp (a[m], key))
392  lastofs = m+1; /* a[m] < key */
393  else
394  ofs = m; /* key <= a[m] */
395  }
396 
397  return ofs;
398 }
399 
400 /*
401 Exactly like gallop_left(), except that if key already exists in a[0:n],
402 finds the position immediately to the right of the rightmost equal value.
403 
404 The return value is the int k in 0..n such that
405 
406  a[k-1] <= key < a[k]
407 
408 or -1 if error.
409 
410 The code duplication is massive, but this is enough different given that
411 we're sticking to "<" comparisons that it's much harder to follow if
412 written as one routine with yet another "left or right?" flag.
413 */
414 template <typename T>
415 template <typename Comp>
418  octave_idx_type hint,
419  Comp comp)
420 {
421  octave_idx_type ofs;
422  octave_idx_type lastofs;
424 
425  a += hint;
426  lastofs = 0;
427  ofs = 1;
428  if (comp (key, *a))
429  {
430  /* key < a[hint] -- gallop left, until
431  * a[hint - ofs] <= key < a[hint - lastofs]
432  */
433  const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */
434  while (ofs < maxofs)
435  {
436  if (comp (key, *(a-ofs)))
437  {
438  lastofs = ofs;
439  ofs = (ofs << 1) + 1;
440  if (ofs <= 0) /* int overflow */
441  ofs = maxofs;
442  }
443  else /* a[hint - ofs] <= key */
444  break;
445  }
446  if (ofs > maxofs)
447  ofs = maxofs;
448  /* Translate back to positive offsets relative to &a[0]. */
449  k = lastofs;
450  lastofs = hint - ofs;
451  ofs = hint - k;
452  }
453  else
454  {
455  /* a[hint] <= key -- gallop right, until
456  * a[hint + lastofs] <= key < a[hint + ofs]
457  */
458  const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */
459  while (ofs < maxofs)
460  {
461  if (comp (key, a[ofs]))
462  break;
463  /* a[hint + ofs] <= key */
464  lastofs = ofs;
465  ofs = (ofs << 1) + 1;
466  if (ofs <= 0) /* int overflow */
467  ofs = maxofs;
468  }
469  if (ofs > maxofs)
470  ofs = maxofs;
471  /* Translate back to offsets relative to &a[0]. */
472  lastofs += hint;
473  ofs += hint;
474  }
475  a -= hint;
476 
477  /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
478  * right of lastofs but no farther right than ofs. Do a binary
479  * search, with invariant a[lastofs-1] <= key < a[ofs].
480  */
481  ++lastofs;
482  while (lastofs < ofs)
483  {
484  octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
485 
486  if (comp (key, a[m]))
487  ofs = m; /* key < a[m] */
488  else
489  lastofs = m+1; /* a[m] <= key */
490  }
491 
492  return ofs;
493 }
494 
495 static inline octave_idx_type
497 {
498  unsigned int nbits = 3;
499  octave_idx_type n2 = static_cast<octave_idx_type> (n) >> 8;
500 
501  /* Round up:
502  * If n < 256, to a multiple of 8.
503  * If n < 2048, to a multiple of 64.
504  * If n < 16384, to a multiple of 512.
505  * If n < 131072, to a multiple of 4096.
506  * If n < 1048576, to a multiple of 32768.
507  * If n < 8388608, to a multiple of 262144.
508  * If n < 67108864, to a multiple of 2097152.
509  * If n < 536870912, to a multiple of 16777216.
510  * ...
511  * If n < 2**(5+3*i), to a multiple of 2**(3*i).
512  *
513  * This over-allocates proportional to the list size, making room
514  * for additional growth. The over-allocation is mild, but is
515  * enough to give linear-time amortized behavior over a long
516  * sequence of appends() in the presence of a poorly-performing
517  * system realloc() (which is a reality, e.g., across all flavors
518  * of Windows, with Win9x behavior being particularly bad -- and
519  * we've still got address space fragmentation problems on Win9x
520  * even with this scheme, although it requires much longer lists to
521  * provoke them than it used to).
522  */
523  while (n2)
524  {
525  n2 >>= 3;
526  nbits += 3;
527  }
528 
529  return ((n >> nbits) + 1) << nbits;
530 }
531 
532 /* Ensure enough temp memory for 'need' array slots is available.
533  * Returns 0 on success and -1 if the memory can't be gotten.
534  */
535 template <typename T>
536 void
538 {
539  if (need <= alloced)
540  return;
541 
542  need = roundupsize (need);
543  /* Don't realloc! That can cost cycles to copy the old data, but
544  * we don't care what's in the block.
545  */
546  delete [] a;
547  delete [] ia; // Must do this or fool possible next getmemi.
548  a = new T [need];
549  alloced = need;
550 
551 }
552 
553 template <typename T>
554 void
556 {
557  if (ia && need <= alloced)
558  return;
559 
560  need = roundupsize (need);
561  /* Don't realloc! That can cost cycles to copy the old data, but
562  * we don't care what's in the block.
563  */
564  delete [] a;
565  delete [] ia;
566 
567  a = new T [need];
568  ia = new octave_idx_type [need];
569  alloced = need;
570 }
571 
572 /* Merge the na elements starting at pa with the nb elements starting at pb
573  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
574  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
575  * merge, and should have na <= nb. See listsort.txt for more info.
576  * Return 0 if successful, -1 if error.
577  */
578 template <typename T>
579 template <typename Comp>
580 int
582  T *pb, octave_idx_type nb,
583  Comp comp)
584 {
586  T *dest;
587  int result = -1; /* guilty until proved innocent */
588  octave_idx_type min_gallop = ms->min_gallop;
589 
590  ms->getmem (na);
591 
592  std::copy (pa, pa + na, ms->a);
593  dest = pa;
594  pa = ms->a;
595 
596  *dest++ = *pb++;
597  --nb;
598  if (nb == 0)
599  goto Succeed;
600  if (na == 1)
601  goto CopyB;
602 
603  for (;;)
604  {
605  octave_idx_type acount = 0; /* # of times A won in a row */
606  octave_idx_type bcount = 0; /* # of times B won in a row */
607 
608  /* Do the straightforward thing until (if ever) one run
609  * appears to win consistently.
610  */
611  for (;;)
612  {
613 
614  // FIXME: these loops are candidates for further optimizations.
615  // Rather than testing everything in each cycle, it may be more
616  // efficient to do it in hunks.
617  if (comp (*pb, *pa))
618  {
619  *dest++ = *pb++;
620  ++bcount;
621  acount = 0;
622  --nb;
623  if (nb == 0)
624  goto Succeed;
625  if (bcount >= min_gallop)
626  break;
627  }
628  else
629  {
630  *dest++ = *pa++;
631  ++acount;
632  bcount = 0;
633  --na;
634  if (na == 1)
635  goto CopyB;
636  if (acount >= min_gallop)
637  break;
638  }
639  }
640 
641  /* One run is winning so consistently that galloping may
642  * be a huge win. So try that, and continue galloping until
643  * (if ever) neither run appears to be winning consistently
644  * anymore.
645  */
646  ++min_gallop;
647  do
648  {
649  min_gallop -= min_gallop > 1;
650  ms->min_gallop = min_gallop;
651  k = gallop_right (*pb, pa, na, 0, comp);
652  acount = k;
653  if (k)
654  {
655  if (k < 0)
656  goto Fail;
657  dest = std::copy (pa, pa + k, dest);
658  pa += k;
659  na -= k;
660  if (na == 1)
661  goto CopyB;
662  /* na==0 is impossible now if the comparison
663  * function is consistent, but we can't assume
664  * that it is.
665  */
666  if (na == 0)
667  goto Succeed;
668  }
669  *dest++ = *pb++;
670  --nb;
671  if (nb == 0)
672  goto Succeed;
673 
674  k = gallop_left (*pa, pb, nb, 0, comp);
675  bcount = k;
676  if (k)
677  {
678  if (k < 0)
679  goto Fail;
680  dest = std::copy (pb, pb + k, dest);
681  pb += k;
682  nb -= k;
683  if (nb == 0)
684  goto Succeed;
685  }
686  *dest++ = *pa++;
687  --na;
688  if (na == 1)
689  goto CopyB;
690  }
691  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
692 
693  ++min_gallop; /* penalize it for leaving galloping mode */
694  ms->min_gallop = min_gallop;
695  }
696 
697 Succeed:
698  result = 0;
699 
700 Fail:
701  if (na)
702  std::copy (pa, pa + na, dest);
703  return result;
704 
705 CopyB:
706  /* The last element of pa belongs at the end of the merge. */
707  std::copy (pb, pb + nb, dest);
708  dest[nb] = *pa;
709 
710  return 0;
711 }
712 
713 template <typename T>
714 template <typename Comp>
715 int
717  T *pb, octave_idx_type *ipb, octave_idx_type nb,
718  Comp comp)
719 {
721  T *dest;
722  octave_idx_type *idest;
723  int result = -1; /* guilty until proved innocent */
724  octave_idx_type min_gallop = ms->min_gallop;
725 
726  ms->getmemi (na);
727 
728  std::copy (pa, pa + na, ms->a);
729  std::copy (ipa, ipa + na, ms->ia);
730  dest = pa; idest = ipa;
731  pa = ms->a; ipa = ms->ia;
732 
733  *dest++ = *pb++; *idest++ = *ipb++;
734  --nb;
735  if (nb == 0)
736  goto Succeed;
737  if (na == 1)
738  goto CopyB;
739 
740  for (;;)
741  {
742  octave_idx_type acount = 0; /* # of times A won in a row */
743  octave_idx_type bcount = 0; /* # of times B won in a row */
744 
745  /* Do the straightforward thing until (if ever) one run
746  * appears to win consistently.
747  */
748  for (;;)
749  {
750 
751  if (comp (*pb, *pa))
752  {
753  *dest++ = *pb++; *idest++ = *ipb++;
754  ++bcount;
755  acount = 0;
756  --nb;
757  if (nb == 0)
758  goto Succeed;
759  if (bcount >= min_gallop)
760  break;
761  }
762  else
763  {
764  *dest++ = *pa++; *idest++ = *ipa++;
765  ++acount;
766  bcount = 0;
767  --na;
768  if (na == 1)
769  goto CopyB;
770  if (acount >= min_gallop)
771  break;
772  }
773  }
774 
775  /* One run is winning so consistently that galloping may
776  * be a huge win. So try that, and continue galloping until
777  * (if ever) neither run appears to be winning consistently
778  * anymore.
779  */
780  ++min_gallop;
781  do
782  {
783  min_gallop -= min_gallop > 1;
784  ms->min_gallop = min_gallop;
785  k = gallop_right (*pb, pa, na, 0, comp);
786  acount = k;
787  if (k)
788  {
789  if (k < 0)
790  goto Fail;
791  dest = std::copy (pa, pa + k, dest);
792  idest = std::copy (ipa, ipa + k, idest);
793  pa += k; ipa += k;
794  na -= k;
795  if (na == 1)
796  goto CopyB;
797  /* na==0 is impossible now if the comparison
798  * function is consistent, but we can't assume
799  * that it is.
800  */
801  if (na == 0)
802  goto Succeed;
803  }
804  *dest++ = *pb++; *idest++ = *ipb++;
805  --nb;
806  if (nb == 0)
807  goto Succeed;
808 
809  k = gallop_left (*pa, pb, nb, 0, comp);
810  bcount = k;
811  if (k)
812  {
813  if (k < 0)
814  goto Fail;
815  dest = std::copy (pb, pb + k, dest);
816  idest = std::copy (ipb, ipb + k, idest);
817  pb += k; ipb += k;
818  nb -= k;
819  if (nb == 0)
820  goto Succeed;
821  }
822  *dest++ = *pa++; *idest++ = *ipa++;
823  --na;
824  if (na == 1)
825  goto CopyB;
826  }
827  while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
828 
829  ++min_gallop; /* penalize it for leaving galloping mode */
830  ms->min_gallop = min_gallop;
831  }
832 
833 Succeed:
834  result = 0;
835 
836 Fail:
837  if (na)
838  {
839  std::copy (pa, pa + na, dest);
840  std::copy (ipa, ipa + na, idest);
841  }
842  return result;
843 
844 CopyB:
845  /* The last element of pa belongs at the end of the merge. */
846  std::copy (pb, pb + nb, dest);
847  std::copy (ipb, ipb + nb, idest);
848  dest[nb] = *pa;
849  idest[nb] = *ipa;
850 
851  return 0;
852 }
853 
854 /* Merge the na elements starting at pa with the nb elements starting at pb
855  * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
856  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
857  * merge, and should have na >= nb. See listsort.txt for more info.
858  * Return 0 if successful, -1 if error.
859  */
860 template <typename T>
861 template <typename Comp>
862 int
864  T *pb, octave_idx_type nb,
865  Comp comp)
866 {
868  T *dest;
869  int result = -1; /* guilty until proved innocent */
870  T *basea, *baseb;
871  octave_idx_type min_gallop = ms->min_gallop;
872 
873  ms->getmem (nb);
874 
875  dest = pb + nb - 1;
876  std::copy (pb, pb + nb, ms->a);
877  basea = pa;
878  baseb = ms->a;
879  pb = ms->a + nb - 1;
880  pa += na - 1;
881 
882  *dest-- = *pa--;
883  --na;
884  if (na == 0)
885  goto Succeed;
886  if (nb == 1)
887  goto CopyA;
888 
889  for (;;)
890  {
891  octave_idx_type acount = 0; /* # of times A won in a row */
892  octave_idx_type bcount = 0; /* # of times B won in a row */
893 
894  /* Do the straightforward thing until (if ever) one run
895  * appears to win consistently.
896  */
897  for (;;)
898  {
899  if (comp (*pb, *pa))
900  {
901  *dest-- = *pa--;
902  ++acount;
903  bcount = 0;
904  --na;
905  if (na == 0)
906  goto Succeed;
907  if (acount >= min_gallop)
908  break;
909  }
910  else
911  {
912  *dest-- = *pb--;
913  ++bcount;
914  acount = 0;
915  --nb;
916  if (nb == 1)
917  goto CopyA;
918  if (bcount >= min_gallop)
919  break;
920  }
921  }
922 
923  /* One run is winning so consistently that galloping may
924  * be a huge win. So try that, and continue galloping until
925  * (if ever) neither run appears to be winning consistently
926  * anymore.
927  */
928  ++min_gallop;
929  do
930  {
931  min_gallop -= min_gallop > 1;
932  ms->min_gallop = min_gallop;
933  k = gallop_right (*pb, basea, na, na-1, comp);
934  if (k < 0)
935  goto Fail;
936  k = na - k;
937  acount = k;
938  if (k)
939  {
940  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
941  pa -= k;
942  na -= k;
943  if (na == 0)
944  goto Succeed;
945  }
946  *dest-- = *pb--;
947  --nb;
948  if (nb == 1)
949  goto CopyA;
950 
951  k = gallop_left (*pa, baseb, nb, nb-1, comp);
952  if (k < 0)
953  goto Fail;
954  k = nb - k;
955  bcount = k;
956  if (k)
957  {
958  dest -= k;
959  pb -= k;
960  std::copy (pb+1, pb+1 + k, dest+1);
961  nb -= k;
962  if (nb == 1)
963  goto CopyA;
964  /* nb==0 is impossible now if the comparison
965  * function is consistent, but we can't assume
966  * that it is.
967  */
968  if (nb == 0)
969  goto Succeed;
970  }
971  *dest-- = *pa--;
972  --na;
973  if (na == 0)
974  goto Succeed;
975  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
976  ++min_gallop; /* penalize it for leaving galloping mode */
977  ms->min_gallop = min_gallop;
978  }
979 
980 Succeed:
981  result = 0;
982 
983 Fail:
984  if (nb)
985  std::copy (baseb, baseb + nb, dest-(nb-1));
986  return result;
987 
988 CopyA:
989  /* The first element of pb belongs at the front of the merge. */
990  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
991  pa -= na;
992  *dest = *pb;
993 
994  return 0;
995 }
996 
997 template <typename T>
998 template <typename Comp>
999 int
1001  T *pb, octave_idx_type *ipb, octave_idx_type nb,
1002  Comp comp)
1003 {
1005  T *dest;
1006  octave_idx_type *idest;
1007  int result = -1; /* guilty until proved innocent */
1008  T *basea, *baseb;
1009  octave_idx_type *ibaseb;
1010  octave_idx_type min_gallop = ms->min_gallop;
1011 
1012  ms->getmemi (nb);
1013 
1014  dest = pb + nb - 1;
1015  idest = ipb + nb - 1;
1016  std::copy (pb, pb + nb, ms->a);
1017  std::copy (ipb, ipb + nb, ms->ia);
1018  basea = pa;
1019  baseb = ms->a; ibaseb = ms->ia;
1020  pb = ms->a + nb - 1; ipb = ms->ia + nb - 1;
1021  pa += na - 1; ipa += na - 1;
1022 
1023  *dest-- = *pa--; *idest-- = *ipa--;
1024  --na;
1025  if (na == 0)
1026  goto Succeed;
1027  if (nb == 1)
1028  goto CopyA;
1029 
1030  for (;;)
1031  {
1032  octave_idx_type acount = 0; /* # of times A won in a row */
1033  octave_idx_type bcount = 0; /* # of times B won in a row */
1034 
1035  /* Do the straightforward thing until (if ever) one run
1036  * appears to win consistently.
1037  */
1038  for (;;)
1039  {
1040  if (comp (*pb, *pa))
1041  {
1042  *dest-- = *pa--; *idest-- = *ipa--;
1043  ++acount;
1044  bcount = 0;
1045  --na;
1046  if (na == 0)
1047  goto Succeed;
1048  if (acount >= min_gallop)
1049  break;
1050  }
1051  else
1052  {
1053  *dest-- = *pb--; *idest-- = *ipb--;
1054  ++bcount;
1055  acount = 0;
1056  --nb;
1057  if (nb == 1)
1058  goto CopyA;
1059  if (bcount >= min_gallop)
1060  break;
1061  }
1062  }
1063 
1064  /* One run is winning so consistently that galloping may
1065  * be a huge win. So try that, and continue galloping until
1066  * (if ever) neither run appears to be winning consistently
1067  * anymore.
1068  */
1069  ++min_gallop;
1070  do
1071  {
1072  min_gallop -= min_gallop > 1;
1073  ms->min_gallop = min_gallop;
1074  k = gallop_right (*pb, basea, na, na-1, comp);
1075  if (k < 0)
1076  goto Fail;
1077  k = na - k;
1078  acount = k;
1079  if (k)
1080  {
1081  dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
1082  idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;
1083  pa -= k; ipa -= k;
1084  na -= k;
1085  if (na == 0)
1086  goto Succeed;
1087  }
1088  *dest-- = *pb--; *idest-- = *ipb--;
1089  --nb;
1090  if (nb == 1)
1091  goto CopyA;
1092 
1093  k = gallop_left (*pa, baseb, nb, nb-1, comp);
1094  if (k < 0)
1095  goto Fail;
1096  k = nb - k;
1097  bcount = k;
1098  if (k)
1099  {
1100  dest -= k; idest -= k;
1101  pb -= k; ipb -= k;
1102  std::copy (pb+1, pb+1 + k, dest+1);
1103  std::copy (ipb+1, ipb+1 + k, idest+1);
1104  nb -= k;
1105  if (nb == 1)
1106  goto CopyA;
1107  /* nb==0 is impossible now if the comparison
1108  * function is consistent, but we can't assume
1109  * that it is.
1110  */
1111  if (nb == 0)
1112  goto Succeed;
1113  }
1114  *dest-- = *pa--; *idest-- = *ipa--;
1115  --na;
1116  if (na == 0)
1117  goto Succeed;
1118  } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
1119  ++min_gallop; /* penalize it for leaving galloping mode */
1120  ms->min_gallop = min_gallop;
1121  }
1122 
1123 Succeed:
1124  result = 0;
1125 
1126 Fail:
1127  if (nb)
1128  {
1129  std::copy (baseb, baseb + nb, dest-(nb-1));
1130  std::copy (ibaseb, ibaseb + nb, idest-(nb-1));
1131  }
1132  return result;
1133 
1134 CopyA:
1135  /* The first element of pb belongs at the front of the merge. */
1136  dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
1137  idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;
1138  pa -= na; ipa -= na;
1139  *dest = *pb; *idest = *ipb;
1140 
1141  return 0;
1142 }
1143 
1144 /* Merge the two runs at stack indices i and i+1.
1145  * Returns 0 on success, -1 on error.
1146  */
1147 template <typename T>
1148 template <typename Comp>
1149 int
1151  Comp comp)
1152 {
1153  T *pa, *pb;
1154  octave_idx_type na, nb;
1156 
1157  pa = data + ms->pending[i].base;
1158  na = ms->pending[i].len;
1159  pb = data + ms->pending[i+1].base;
1160  nb = ms->pending[i+1].len;
1161 
1162  /* Record the length of the combined runs; if i is the 3rd-last
1163  * run now, also slide over the last run (which isn't involved
1164  * in this merge). The current run i+1 goes away in any case.
1165  */
1166  ms->pending[i].len = na + nb;
1167  if (i == ms->n - 3)
1168  ms->pending[i+1] = ms->pending[i+2];
1169  ms->n--;
1170 
1171  /* Where does b start in a? Elements in a before that can be
1172  * ignored (already in place).
1173  */
1174  k = gallop_right (*pb, pa, na, 0, comp);
1175  if (k < 0)
1176  return -1;
1177  pa += k;
1178  na -= k;
1179  if (na == 0)
1180  return 0;
1181 
1182  /* Where does a end in b? Elements in b after that can be
1183  * ignored (already in place).
1184  */
1185  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1186  if (nb <= 0)
1187  return nb;
1188 
1189  /* Merge what remains of the runs, using a temp array with
1190  * min (na, nb) elements.
1191  */
1192  if (na <= nb)
1193  return merge_lo (pa, na, pb, nb, comp);
1194  else
1195  return merge_hi (pa, na, pb, nb, comp);
1196 }
1197 
1198 template <typename T>
1199 template <typename Comp>
1200 int
1202  Comp comp)
1203 {
1204  T *pa, *pb;
1205  octave_idx_type *ipa, *ipb;
1206  octave_idx_type na, nb;
1208 
1209  pa = data + ms->pending[i].base;
1210  ipa = idx + ms->pending[i].base;
1211  na = ms->pending[i].len;
1212  pb = data + ms->pending[i+1].base;
1213  ipb = idx + ms->pending[i+1].base;
1214  nb = ms->pending[i+1].len;
1215 
1216  /* Record the length of the combined runs; if i is the 3rd-last
1217  * run now, also slide over the last run (which isn't involved
1218  * in this merge). The current run i+1 goes away in any case.
1219  */
1220  ms->pending[i].len = na + nb;
1221  if (i == ms->n - 3)
1222  ms->pending[i+1] = ms->pending[i+2];
1223  ms->n--;
1224 
1225  /* Where does b start in a? Elements in a before that can be
1226  * ignored (already in place).
1227  */
1228  k = gallop_right (*pb, pa, na, 0, comp);
1229  if (k < 0)
1230  return -1;
1231  pa += k; ipa += k;
1232  na -= k;
1233  if (na == 0)
1234  return 0;
1235 
1236  /* Where does a end in b? Elements in b after that can be
1237  * ignored (already in place).
1238  */
1239  nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
1240  if (nb <= 0)
1241  return nb;
1242 
1243  /* Merge what remains of the runs, using a temp array with
1244  * min (na, nb) elements.
1245  */
1246  if (na <= nb)
1247  return merge_lo (pa, ipa, na, pb, ipb, nb, comp);
1248  else
1249  return merge_hi (pa, ipa, na, pb, ipb, nb, comp);
1250 }
1251 
1252 /* Examine the stack of runs waiting to be merged, merging adjacent runs
1253  * until the stack invariants are re-established:
1254  *
1255  * 1. len[-3] > len[-2] + len[-1]
1256  * 2. len[-2] > len[-1]
1257  *
1258  * See listsort.txt for more info.
1259  *
1260  * Returns 0 on success, -1 on error.
1261  */
1262 template <typename T>
1263 template <typename Comp>
1264 int
1266 {
1267  struct s_slice *p = ms->pending;
1268 
1269  while (ms->n > 1)
1270  {
1271  octave_idx_type n = ms->n - 2;
1272  if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
1273  {
1274  if (p[n-1].len < p[n+1].len)
1275  --n;
1276  if (merge_at (n, data, comp) < 0)
1277  return -1;
1278  }
1279  else if (p[n].len <= p[n+1].len)
1280  {
1281  if (merge_at (n, data, comp) < 0)
1282  return -1;
1283  }
1284  else
1285  break;
1286  }
1287 
1288  return 0;
1289 }
1290 
1291 template <typename T>
1292 template <typename Comp>
1293 int
1295 {
1296  struct s_slice *p = ms->pending;
1297 
1298  while (ms->n > 1)
1299  {
1300  octave_idx_type n = ms->n - 2;
1301  if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
1302  {
1303  if (p[n-1].len < p[n+1].len)
1304  --n;
1305  if (merge_at (n, data, idx, comp) < 0)
1306  return -1;
1307  }
1308  else if (p[n].len <= p[n+1].len)
1309  {
1310  if (merge_at (n, data, idx, comp) < 0)
1311  return -1;
1312  }
1313  else
1314  break;
1315  }
1316 
1317  return 0;
1318 }
1319 
1320 /* Regardless of invariants, merge all runs on the stack until only one
1321  * remains. This is used at the end of the mergesort.
1322  *
1323  * Returns 0 on success, -1 on error.
1324  */
1325 template <typename T>
1326 template <typename Comp>
1327 int
1329 {
1330  struct s_slice *p = ms->pending;
1331 
1332  while (ms->n > 1)
1333  {
1334  octave_idx_type n = ms->n - 2;
1335  if (n > 0 && p[n-1].len < p[n+1].len)
1336  --n;
1337  if (merge_at (n, data, comp) < 0)
1338  return -1;
1339  }
1340 
1341  return 0;
1342 }
1343 
1344 template <typename T>
1345 template <typename Comp>
1346 int
1348 {
1349  struct s_slice *p = ms->pending;
1350 
1351  while (ms->n > 1)
1352  {
1353  octave_idx_type n = ms->n - 2;
1354  if (n > 0 && p[n-1].len < p[n+1].len)
1355  --n;
1356  if (merge_at (n, data, idx, comp) < 0)
1357  return -1;
1358  }
1359 
1360  return 0;
1361 }
1362 
1363 /* Compute a good value for the minimum run length; natural runs shorter
1364  * than this are boosted artificially via binary insertion.
1365  *
1366  * If n < 64, return n (it's too small to bother with fancy stuff).
1367  * Else if n is an exact power of 2, return 32.
1368  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
1369  * strictly less than, an exact power of 2.
1370  *
1371  * See listsort.txt for more info.
1372  */
1373 template <typename T>
1376 {
1377  octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */
1378 
1379  while (n >= 64)
1380  {
1381  r |= n & 1;
1382  n >>= 1;
1383  }
1384 
1385  return n + r;
1386 }
1387 
1388 template <typename T>
1389 template <typename Comp>
1390 void
1391 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)
1392 {
1393  /* Re-initialize the Mergestate as this might be the second time called */
1394  if (! ms) ms = new MergeState;
1395 
1396  ms->reset ();
1397  ms->getmem (1024);
1398 
1399  if (nel > 1)
1400  {
1401  octave_idx_type nremaining = nel;
1402  octave_idx_type lo = 0;
1403 
1404  /* March over the array once, left to right, finding natural runs,
1405  * and extending short natural runs to minrun elements.
1406  */
1407  octave_idx_type minrun = merge_compute_minrun (nremaining);
1408  do
1409  {
1410  bool descending;
1411  octave_idx_type n;
1412 
1413  /* Identify next run. */
1414  n = count_run (data + lo, nremaining, descending, comp);
1415  if (n < 0)
1416  return;
1417  if (descending)
1418  std::reverse (data + lo, data + lo + n);
1419  /* If short, extend to min (minrun, nremaining). */
1420  if (n < minrun)
1421  {
1422  const octave_idx_type force = nremaining <= minrun ? nremaining
1423  : minrun;
1424  binarysort (data + lo, force, n, comp);
1425  n = force;
1426  }
1427  /* Push run onto pending-runs stack, and maybe merge. */
1428  assert (ms->n < MAX_MERGE_PENDING);
1429  ms->pending[ms->n].base = lo;
1430  ms->pending[ms->n].len = n;
1431  ms->n++;
1432  if (merge_collapse (data, comp) < 0)
1433  return;
1434  /* Advance to find next run. */
1435  lo += n;
1436  nremaining -= n;
1437  }
1438  while (nremaining);
1439 
1440  merge_force_collapse (data, comp);
1441  }
1442 }
1443 
1444 template <typename T>
1445 template <typename Comp>
1446 void
1448  Comp comp)
1449 {
1450  /* Re-initialize the Mergestate as this might be the second time called */
1451  if (! ms) ms = new MergeState;
1452 
1453  ms->reset ();
1454  ms->getmemi (1024);
1455 
1456  if (nel > 1)
1457  {
1458  octave_idx_type nremaining = nel;
1459  octave_idx_type lo = 0;
1460 
1461  /* March over the array once, left to right, finding natural runs,
1462  * and extending short natural runs to minrun elements.
1463  */
1464  octave_idx_type minrun = merge_compute_minrun (nremaining);
1465  do
1466  {
1467  bool descending;
1468  octave_idx_type n;
1469 
1470  /* Identify next run. */
1471  n = count_run (data + lo, nremaining, descending, comp);
1472  if (n < 0)
1473  return;
1474  if (descending)
1475  {
1476  std::reverse (data + lo, data + lo + n);
1477  std::reverse (idx + lo, idx + lo + n);
1478  }
1479  /* If short, extend to min (minrun, nremaining). */
1480  if (n < minrun)
1481  {
1482  const octave_idx_type force = nremaining <= minrun ? nremaining
1483  : minrun;
1484  binarysort (data + lo, idx + lo, force, n, comp);
1485  n = force;
1486  }
1487  /* Push run onto pending-runs stack, and maybe merge. */
1488  assert (ms->n < MAX_MERGE_PENDING);
1489  ms->pending[ms->n].base = lo;
1490  ms->pending[ms->n].len = n;
1491  ms->n++;
1492  if (merge_collapse (data, idx, comp) < 0)
1493  return;
1494  /* Advance to find next run. */
1495  lo += n;
1496  nremaining -= n;
1497  }
1498  while (nremaining);
1499 
1500  merge_force_collapse (data, idx, comp);
1501  }
1502 }
1503 
1504 template <typename T>
1505 void
1507 {
1508 #if defined (INLINE_ASCENDING_SORT)
1509  if (compare == ascending_compare)
1510  sort (data, nel, std::less<T> ());
1511  else
1512 #endif
1513 #if defined (INLINE_DESCENDING_SORT)
1514  if (compare == descending_compare)
1515  sort (data, nel, std::greater<T> ());
1516  else
1517 #endif
1518  if (compare)
1519  sort (data, nel, compare);
1520 }
1521 
1522 template <typename T>
1523 void
1525 {
1526 #if defined (INLINE_ASCENDING_SORT)
1527  if (compare == ascending_compare)
1528  sort (data, idx, nel, std::less<T> ());
1529  else
1530 #endif
1531 #if defined (INLINE_DESCENDING_SORT)
1532  if (compare == descending_compare)
1533  sort (data, idx, nel, std::greater<T> ());
1534  else
1535 #endif
1536  if (compare)
1537  sort (data, idx, nel, compare);
1538 }
1539 
1540 template <typename T>
1541 template <typename Comp>
1542 bool
1543 octave_sort<T>::is_sorted (const T *data, octave_idx_type nel, Comp comp)
1544 {
1545  const T *end = data + nel;
1546  if (data != end)
1547  {
1548  const T *next = data;
1549  while (++next != end)
1550  {
1551  if (comp (*next, *data))
1552  break;
1553  data = next;
1554  }
1555  data = next;
1556  }
1557 
1558  return data == end;
1559 }
1560 
1561 template <typename T>
1562 bool
1564 {
1565  bool retval = false;
1566 #if defined (INLINE_ASCENDING_SORT)
1567  if (compare == ascending_compare)
1568  retval = is_sorted (data, nel, std::less<T> ());
1569  else
1570 #endif
1571 #if defined (INLINE_DESCENDING_SORT)
1572  if (compare == descending_compare)
1573  retval = is_sorted (data, nel, std::greater<T> ());
1574  else
1575 #endif
1576  if (compare)
1577  retval = is_sorted (data, nel, compare);
1578 
1579  return retval;
1580 }
1581 
1582 // FIXME: is there really no way to make this local to the following function?
1584 {
1586  : col (c), ofs (o), nel (n) { }
1588 };
1589 
1590 template <typename T>
1591 template <typename Comp>
1592 void
1594  octave_idx_type rows, octave_idx_type cols,
1595  Comp comp)
1596 {
1597  OCTAVE_LOCAL_BUFFER (T, buf, rows);
1598  for (octave_idx_type i = 0; i < rows; i++)
1599  idx[i] = i;
1600 
1601  if (cols == 0 || rows <= 1)
1602  return;
1603 
1604  // This is a breadth-first traversal.
1605  typedef sortrows_run_t run_t;
1606  std::stack<run_t> runs;
1607 
1608  runs.push (run_t (0, 0, rows));
1609 
1610  while (! runs.empty ())
1611  {
1612  octave_idx_type col = runs.top ().col;
1613  octave_idx_type ofs = runs.top ().ofs;
1614  octave_idx_type nel = runs.top ().nel;
1615  runs.pop ();
1616  assert (nel > 1);
1617 
1618  T *lbuf = buf + ofs;
1619  const T *ldata = data + rows*col;
1620  octave_idx_type *lidx = idx + ofs;
1621 
1622  // Gather.
1623  for (octave_idx_type i = 0; i < nel; i++)
1624  lbuf[i] = ldata[lidx[i]];
1625 
1626  // Sort.
1627  sort (lbuf, lidx, nel, comp);
1628 
1629  // Identify constant runs and schedule subsorts.
1630  if (col < cols-1)
1631  {
1632  octave_idx_type lst = 0;
1633  for (octave_idx_type i = 0; i < nel; i++)
1634  {
1635  if (comp (lbuf[lst], lbuf[i]))
1636  {
1637  if (i > lst + 1)
1638  runs.push (run_t (col+1, ofs + lst, i - lst));
1639  lst = i;
1640  }
1641  }
1642  if (nel > lst + 1)
1643  runs.push (run_t (col+1, ofs + lst, nel - lst));
1644  }
1645  }
1646 }
1647 
1648 template <typename T>
1649 void
1651  octave_idx_type rows, octave_idx_type cols)
1652 {
1653 #if defined (INLINE_ASCENDING_SORT)
1654  if (compare == ascending_compare)
1655  sort_rows (data, idx, rows, cols, std::less<T> ());
1656  else
1657 #endif
1658 #if defined (INLINE_DESCENDING_SORT)
1659  if (compare == descending_compare)
1660  sort_rows (data, idx, rows, cols, std::greater<T> ());
1661  else
1662 #endif
1663  if (compare)
1664  sort_rows (data, idx, rows, cols, compare);
1665 }
1666 
1667 template <typename T>
1668 template <typename Comp>
1669 bool
1671  octave_idx_type cols, Comp comp)
1672 {
1673  if (rows <= 1 || cols == 0)
1674  return true;
1675 
1676  // This is a breadth-first traversal.
1677  const T *lastrow = data + rows*(cols - 1);
1678  typedef std::pair<const T *, octave_idx_type> run_t;
1679  std::stack<run_t> runs;
1680 
1681  bool sorted = true;
1682  runs.push (run_t (data, rows));
1683  while (sorted && ! runs.empty ())
1684  {
1685  const T *lo = runs.top ().first;
1686  octave_idx_type n = runs.top ().second;
1687  runs.pop ();
1688  if (lo < lastrow)
1689  {
1690  // Not the final column.
1691  assert (n > 1);
1692  const T *hi = lo + n;
1693  const T *lst = lo;
1694  for (lo++; lo < hi; lo++)
1695  {
1696  if (comp (*lst, *lo))
1697  {
1698  if (lo > lst + 1)
1699  runs.push (run_t (lst + rows, lo - lst));
1700  lst = lo;
1701  }
1702  else if (comp (*lo, *lst))
1703  break;
1704 
1705  }
1706  if (lo == hi)
1707  {
1708  if (lo > lst + 1)
1709  runs.push (run_t (lst + rows, lo - lst));
1710  }
1711  else
1712  {
1713  sorted = false;
1714  break;
1715  }
1716  }
1717  else
1718  // The final column - use fast code.
1719  sorted = is_sorted (lo, n, comp);
1720  }
1721 
1722  return sorted;
1723 }
1724 
1725 template <typename T>
1726 bool
1728  octave_idx_type cols)
1729 {
1730  bool retval = false;
1731 
1732 #if defined (INLINE_ASCENDING_SORT)
1733  if (compare == ascending_compare)
1734  retval = is_sorted_rows (data, rows, cols, std::less<T> ());
1735  else
1736 #endif
1737 #if defined (INLINE_DESCENDING_SORT)
1738  if (compare == descending_compare)
1739  retval = is_sorted_rows (data, rows, cols, std::greater<T> ());
1740  else
1741 #endif
1742  if (compare)
1743  retval = is_sorted_rows (data, rows, cols, compare);
1744 
1745  return retval;
1746 }
1747 
1748 // The simple binary lookup.
1749 
1750 template <typename T>
1751 template <typename Comp>
1754  const T& value, Comp comp)
1755 {
1756  octave_idx_type lo = 0;
1757  octave_idx_type hi = nel;
1758 
1759  while (lo < hi)
1760  {
1761  octave_idx_type mid = lo + ((hi-lo) >> 1);
1762  if (comp (value, data[mid]))
1763  hi = mid;
1764  else
1765  lo = mid + 1;
1766  }
1767 
1768  return lo;
1769 }
1770 
1771 template <typename T>
1774  const T& value)
1775 {
1776  octave_idx_type retval = 0;
1777 
1778 #if defined (INLINE_ASCENDING_SORT)
1779  if (compare == ascending_compare)
1780  retval = lookup (data, nel, value, std::less<T> ());
1781  else
1782 #endif
1783 #if defined (INLINE_DESCENDING_SORT)
1784  if (compare == descending_compare)
1785  retval = lookup (data, nel, value, std::greater<T> ());
1786  else
1787 #endif
1788  if (compare)
1789  retval = lookup (data, nel, value, std::ptr_fun (compare));
1790 
1791  return retval;
1792 }
1793 
1794 template <typename T>
1795 template <typename Comp>
1796 void
1798  const T *values, octave_idx_type nvalues,
1799  octave_idx_type *idx, Comp comp)
1800 {
1801  // Use a sequence of binary lookups.
1802  // FIXME: Can this be sped up generally? The sorted merge case is dealt with
1803  // elsewhere.
1804  for (octave_idx_type j = 0; j < nvalues; j++)
1805  idx[j] = lookup (data, nel, values[j], comp);
1806 }
1807 
1808 template <typename T>
1809 void
1811  const T* values, octave_idx_type nvalues,
1812  octave_idx_type *idx)
1813 {
1814 #if defined (INLINE_ASCENDING_SORT)
1815  if (compare == ascending_compare)
1816  lookup (data, nel, values, nvalues, idx, std::less<T> ());
1817  else
1818 #endif
1819 #if defined (INLINE_DESCENDING_SORT)
1820  if (compare == descending_compare)
1821  lookup (data, nel, values, nvalues, idx, std::greater<T> ());
1822  else
1823 #endif
1824  if (compare)
1825  lookup (data, nel, values, nvalues, idx, std::ptr_fun (compare));
1826 }
1827 
1828 template <typename T>
1829 template <typename Comp>
1830 void
1832  const T *values, octave_idx_type nvalues,
1833  octave_idx_type *idx, bool rev, Comp comp)
1834 {
1835  if (rev)
1836  {
1837  octave_idx_type i = 0;
1838  octave_idx_type j = nvalues - 1;
1839 
1840  if (nvalues > 0 && nel > 0)
1841  {
1842  while (true)
1843  {
1844  if (comp (values[j], data[i]))
1845  {
1846  idx[j] = i;
1847  if (--j < 0)
1848  break;
1849  }
1850  else if (++i == nel)
1851  break;
1852  }
1853  }
1854 
1855  for (; j >= 0; j--)
1856  idx[j] = i;
1857  }
1858  else
1859  {
1860  octave_idx_type i = 0;
1861  octave_idx_type j = 0;
1862 
1863  if (nvalues > 0 && nel > 0)
1864  {
1865  while (true)
1866  {
1867  if (comp (values[j], data[i]))
1868  {
1869  idx[j] = i;
1870  if (++j == nvalues)
1871  break;
1872  }
1873  else if (++i == nel)
1874  break;
1875  }
1876  }
1877 
1878  for (; j != nvalues; j++)
1879  idx[j] = i;
1880  }
1881 }
1882 
1883 template <typename T>
1884 void
1886  const T* values, octave_idx_type nvalues,
1887  octave_idx_type *idx, bool rev)
1888 {
1889 #if defined (INLINE_ASCENDING_SORT)
1890  if (compare == ascending_compare)
1891  lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
1892  else
1893 #endif
1894 #if defined (INLINE_DESCENDING_SORT)
1895  if (compare == descending_compare)
1896  lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
1897  else
1898 #endif
1899  if (compare)
1900  lookup_sorted (data, nel, values, nvalues, idx, rev,
1901  std::ptr_fun (compare));
1902 }
1903 
1904 template <typename T>
1905 template <typename Comp>
1906 void
1909  Comp comp)
1910 {
1911  // Simply wrap the STL algorithms.
1912  // FIXME: this will fail if we attempt to inline <,> for Complex.
1913  if (up == lo+1)
1914  std::nth_element (data, data + lo, data + nel, comp);
1915  else if (lo == 0)
1916  std::partial_sort (data, data + up, data + nel, comp);
1917  else
1918  {
1919  std::nth_element (data, data + lo, data + nel, comp);
1920  if (up == lo + 2)
1921  {
1922  // Finding two subsequent elements.
1923  std::swap (data[lo+1],
1924  *std::min_element (data + lo + 1, data + nel, comp));
1925  }
1926  else
1927  std::partial_sort (data + lo + 1, data + up, data + nel, comp);
1928  }
1929 }
1930 
1931 template <typename T>
1932 void
1935 {
1936  if (up < 0)
1937  up = lo + 1;
1938 #if defined (INLINE_ASCENDING_SORT)
1939  if (compare == ascending_compare)
1940  nth_element (data, nel, lo, up, std::less<T> ());
1941  else
1942 #endif
1943 #if defined (INLINE_DESCENDING_SORT)
1944  if (compare == descending_compare)
1945  nth_element (data, nel, lo, up, std::greater<T> ());
1946  else
1947 #endif
1948  if (compare)
1949  nth_element (data, nel, lo, up, std::ptr_fun (compare));
1950 }
1951 
1952 template <typename T>
1953 bool
1955  typename ref_param<T>::type y)
1956 {
1957  return x < y;
1958 }
1959 
1960 template <typename T>
1961 bool
1963  typename ref_param<T>::type y)
1964 {
1965  return x > y;
1966 }
int merge_at(octave_idx_type i, T *data, Comp comp)
Definition: oct-sort.cc:1150
MergeState * ms
Definition: oct-sort.h:233
octave_idx_type len
Definition: oct-sort.h:182
octave_idx_type * ia
Definition: oct-sort.h:210
void sort_rows(const T *data, octave_idx_type *idx, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1650
octave_idx_type lookup(const T *data, octave_idx_type nel, const T &value)
Definition: oct-sort.cc:1773
octave_idx_type gallop_left(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:322
octave_idx_type ofs
Definition: oct-sort.cc:1587
sortmode
Definition: oct-sort.h:105
Return the CPU time used by your Octave session The first output is the total time spent executing your process and is equal to the sum of second and third which are the number of CPU seconds spent executing in user mode and the number of CPU seconds spent executing in system mode
Definition: data.cc:6386
octave_idx_type base
Definition: oct-sort.h:182
void set_compare(compare_fcn_type comp)
Definition: oct-sort.h:122
for large enough k
Definition: lu.cc:606
sortrows_run_t(octave_idx_type c, octave_idx_type o, octave_idx_type n)
Definition: oct-sort.cc:1585
void binarysort(T *data, octave_idx_type nel, octave_idx_type start, Comp comp)
Definition: oct-sort.cc:150
octave_idx_type count_run(T *lo, octave_idx_type n, bool &descending, Comp comp)
Definition: oct-sort.cc:262
octave_idx_type alloced
Definition: oct-sort.h:211
cell array If invoked with two or more scalar integer or a vector of integer values
Definition: ov-cell.cc:1205
calling an anonymous function involves an overhead quite comparable to the overhead of an m file function Passing a handle to a built in function is because the interpreter is not involved in the internal loop For a
Definition: cellfun.cc:398
bool swap
Definition: load-save.cc:725
int merge_hi(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:863
static uint32_t * next
Definition: randmtzig.cc:183
void getmemi(octave_idx_type need)
Definition: oct-sort.cc:555
nd deftypefn *octave_map m
Definition: ov-struct.cc:2058
compare_fcn_type compare
Definition: oct-sort.h:231
octave_value retval
Definition: data.cc:6294
static octave_idx_type roundupsize(octave_idx_type n)
Definition: oct-sort.cc:496
static bool descending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1962
the sparsity preserving column transformation such that that defines the pivoting threshold can be given in which case it defines the c
Definition: lu.cc:138
#define MAX_MERGE_PENDING
Definition: oct-sort.h:95
bool is_sorted_rows(const T *data, octave_idx_type rows, octave_idx_type cols)
Definition: oct-sort.cc:1727
bool is_sorted(const T *data, octave_idx_type nel)
Definition: oct-sort.cc:1563
With real return the complex result
Definition: data.cc:3375
if_then_else< is_class_type< T >::no, T, T const & >::result type
Definition: lo-traits.h:118
octave_idx_type gallop_right(T key, T *a, octave_idx_type n, octave_idx_type hint, Comp comp)
Definition: oct-sort.cc:417
void sort(T *data, octave_idx_type nel)
Definition: oct-sort.cc:1506
octave::sys::time start
Definition: graphics.cc:11731
int merge_force_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1328
=val(i)}if ode{val(i)}occurs in table i
Definition: lookup.cc:239
void lookup_sorted(const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, octave_idx_type *idx, bool rev=false)
Definition: oct-sort.cc:1885
p
Definition: lu.cc:138
OCTAVE_EXPORT octave_value_list or N dimensional array whose elements are all equal to the IEEE symbol zero divided by nd tex zero divided by nd ifnottex and any operation involving another NaN value(5+NaN).Note that NaN always compares not equal to NaN(NaN!
octave_sort(void)
Definition: oct-sort.cc:120
void getmem(octave_idx_type need)
Definition: oct-sort.cc:537
#define MIN_GALLOP
Definition: oct-sort.h:99
the element is set to zero In other the statement xample y
Definition: data.cc:5342
#define OCTAVE_LOCAL_BUFFER(T, buf, size)
Definition: oct-locbuf.h:200
~octave_sort(void)
Definition: oct-sort.cc:130
struct s_slice pending[85]
Definition: oct-sort.h:222
void nth_element(T *data, octave_idx_type nel, octave_idx_type lo, octave_idx_type up=-1)
Definition: oct-sort.cc:1933
octave_idx_type n
Definition: oct-sort.h:221
int merge_collapse(T *data, Comp comp)
Definition: oct-sort.cc:1265
octave_idx_type merge_compute_minrun(octave_idx_type n)
Definition: oct-sort.cc:1375
octave_idx_type min_gallop
Definition: oct-sort.h:205
F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T const F77_REAL const F77_REAL F77_REAL &F77_RET_T const F77_DBLE const F77_DBLE F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T F77_DBLE &F77_RET_T F77_REAL &F77_RET_T F77_REAL &F77_RET_T F77_DBLE &F77_RET_T const F77_DBLE F77_DBLE &F77_RET_T const F77_REAL F77_REAL &F77_RET_T F77_REAL F77_REAL &F77_RET_T F77_DBLE F77_DBLE &F77_RET_T const F77_DBLE * x
static bool ascending_compare(typename ref_param< T >::type, typename ref_param< T >::type)
Definition: oct-sort.cc:1954
int merge_lo(T *pa, octave_idx_type na, T *pb, octave_idx_type nb, Comp comp)
Definition: oct-sort.cc:581