oct-sort.cc

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2003-2012 David Bateman
00004 Copyright (C) 2008-2009 Jaroslav Hajek
00005 Copyright (C) 2009-2010 VZLU Prague
00006 
00007 This file is part of Octave.
00008 
00009 Octave is free software; you can redistribute it and/or modify it
00010 under the terms of the GNU General Public License as published by the
00011 Free Software Foundation; either version 3 of the License, or (at your
00012 option) any later version.
00013 
00014 Octave is distributed in the hope that it will be useful, but WITHOUT
00015 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00016 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00017 for more details.
00018 
00019 You should have received a copy of the GNU General Public License
00020 along with Octave; see the file COPYING.  If not, see
00021 <http://www.gnu.org/licenses/>.
00022 
00023 Code stolen in large part from Python's, listobject.c, which itself had
00024 no license header. However, thanks to Tim Peters for the parts of the
00025 code I ripped-off.
00026 
00027 As required in the Python license the short description of the changes
00028 made are
00029 
00030 * convert the sorting code in listobject.cc into a generic class,
00031   replacing PyObject* with the type of the class T.
00032 
00033 * replaced usages of malloc, free, memcpy and memmove by standard C++
00034   new [], delete [] and std::copy and std::copy_backward. Note that replacing
00035   memmove by std::copy is possible if the destination starts before the source.
00036   If not, std::copy_backward needs to be used.
00037 
00038 * templatize comparison operator in most methods, provide possible dispatch
00039 
00040 * duplicate methods to avoid by-the-way indexed sorting
00041 
00042 * add methods for verifying sortedness of array
00043 
00044 * row sorting via breadth-first tree subsorting
00045 
00046 * binary lookup and sequential binary lookup optimized for dense downsampling.
00047 
00048 * NOTE: the memory management routines rely on the fact that delete [] silently
00049   ignores null pointers. Don't gripe about the missing checks - they're there.
00050 
00051 
00052 The Python license is
00053 
00054   PSF LICENSE AGREEMENT FOR PYTHON 2.3
00055   --------------------------------------
00056 
00057   1. This LICENSE AGREEMENT is between the Python Software Foundation
00058   ("PSF"), and the Individual or Organization ("Licensee") accessing and
00059   otherwise using Python 2.3 software in source or binary form and its
00060   associated documentation.
00061 
00062   2. Subject to the terms and conditions of this License Agreement, PSF
00063   hereby grants Licensee a nonexclusive, royalty-free, world-wide
00064   license to reproduce, analyze, test, perform and/or display publicly,
00065   prepare derivative works, distribute, and otherwise use Python 2.3
00066   alone or in any derivative version, provided, however, that PSF's
00067   License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
00068   2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
00069   retained in Python 2.3 alone or in any derivative version prepared by
00070   Licensee.
00071 
00072   3. In the event Licensee prepares a derivative work that is based on
00073   or incorporates Python 2.3 or any part thereof, and wants to make
00074   the derivative work available to others as provided herein, then
00075   Licensee hereby agrees to include in any such work a brief summary of
00076   the changes made to Python 2.3.
00077 
00078   4. PSF is making Python 2.3 available to Licensee on an "AS IS"
00079   basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
00080   IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
00081   DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
00082   FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
00083   INFRINGE ANY THIRD PARTY RIGHTS.
00084 
00085   5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
00086   2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
00087   A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
00088   OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
00089 
00090   6. This License Agreement will automatically terminate upon a material
00091   breach of its terms and conditions.
00092 
00093   7. Nothing in this License Agreement shall be deemed to create any
00094   relationship of agency, partnership, or joint venture between PSF and
00095   Licensee.  This License Agreement does not grant permission to use PSF
00096   trademarks or trade name in a trademark sense to endorse or promote
00097   products or services of Licensee, or any third party.
00098 
00099   8. By copying, installing or otherwise using Python 2.3, Licensee
00100   agrees to be bound by the terms and conditions of this License
00101   Agreement.
00102 */
00103 
00104 #ifdef HAVE_CONFIG_H
00105 #include <config.h>
00106 #endif
00107 
00108 #include <cassert>
00109 #include <algorithm>
00110 #include <functional>
00111 #include <cstring>
00112 #include <stack>
00113 
00114 #include "lo-mappers.h"
00115 #include "quit.h"
00116 #include "oct-sort.h"
00117 #include "oct-locbuf.h"
00118 
00119 template <class T>
00120 octave_sort<T>::octave_sort (void) :
00121   compare (ascending_compare), ms (0)
00122 {
00123 }
00124 
00125 template <class T>
00126 octave_sort<T>::octave_sort (compare_fcn_type comp)
00127   : compare (comp), ms (0)
00128 {
00129 }
00130 
00131 template <class T>
00132 octave_sort<T>::~octave_sort ()
00133 {
00134   delete ms;
00135 }
00136 
00137 template <class T>
00138 void
00139 octave_sort<T>::set_compare (sortmode mode)
00140 {
00141   if (mode == ASCENDING)
00142     compare = ascending_compare;
00143   else if (mode == DESCENDING)
00144     compare = descending_compare;
00145   else
00146     compare = 0;
00147 }
00148 
00149 template <class T>
00150 template <class Comp>
00151 void
00152 octave_sort<T>::binarysort (T *data, octave_idx_type nel,
00153                             octave_idx_type start, Comp comp)
00154 {
00155   if (start == 0)
00156     ++start;
00157 
00158   for (; start < nel; ++start)
00159     {
00160       /* set l to where *start belongs */
00161       octave_idx_type l = 0, r = start;
00162       T pivot = data[start];
00163       /* Invariants:
00164        * pivot >= all in [lo, l).
00165        * pivot  < all in [r, start).
00166        * The second is vacuously true at the start.
00167        */
00168       do
00169         {
00170           octave_idx_type p = l + ((r - l) >> 1);
00171           if (comp (pivot, data[p]))
00172             r = p;
00173           else
00174             l = p+1;
00175         }
00176       while (l < r);
00177       /* The invariants still hold, so pivot >= all in [lo, l) and
00178          pivot < all in [l, start), so pivot belongs at l.  Note
00179          that if there are elements equal to pivot, l points to the
00180          first slot after them -- that's why this sort is stable.
00181          Slide over to make room.
00182          Caution: using memmove is much slower under MSVC 5;
00183          we're not usually moving many slots. */
00184       // NOTE: using swap and going upwards appears to be faster.
00185       for (octave_idx_type p = l; p < start; p++)
00186         std::swap (pivot, data[p]);
00187       data[start] = pivot;
00188     }
00189 
00190   return;
00191 }
00192 
00193 template <class T>
00194 template <class Comp>
00195 void
00196 octave_sort<T>::binarysort (T *data, octave_idx_type *idx, octave_idx_type nel,
00197                             octave_idx_type start, Comp comp)
00198 {
00199   if (start == 0)
00200     ++start;
00201 
00202   for (; start < nel; ++start)
00203     {
00204       /* set l to where *start belongs */
00205       octave_idx_type l = 0, r = start;
00206       T pivot = data[start];
00207       /* Invariants:
00208        * pivot >= all in [lo, l).
00209        * pivot  < all in [r, start).
00210        * The second is vacuously true at the start.
00211        */
00212       do
00213         {
00214           octave_idx_type p = l + ((r - l) >> 1);
00215           if (comp (pivot, data[p]))
00216             r = p;
00217           else
00218             l = p+1;
00219         }
00220       while (l < r);
00221       /* The invariants still hold, so pivot >= all in [lo, l) and
00222          pivot < all in [l, start), so pivot belongs at l.  Note
00223          that if there are elements equal to pivot, l points to the
00224          first slot after them -- that's why this sort is stable.
00225          Slide over to make room.
00226          Caution: using memmove is much slower under MSVC 5;
00227          we're not usually moving many slots. */
00228       // NOTE: using swap and going upwards appears to be faster.
00229       for (octave_idx_type p = l; p < start; p++)
00230         std::swap (pivot, data[p]);
00231       data[start] = pivot;
00232       octave_idx_type ipivot = idx[start];
00233       for (octave_idx_type p = l; p < start; p++)
00234         std::swap (ipivot, idx[p]);
00235       idx[start] = ipivot;
00236     }
00237 
00238   return;
00239 }
00240 
00241 /*
00242 Return the length of the run beginning at lo, in the slice [lo, hi).  lo < hi
00243 is required on entry.  "A run" is the longest ascending sequence, with
00244 
00245     lo[0] <= lo[1] <= lo[2] <= ...
00246 
00247 or the longest descending sequence, with
00248 
00249     lo[0] > lo[1] > lo[2] > ...
00250 
00251 DESCENDING is set to false in the former case, or to true in the latter.
00252 For its intended use in a stable mergesort, the strictness of the defn of
00253 "descending" is needed so that the caller can safely reverse a descending
00254 sequence without violating stability (strict > ensures there are no equal
00255 elements to get out of order).
00256 
00257 Returns -1 in case of error.
00258 */
00259 template <class T>
00260 template <class Comp>
00261 octave_idx_type
00262 octave_sort<T>::count_run (T *lo, octave_idx_type nel, bool& descending, Comp comp)
00263 {
00264   octave_idx_type n;
00265   T *hi = lo + nel;
00266 
00267   descending = false;
00268   ++lo;
00269   if (lo == hi)
00270     return 1;
00271 
00272   n = 2;
00273 
00274   if (comp (*lo, *(lo-1)))
00275     {
00276       descending = true;
00277       for (lo = lo+1; lo < hi; ++lo, ++n)
00278         {
00279           if (comp (*lo, *(lo-1)))
00280             ;
00281           else
00282             break;
00283         }
00284     }
00285   else
00286     {
00287       for (lo = lo+1; lo < hi; ++lo, ++n)
00288         {
00289           if (comp (*lo, *(lo-1)))
00290             break;
00291         }
00292     }
00293 
00294   return n;
00295 }
00296 
00297 /*
00298 Locate the proper position of key in a sorted vector; if the vector contains
00299 an element equal to key, return the position immediately to the left of
00300 the leftmost equal element.  [gallop_right() does the same except returns
00301 the position to the right of the rightmost equal element (if any).]
00302 
00303 "a" is a sorted vector with n elements, starting at a[0].  n must be > 0.
00304 
00305 "hint" is an index at which to begin the search, 0 <= hint < n.  The closer
00306 hint is to the final result, the faster this runs.
00307 
00308 The return value is the int k in 0..n such that
00309 
00310     a[k-1] < key <= a[k]
00311 
00312 pretending that *(a-1) is minus infinity and a[n] is plus infinity.  IOW,
00313 key belongs at index k; or, IOW, the first k elements of a should precede
00314 key, and the last n-k should follow key.
00315 
00316 Returns -1 on error.  See listsort.txt for info on the method.
00317 */
00318 template <class T>
00319 template <class Comp>
00320 octave_idx_type
00321 octave_sort<T>::gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint,
00322                              Comp comp)
00323 {
00324   octave_idx_type ofs;
00325   octave_idx_type lastofs;
00326   octave_idx_type k;
00327 
00328   a += hint;
00329   lastofs = 0;
00330   ofs = 1;
00331   if (comp (*a, key))
00332     {
00333       /* a[hint] < key -- gallop right, until
00334        * a[hint + lastofs] < key <= a[hint + ofs]
00335        */
00336       const octave_idx_type maxofs = n - hint;  /* &a[n-1] is highest */
00337       while (ofs < maxofs)
00338         {
00339           if (comp (a[ofs], key))
00340             {
00341               lastofs = ofs;
00342               ofs = (ofs << 1) + 1;
00343               if (ofs <= 0)     /* int overflow */
00344                 ofs = maxofs;
00345             }
00346           else  /* key <= a[hint + ofs] */
00347             break;
00348         }
00349       if (ofs > maxofs)
00350         ofs = maxofs;
00351       /* Translate back to offsets relative to &a[0]. */
00352       lastofs += hint;
00353       ofs += hint;
00354     }
00355   else
00356     {
00357       /* key <= a[hint] -- gallop left, until
00358        * a[hint - ofs] < key <= a[hint - lastofs]
00359        */
00360       const octave_idx_type maxofs = hint + 1;  /* &a[0] is lowest */
00361       while (ofs < maxofs)
00362         {
00363           if (comp (*(a-ofs), key))
00364             break;
00365           /* key <= a[hint - ofs] */
00366           lastofs = ofs;
00367           ofs = (ofs << 1) + 1;
00368           if (ofs <= 0) /* int overflow */
00369             ofs = maxofs;
00370         }
00371       if (ofs > maxofs)
00372         ofs = maxofs;
00373       /* Translate back to positive offsets relative to &a[0]. */
00374       k = lastofs;
00375       lastofs = hint - ofs;
00376       ofs = hint - k;
00377     }
00378   a -= hint;
00379 
00380   /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
00381    * right of lastofs but no farther right than ofs.  Do a binary
00382    * search, with invariant a[lastofs-1] < key <= a[ofs].
00383    */
00384   ++lastofs;
00385   while (lastofs < ofs)
00386     {
00387       octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
00388 
00389       if (comp (a[m], key))
00390         lastofs = m+1;  /* a[m] < key */
00391       else
00392         ofs = m;        /* key <= a[m] */
00393     }
00394 
00395   return ofs;
00396 }
00397 
00398 /*
00399 Exactly like gallop_left(), except that if key already exists in a[0:n],
00400 finds the position immediately to the right of the rightmost equal value.
00401 
00402 The return value is the int k in 0..n such that
00403 
00404     a[k-1] <= key < a[k]
00405 
00406 or -1 if error.
00407 
00408 The code duplication is massive, but this is enough different given that
00409 we're sticking to "<" comparisons that it's much harder to follow if
00410 written as one routine with yet another "left or right?" flag.
00411 */
00412 template <class T>
00413 template <class Comp>
00414 octave_idx_type
00415 octave_sort<T>::gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint,
00416                               Comp comp)
00417 {
00418   octave_idx_type ofs;
00419   octave_idx_type lastofs;
00420   octave_idx_type k;
00421 
00422   a += hint;
00423   lastofs = 0;
00424   ofs = 1;
00425   if (comp (key, *a))
00426     {
00427       /* key < a[hint] -- gallop left, until
00428        * a[hint - ofs] <= key < a[hint - lastofs]
00429        */
00430       const octave_idx_type maxofs = hint + 1;  /* &a[0] is lowest */
00431       while (ofs < maxofs)
00432         {
00433           if (comp (key, *(a-ofs)))
00434             {
00435               lastofs = ofs;
00436               ofs = (ofs << 1) + 1;
00437               if (ofs <= 0)     /* int overflow */
00438                 ofs = maxofs;
00439             }
00440           else  /* a[hint - ofs] <= key */
00441             break;
00442         }
00443       if (ofs > maxofs)
00444         ofs = maxofs;
00445       /* Translate back to positive offsets relative to &a[0]. */
00446       k = lastofs;
00447       lastofs = hint - ofs;
00448       ofs = hint - k;
00449     }
00450   else
00451     {
00452       /* a[hint] <= key -- gallop right, until
00453        * a[hint + lastofs] <= key < a[hint + ofs]
00454        */
00455       const octave_idx_type maxofs = n - hint;  /* &a[n-1] is highest */
00456       while (ofs < maxofs)
00457         {
00458           if (comp (key, a[ofs]))
00459             break;
00460           /* a[hint + ofs] <= key */
00461           lastofs = ofs;
00462           ofs = (ofs << 1) + 1;
00463           if (ofs <= 0) /* int overflow */
00464             ofs = maxofs;
00465         }
00466       if (ofs > maxofs)
00467         ofs = maxofs;
00468       /* Translate back to offsets relative to &a[0]. */
00469       lastofs += hint;
00470       ofs += hint;
00471     }
00472   a -= hint;
00473 
00474   /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
00475    * right of lastofs but no farther right than ofs.  Do a binary
00476    * search, with invariant a[lastofs-1] <= key < a[ofs].
00477    */
00478   ++lastofs;
00479   while (lastofs < ofs)
00480     {
00481       octave_idx_type m = lastofs + ((ofs - lastofs) >> 1);
00482 
00483       if (comp (key, a[m]))
00484         ofs = m;        /* key < a[m] */
00485       else
00486         lastofs = m+1;  /* a[m] <= key */
00487     }
00488 
00489   return ofs;
00490 }
00491 
00492 static inline octave_idx_type
00493 roundupsize (octave_idx_type n)
00494 {
00495   unsigned int nbits = 3;
00496   octave_idx_type n2 = static_cast<octave_idx_type> (n) >> 8;
00497 
00498   /* Round up:
00499    * If n <       256, to a multiple of        8.
00500    * If n <      2048, to a multiple of       64.
00501    * If n <     16384, to a multiple of      512.
00502    * If n <    131072, to a multiple of     4096.
00503    * If n <   1048576, to a multiple of    32768.
00504    * If n <   8388608, to a multiple of   262144.
00505    * If n <  67108864, to a multiple of  2097152.
00506    * If n < 536870912, to a multiple of 16777216.
00507    * ...
00508    * If n < 2**(5+3*i), to a multiple of 2**(3*i).
00509    *
00510    * This over-allocates proportional to the list size, making room
00511    * for additional growth.  The over-allocation is mild, but is
00512    * enough to give linear-time amortized behavior over a long
00513    * sequence of appends() in the presence of a poorly-performing
00514    * system realloc() (which is a reality, e.g., across all flavors
00515    * of Windows, with Win9x behavior being particularly bad -- and
00516    * we've still got address space fragmentation problems on Win9x
00517    * even with this scheme, although it requires much longer lists to
00518    * provoke them than it used to).
00519    */
00520   while (n2)
00521     {
00522       n2 >>= 3;
00523       nbits += 3;
00524     }
00525 
00526   return ((n >> nbits) + 1) << nbits;
00527 }
00528 
00529 /* Ensure enough temp memory for 'need' array slots is available.
00530  * Returns 0 on success and -1 if the memory can't be gotten.
00531  */
00532 template <class T>
00533 void
00534 octave_sort<T>::MergeState::getmem (octave_idx_type need)
00535 {
00536   if (need <= alloced)
00537     return;
00538 
00539   need = roundupsize (need);
00540   /* Don't realloc!  That can cost cycles to copy the old data, but
00541    * we don't care what's in the block.
00542    */
00543   delete [] a;
00544   delete [] ia; // Must do this or fool possible next getmemi.
00545   a = new T[need];
00546   alloced = need;
00547 
00548 }
00549 
00550 template <class T>
00551 void
00552 octave_sort<T>::MergeState::getmemi (octave_idx_type need)
00553 {
00554   if (ia && need <= alloced)
00555     return;
00556 
00557   need = roundupsize (need);
00558   /* Don't realloc!  That can cost cycles to copy the old data, but
00559    * we don't care what's in the block.
00560    */
00561   delete [] a;
00562   delete [] ia;
00563 
00564   a = new T[need];
00565   ia = new octave_idx_type[need];
00566   alloced = need;
00567 }
00568 
00569 /* Merge the na elements starting at pa with the nb elements starting at pb
00570  * in a stable way, in-place.  na and nb must be > 0, and pa + na == pb.
00571  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
00572  * merge, and should have na <= nb.  See listsort.txt for more info.
00573  * Return 0 if successful, -1 if error.
00574  */
00575 template <class T>
00576 template <class Comp>
00577 int
00578 octave_sort<T>::merge_lo (T *pa, octave_idx_type na,
00579                           T *pb, octave_idx_type nb,
00580                           Comp comp)
00581 {
00582   octave_idx_type k;
00583   T *dest;
00584   int result = -1;      /* guilty until proved innocent */
00585   octave_idx_type min_gallop = ms->min_gallop;
00586 
00587   ms->getmem (na);
00588 
00589   std::copy (pa, pa + na, ms->a);
00590   dest = pa;
00591   pa = ms->a;
00592 
00593   *dest++ = *pb++;
00594   --nb;
00595   if (nb == 0)
00596     goto Succeed;
00597   if (na == 1)
00598     goto CopyB;
00599 
00600   for (;;)
00601     {
00602       octave_idx_type acount = 0;       /* # of times A won in a row */
00603       octave_idx_type bcount = 0;       /* # of times B won in a row */
00604 
00605       /* Do the straightforward thing until (if ever) one run
00606        * appears to win consistently.
00607        */
00608       for (;;)
00609         {
00610 
00611           // FIXME: these loops are candidates for further optimizations.
00612           // Rather than testing everything in each cycle, it may be more
00613           // efficient to do it in hunks.
00614           if (comp (*pb, *pa))
00615             {
00616               *dest++ = *pb++;
00617               ++bcount;
00618               acount = 0;
00619               --nb;
00620               if (nb == 0)
00621                 goto Succeed;
00622               if (bcount >= min_gallop)
00623                 break;
00624             }
00625           else
00626             {
00627               *dest++ = *pa++;
00628               ++acount;
00629               bcount = 0;
00630               --na;
00631               if (na == 1)
00632                 goto CopyB;
00633               if (acount >= min_gallop)
00634                 break;
00635             }
00636         }
00637 
00638       /* One run is winning so consistently that galloping may
00639        * be a huge win.  So try that, and continue galloping until
00640        * (if ever) neither run appears to be winning consistently
00641        * anymore.
00642        */
00643       ++min_gallop;
00644       do
00645         {
00646           min_gallop -= min_gallop > 1;
00647           ms->min_gallop = min_gallop;
00648           k = gallop_right (*pb, pa, na, 0, comp);
00649           acount = k;
00650           if (k)
00651             {
00652               if (k < 0)
00653                 goto Fail;
00654               dest = std::copy (pa, pa + k, dest);
00655               pa += k;
00656               na -= k;
00657               if (na == 1)
00658                 goto CopyB;
00659               /* na==0 is impossible now if the comparison
00660                * function is consistent, but we can't assume
00661                * that it is.
00662                */
00663               if (na == 0)
00664                 goto Succeed;
00665             }
00666           *dest++ = *pb++;
00667           --nb;
00668           if (nb == 0)
00669             goto Succeed;
00670 
00671           k = gallop_left (*pa, pb, nb, 0, comp);
00672           bcount = k;
00673           if (k)
00674             {
00675               if (k < 0)
00676                 goto Fail;
00677               dest = std::copy (pb, pb + k, dest);
00678               pb += k;
00679               nb -= k;
00680               if (nb == 0)
00681                 goto Succeed;
00682             }
00683           *dest++ = *pa++;
00684           --na;
00685           if (na == 1)
00686             goto CopyB;
00687         }
00688       while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
00689 
00690       ++min_gallop;     /* penalize it for leaving galloping mode */
00691       ms->min_gallop = min_gallop;
00692     }
00693 
00694  Succeed:
00695   result = 0;
00696 
00697  Fail:
00698   if (na)
00699     std::copy (pa, pa + na, dest);
00700   return result;
00701 
00702  CopyB:
00703   /* The last element of pa belongs at the end of the merge. */
00704   std::copy (pb, pb + nb, dest);
00705   dest[nb] = *pa;
00706 
00707   return 0;
00708 }
00709 
00710 template <class T>
00711 template <class Comp>
00712 int
00713 octave_sort<T>::merge_lo (T *pa, octave_idx_type *ipa, octave_idx_type na,
00714                           T *pb, octave_idx_type *ipb, octave_idx_type nb,
00715                           Comp comp)
00716 {
00717   octave_idx_type k;
00718   T *dest;
00719   octave_idx_type *idest;
00720   int result = -1;      /* guilty until proved innocent */
00721   octave_idx_type min_gallop = ms->min_gallop;
00722 
00723   ms->getmemi (na);
00724 
00725   std::copy (pa, pa + na, ms->a);
00726   std::copy (ipa, ipa + na, ms->ia);
00727   dest = pa; idest = ipa;
00728   pa = ms->a; ipa = ms->ia;
00729 
00730   *dest++ = *pb++; *idest++ = *ipb++;
00731   --nb;
00732   if (nb == 0)
00733     goto Succeed;
00734   if (na == 1)
00735     goto CopyB;
00736 
00737   for (;;)
00738     {
00739       octave_idx_type acount = 0;       /* # of times A won in a row */
00740       octave_idx_type bcount = 0;       /* # of times B won in a row */
00741 
00742       /* Do the straightforward thing until (if ever) one run
00743        * appears to win consistently.
00744        */
00745       for (;;)
00746         {
00747 
00748           if (comp (*pb, *pa))
00749             {
00750               *dest++ = *pb++; *idest++ = *ipb++;
00751               ++bcount;
00752               acount = 0;
00753               --nb;
00754               if (nb == 0)
00755                 goto Succeed;
00756               if (bcount >= min_gallop)
00757                 break;
00758             }
00759           else
00760             {
00761               *dest++ = *pa++; *idest++ = *ipa++;
00762               ++acount;
00763               bcount = 0;
00764               --na;
00765               if (na == 1)
00766                 goto CopyB;
00767               if (acount >= min_gallop)
00768                 break;
00769             }
00770         }
00771 
00772       /* One run is winning so consistently that galloping may
00773        * be a huge win.  So try that, and continue galloping until
00774        * (if ever) neither run appears to be winning consistently
00775        * anymore.
00776        */
00777       ++min_gallop;
00778       do
00779         {
00780           min_gallop -= min_gallop > 1;
00781           ms->min_gallop = min_gallop;
00782           k = gallop_right (*pb, pa, na, 0, comp);
00783           acount = k;
00784           if (k)
00785             {
00786               if (k < 0)
00787                 goto Fail;
00788               dest = std::copy (pa, pa + k, dest);
00789               idest = std::copy (ipa, ipa + k, idest);
00790               pa += k; ipa += k;
00791               na -= k;
00792               if (na == 1)
00793                 goto CopyB;
00794               /* na==0 is impossible now if the comparison
00795                * function is consistent, but we can't assume
00796                * that it is.
00797                */
00798               if (na == 0)
00799                 goto Succeed;
00800             }
00801           *dest++ = *pb++; *idest++ = *ipb++;
00802           --nb;
00803           if (nb == 0)
00804             goto Succeed;
00805 
00806           k = gallop_left (*pa, pb, nb, 0, comp);
00807           bcount = k;
00808           if (k)
00809             {
00810               if (k < 0)
00811                 goto Fail;
00812               dest = std::copy (pb, pb + k, dest);
00813               idest = std::copy (ipb, ipb + k, idest);
00814               pb += k; ipb += k;
00815               nb -= k;
00816               if (nb == 0)
00817                 goto Succeed;
00818             }
00819           *dest++ = *pa++; *idest++ = *ipa++;
00820           --na;
00821           if (na == 1)
00822             goto CopyB;
00823         }
00824       while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
00825 
00826       ++min_gallop;     /* penalize it for leaving galloping mode */
00827       ms->min_gallop = min_gallop;
00828     }
00829 
00830  Succeed:
00831   result = 0;
00832 
00833  Fail:
00834   if (na)
00835     {
00836       std::copy (pa, pa + na, dest);
00837       std::copy (ipa, ipa + na, idest);
00838     }
00839   return result;
00840 
00841  CopyB:
00842   /* The last element of pa belongs at the end of the merge. */
00843   std::copy (pb, pb + nb, dest);
00844   std::copy (ipb, ipb + nb, idest);
00845   dest[nb] = *pa;
00846   idest[nb] = *ipa;
00847 
00848   return 0;
00849 }
00850 
00851 /* Merge the na elements starting at pa with the nb elements starting at pb
00852  * in a stable way, in-place.  na and nb must be > 0, and pa + na == pb.
00853  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
00854  * merge, and should have na >= nb.  See listsort.txt for more info.
00855  * Return 0 if successful, -1 if error.
00856  */
00857 template <class T>
00858 template <class Comp>
00859 int
00860 octave_sort<T>::merge_hi (T *pa, octave_idx_type na,
00861                           T *pb, octave_idx_type nb,
00862                           Comp comp)
00863 {
00864   octave_idx_type k;
00865   T *dest;
00866   int result = -1;      /* guilty until proved innocent */
00867   T *basea, *baseb;
00868   octave_idx_type min_gallop = ms->min_gallop;
00869 
00870   ms->getmem (nb);
00871 
00872   dest = pb + nb - 1;
00873   std::copy (pb, pb + nb, ms->a);
00874   basea = pa;
00875   baseb = ms->a;
00876   pb = ms->a + nb - 1;
00877   pa += na - 1;
00878 
00879   *dest-- = *pa--;
00880   --na;
00881   if (na == 0)
00882     goto Succeed;
00883   if (nb == 1)
00884     goto CopyA;
00885 
00886   for (;;)
00887     {
00888       octave_idx_type acount = 0;       /* # of times A won in a row */
00889       octave_idx_type bcount = 0;       /* # of times B won in a row */
00890 
00891       /* Do the straightforward thing until (if ever) one run
00892        * appears to win consistently.
00893        */
00894       for (;;)
00895         {
00896           if (comp (*pb, *pa))
00897             {
00898               *dest-- = *pa--;
00899               ++acount;
00900               bcount = 0;
00901               --na;
00902               if (na == 0)
00903                 goto Succeed;
00904               if (acount >= min_gallop)
00905                 break;
00906             }
00907           else
00908             {
00909               *dest-- = *pb--;
00910               ++bcount;
00911               acount = 0;
00912               --nb;
00913               if (nb == 1)
00914                 goto CopyA;
00915               if (bcount >= min_gallop)
00916                 break;
00917             }
00918         }
00919 
00920       /* One run is winning so consistently that galloping may
00921        * be a huge win.  So try that, and continue galloping until
00922        * (if ever) neither run appears to be winning consistently
00923        * anymore.
00924        */
00925       ++min_gallop;
00926       do
00927         {
00928           min_gallop -= min_gallop > 1;
00929           ms->min_gallop = min_gallop;
00930           k = gallop_right (*pb, basea, na, na-1, comp);
00931           if (k < 0)
00932             goto Fail;
00933           k = na - k;
00934           acount = k;
00935           if (k)
00936             {
00937               dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
00938               pa -= k;
00939               na -= k;
00940               if (na == 0)
00941                 goto Succeed;
00942             }
00943           *dest-- = *pb--;
00944           --nb;
00945           if (nb == 1)
00946             goto CopyA;
00947 
00948           k = gallop_left (*pa, baseb, nb, nb-1, comp);
00949           if (k < 0)
00950             goto Fail;
00951           k = nb - k;
00952           bcount = k;
00953           if (k)
00954             {
00955               dest -= k;
00956               pb -= k;
00957               std::copy (pb+1, pb+1 + k, dest+1);
00958               nb -= k;
00959               if (nb == 1)
00960                 goto CopyA;
00961               /* nb==0 is impossible now if the comparison
00962                * function is consistent, but we can't assume
00963                * that it is.
00964                */
00965               if (nb == 0)
00966                 goto Succeed;
00967             }
00968           *dest-- = *pa--;
00969           --na;
00970           if (na == 0)
00971             goto Succeed;
00972         } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
00973       ++min_gallop;     /* penalize it for leaving galloping mode */
00974       ms->min_gallop = min_gallop;
00975     }
00976 
00977 Succeed:
00978   result = 0;
00979 
00980 Fail:
00981   if (nb)
00982     std::copy (baseb, baseb + nb, dest-(nb-1));
00983   return result;
00984 
00985 CopyA:
00986   /* The first element of pb belongs at the front of the merge. */
00987   dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
00988   pa -= na;
00989   *dest = *pb;
00990 
00991   return 0;
00992 }
00993 
00994 template <class T>
00995 template <class Comp>
00996 int
00997 octave_sort<T>::merge_hi (T *pa, octave_idx_type *ipa, octave_idx_type na,
00998                           T *pb, octave_idx_type *ipb, octave_idx_type nb,
00999                           Comp comp)
01000 {
01001   octave_idx_type k;
01002   T *dest;
01003   octave_idx_type *idest;
01004   int result = -1;      /* guilty until proved innocent */
01005   T *basea, *baseb;
01006   octave_idx_type *ibaseb;
01007   octave_idx_type min_gallop = ms->min_gallop;
01008 
01009   ms->getmemi (nb);
01010 
01011   dest = pb + nb - 1;
01012   idest = ipb + nb - 1;
01013   std::copy (pb, pb + nb, ms->a);
01014   std::copy (ipb, ipb + nb, ms->ia);
01015   basea = pa;
01016   baseb = ms->a; ibaseb = ms->ia;
01017   pb = ms->a + nb - 1; ipb = ms->ia + nb - 1;
01018   pa += na - 1; ipa += na - 1;
01019 
01020   *dest-- = *pa--; *idest-- = *ipa--;
01021   --na;
01022   if (na == 0)
01023     goto Succeed;
01024   if (nb == 1)
01025     goto CopyA;
01026 
01027   for (;;)
01028     {
01029       octave_idx_type acount = 0;       /* # of times A won in a row */
01030       octave_idx_type bcount = 0;       /* # of times B won in a row */
01031 
01032       /* Do the straightforward thing until (if ever) one run
01033        * appears to win consistently.
01034        */
01035       for (;;)
01036         {
01037           if (comp (*pb, *pa))
01038             {
01039               *dest-- = *pa--; *idest-- = *ipa--;
01040               ++acount;
01041               bcount = 0;
01042               --na;
01043               if (na == 0)
01044                 goto Succeed;
01045               if (acount >= min_gallop)
01046                 break;
01047             }
01048           else
01049             {
01050               *dest-- = *pb--; *idest-- = *ipb--;
01051               ++bcount;
01052               acount = 0;
01053               --nb;
01054               if (nb == 1)
01055                 goto CopyA;
01056               if (bcount >= min_gallop)
01057                 break;
01058             }
01059         }
01060 
01061       /* One run is winning so consistently that galloping may
01062        * be a huge win.  So try that, and continue galloping until
01063        * (if ever) neither run appears to be winning consistently
01064        * anymore.
01065        */
01066       ++min_gallop;
01067       do
01068         {
01069           min_gallop -= min_gallop > 1;
01070           ms->min_gallop = min_gallop;
01071           k = gallop_right (*pb, basea, na, na-1, comp);
01072           if (k < 0)
01073             goto Fail;
01074           k = na - k;
01075           acount = k;
01076           if (k)
01077             {
01078               dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1;
01079               idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1;
01080               pa -= k; ipa -= k;
01081               na -= k;
01082               if (na == 0)
01083                 goto Succeed;
01084             }
01085           *dest-- = *pb--; *idest-- = *ipb--;
01086           --nb;
01087           if (nb == 1)
01088             goto CopyA;
01089 
01090           k = gallop_left (*pa, baseb, nb, nb-1, comp);
01091           if (k < 0)
01092             goto Fail;
01093           k = nb - k;
01094           bcount = k;
01095           if (k)
01096             {
01097               dest -= k; idest -= k;
01098               pb -= k; ipb -= k;
01099               std::copy (pb+1, pb+1 + k, dest+1);
01100               std::copy (ipb+1, ipb+1 + k, idest+1);
01101               nb -= k;
01102               if (nb == 1)
01103                 goto CopyA;
01104               /* nb==0 is impossible now if the comparison
01105                * function is consistent, but we can't assume
01106                * that it is.
01107                */
01108               if (nb == 0)
01109                 goto Succeed;
01110             }
01111           *dest-- = *pa--; *idest-- = *ipa--;
01112           --na;
01113           if (na == 0)
01114             goto Succeed;
01115         } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
01116       ++min_gallop;     /* penalize it for leaving galloping mode */
01117       ms->min_gallop = min_gallop;
01118     }
01119 
01120 Succeed:
01121   result = 0;
01122 
01123 Fail:
01124   if (nb)
01125     {
01126       std::copy (baseb, baseb + nb, dest-(nb-1));
01127       std::copy (ibaseb, ibaseb + nb, idest-(nb-1));
01128     }
01129   return result;
01130 
01131 CopyA:
01132   /* The first element of pb belongs at the front of the merge. */
01133   dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1;
01134   idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1;
01135   pa -= na; ipa -= na;
01136   *dest = *pb; *idest = *ipb;
01137 
01138   return 0;
01139 }
01140 
01141 /* Merge the two runs at stack indices i and i+1.
01142  * Returns 0 on success, -1 on error.
01143  */
01144 template <class T>
01145 template <class Comp>
01146 int
01147 octave_sort<T>::merge_at (octave_idx_type i, T *data,
01148                           Comp comp)
01149 {
01150   T *pa, *pb;
01151   octave_idx_type na, nb;
01152   octave_idx_type k;
01153 
01154   pa = data + ms->pending[i].base;
01155   na = ms->pending[i].len;
01156   pb = data + ms->pending[i+1].base;
01157   nb = ms->pending[i+1].len;
01158 
01159   /* Record the length of the combined runs; if i is the 3rd-last
01160    * run now, also slide over the last run (which isn't involved
01161    * in this merge).  The current run i+1 goes away in any case.
01162    */
01163   ms->pending[i].len = na + nb;
01164   if (i == ms->n - 3)
01165     ms->pending[i+1] = ms->pending[i+2];
01166   ms->n--;
01167 
01168   /* Where does b start in a?  Elements in a before that can be
01169    * ignored (already in place).
01170    */
01171   k = gallop_right (*pb, pa, na, 0, comp);
01172   if (k < 0)
01173     return -1;
01174   pa += k;
01175   na -= k;
01176   if (na == 0)
01177     return 0;
01178 
01179   /* Where does a end in b?  Elements in b after that can be
01180    * ignored (already in place).
01181    */
01182   nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
01183   if (nb <= 0)
01184     return nb;
01185 
01186   /* Merge what remains of the runs, using a temp array with
01187    * min(na, nb) elements.
01188    */
01189   if (na <= nb)
01190     return merge_lo (pa, na, pb, nb, comp);
01191   else
01192     return merge_hi (pa, na, pb, nb, comp);
01193 }
01194 
01195 template <class T>
01196 template <class Comp>
01197 int
01198 octave_sort<T>::merge_at (octave_idx_type i, T *data, octave_idx_type *idx,
01199                           Comp comp)
01200 {
01201   T *pa, *pb;
01202   octave_idx_type *ipa, *ipb;
01203   octave_idx_type na, nb;
01204   octave_idx_type k;
01205 
01206   pa = data + ms->pending[i].base;
01207   ipa = idx + ms->pending[i].base;
01208   na = ms->pending[i].len;
01209   pb = data + ms->pending[i+1].base;
01210   ipb = idx + ms->pending[i+1].base;
01211   nb = ms->pending[i+1].len;
01212 
01213   /* Record the length of the combined runs; if i is the 3rd-last
01214    * run now, also slide over the last run (which isn't involved
01215    * in this merge).  The current run i+1 goes away in any case.
01216    */
01217   ms->pending[i].len = na + nb;
01218   if (i == ms->n - 3)
01219     ms->pending[i+1] = ms->pending[i+2];
01220   ms->n--;
01221 
01222   /* Where does b start in a?  Elements in a before that can be
01223    * ignored (already in place).
01224    */
01225   k = gallop_right (*pb, pa, na, 0, comp);
01226   if (k < 0)
01227     return -1;
01228   pa += k; ipa += k;
01229   na -= k;
01230   if (na == 0)
01231     return 0;
01232 
01233   /* Where does a end in b?  Elements in b after that can be
01234    * ignored (already in place).
01235    */
01236   nb = gallop_left (pa[na-1], pb, nb, nb-1, comp);
01237   if (nb <= 0)
01238     return nb;
01239 
01240   /* Merge what remains of the runs, using a temp array with
01241    * min(na, nb) elements.
01242    */
01243   if (na <= nb)
01244     return merge_lo (pa, ipa, na, pb, ipb, nb, comp);
01245   else
01246     return merge_hi (pa, ipa, na, pb, ipb, nb, comp);
01247 }
01248 
01249 /* Examine the stack of runs waiting to be merged, merging adjacent runs
01250  * until the stack invariants are re-established:
01251  *
01252  * 1. len[-3] > len[-2] + len[-1]
01253  * 2. len[-2] > len[-1]
01254  *
01255  * See listsort.txt for more info.
01256  *
01257  * Returns 0 on success, -1 on error.
01258  */
01259 template <class T>
01260 template <class Comp>
01261 int
01262 octave_sort<T>::merge_collapse (T *data, Comp comp)
01263 {
01264   struct s_slice *p = ms->pending;
01265 
01266   while (ms->n > 1)
01267     {
01268       octave_idx_type n = ms->n - 2;
01269       if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
01270         {
01271           if (p[n-1].len < p[n+1].len)
01272             --n;
01273           if (merge_at (n, data, comp) < 0)
01274             return -1;
01275         }
01276       else if (p[n].len <= p[n+1].len)
01277         {
01278           if (merge_at (n, data, comp) < 0)
01279             return -1;
01280         }
01281       else
01282         break;
01283     }
01284 
01285   return 0;
01286 }
01287 
01288 template <class T>
01289 template <class Comp>
01290 int
01291 octave_sort<T>::merge_collapse (T *data, octave_idx_type *idx, Comp comp)
01292 {
01293   struct s_slice *p = ms->pending;
01294 
01295   while (ms->n > 1)
01296     {
01297       octave_idx_type n = ms->n - 2;
01298       if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
01299         {
01300           if (p[n-1].len < p[n+1].len)
01301             --n;
01302           if (merge_at (n, data, idx, comp) < 0)
01303             return -1;
01304         }
01305       else if (p[n].len <= p[n+1].len)
01306         {
01307           if (merge_at (n, data, idx, comp) < 0)
01308             return -1;
01309         }
01310       else
01311         break;
01312     }
01313 
01314   return 0;
01315 }
01316 
01317 /* Regardless of invariants, merge all runs on the stack until only one
01318  * remains.  This is used at the end of the mergesort.
01319  *
01320  * Returns 0 on success, -1 on error.
01321  */
01322 template <class T>
01323 template <class Comp>
01324 int
01325 octave_sort<T>::merge_force_collapse (T *data, Comp comp)
01326 {
01327   struct s_slice *p = ms->pending;
01328 
01329   while (ms->n > 1)
01330     {
01331       octave_idx_type n = ms->n - 2;
01332       if (n > 0 && p[n-1].len < p[n+1].len)
01333         --n;
01334       if (merge_at (n, data, comp) < 0)
01335         return -1;
01336     }
01337 
01338   return 0;
01339 }
01340 
01341 template <class T>
01342 template <class Comp>
01343 int
01344 octave_sort<T>::merge_force_collapse (T *data, octave_idx_type *idx, Comp comp)
01345 {
01346   struct s_slice *p = ms->pending;
01347 
01348   while (ms->n > 1)
01349     {
01350       octave_idx_type n = ms->n - 2;
01351       if (n > 0 && p[n-1].len < p[n+1].len)
01352         --n;
01353       if (merge_at (n, data, idx, comp) < 0)
01354         return -1;
01355     }
01356 
01357   return 0;
01358 }
01359 
01360 /* Compute a good value for the minimum run length; natural runs shorter
01361  * than this are boosted artificially via binary insertion.
01362  *
01363  * If n < 64, return n (it's too small to bother with fancy stuff).
01364  * Else if n is an exact power of 2, return 32.
01365  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
01366  * strictly less than, an exact power of 2.
01367  *
01368  * See listsort.txt for more info.
01369  */
01370 template <class T>
01371 octave_idx_type
01372 octave_sort<T>::merge_compute_minrun (octave_idx_type n)
01373 {
01374   octave_idx_type r = 0;        /* becomes 1 if any 1 bits are shifted off */
01375 
01376   while (n >= 64)
01377     {
01378       r |= n & 1;
01379       n >>= 1;
01380     }
01381 
01382   return n + r;
01383 }
01384 
01385 template <class T>
01386 template <class Comp>
01387 void
01388 octave_sort<T>::sort (T *data, octave_idx_type nel, Comp comp)
01389 {
01390   /* Re-initialize the Mergestate as this might be the second time called */
01391   if (! ms) ms = new MergeState;
01392 
01393   ms->reset ();
01394   ms->getmem (1024);
01395 
01396   if (nel > 1)
01397     {
01398       octave_idx_type nremaining = nel;
01399       octave_idx_type lo = 0;
01400 
01401       /* March over the array once, left to right, finding natural runs,
01402        * and extending short natural runs to minrun elements.
01403        */
01404       octave_idx_type minrun = merge_compute_minrun (nremaining);
01405       do
01406         {
01407           bool descending;
01408           octave_idx_type n;
01409 
01410           /* Identify next run. */
01411           n = count_run (data + lo, nremaining, descending, comp);
01412           if (n < 0)
01413             goto fail;
01414           if (descending)
01415             std::reverse (data + lo, data + lo + n);
01416           /* If short, extend to min(minrun, nremaining). */
01417           if (n < minrun)
01418             {
01419               const octave_idx_type force = nremaining <= minrun ? nremaining : minrun;
01420               binarysort (data + lo, force, n, comp);
01421               n = force;
01422             }
01423           /* Push run onto pending-runs stack, and maybe merge. */
01424           assert (ms->n < MAX_MERGE_PENDING);
01425           ms->pending[ms->n].base = lo;
01426           ms->pending[ms->n].len = n;
01427           ms->n++;
01428           if (merge_collapse (data, comp) < 0)
01429             goto fail;
01430           /* Advance to find next run. */
01431           lo += n;
01432           nremaining -= n;
01433         }
01434       while (nremaining);
01435 
01436       merge_force_collapse (data, comp);
01437     }
01438 
01439 fail:
01440   return;
01441 }
01442 
01443 template <class T>
01444 template <class Comp>
01445 void
01446 octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel,
01447                       Comp comp)
01448 {
01449   /* Re-initialize the Mergestate as this might be the second time called */
01450   if (! ms) ms = new MergeState;
01451 
01452   ms->reset ();
01453   ms->getmemi (1024);
01454 
01455   if (nel > 1)
01456     {
01457       octave_idx_type nremaining = nel;
01458       octave_idx_type lo = 0;
01459 
01460       /* March over the array once, left to right, finding natural runs,
01461        * and extending short natural runs to minrun elements.
01462        */
01463       octave_idx_type minrun = merge_compute_minrun (nremaining);
01464       do
01465         {
01466           bool descending;
01467           octave_idx_type n;
01468 
01469           /* Identify next run. */
01470           n = count_run (data + lo, nremaining, descending, comp);
01471           if (n < 0)
01472             goto fail;
01473           if (descending)
01474             {
01475               std::reverse (data + lo, data + lo + n);
01476               std::reverse (idx + lo, idx + lo + n);
01477             }
01478           /* If short, extend to min(minrun, nremaining). */
01479           if (n < minrun)
01480             {
01481               const octave_idx_type force = nremaining <= minrun ? nremaining : minrun;
01482               binarysort (data + lo, idx + lo, force, n, comp);
01483               n = force;
01484             }
01485           /* Push run onto pending-runs stack, and maybe merge. */
01486           assert (ms->n < MAX_MERGE_PENDING);
01487           ms->pending[ms->n].base = lo;
01488           ms->pending[ms->n].len = n;
01489           ms->n++;
01490           if (merge_collapse (data, idx, comp) < 0)
01491             goto fail;
01492           /* Advance to find next run. */
01493           lo += n;
01494           nremaining -= n;
01495         }
01496       while (nremaining);
01497 
01498       merge_force_collapse (data, idx, comp);
01499     }
01500 
01501 fail:
01502   return;
01503 }
01504 
01505 template <class T>
01506 void
01507 octave_sort<T>::sort (T *data, octave_idx_type nel)
01508 {
01509 #ifdef INLINE_ASCENDING_SORT
01510   if (compare == ascending_compare)
01511     sort (data, nel, std::less<T> ());
01512   else
01513 #endif
01514 #ifdef INLINE_DESCENDING_SORT
01515     if (compare == descending_compare)
01516       sort (data, nel, std::greater<T> ());
01517   else
01518 #endif
01519     if (compare)
01520       sort (data, nel, compare);
01521 }
01522 
01523 template <class T>
01524 void
01525 octave_sort<T>::sort (T *data, octave_idx_type *idx, octave_idx_type nel)
01526 {
01527 #ifdef INLINE_ASCENDING_SORT
01528   if (compare == ascending_compare)
01529     sort (data, idx, nel, std::less<T> ());
01530   else
01531 #endif
01532 #ifdef INLINE_DESCENDING_SORT
01533     if (compare == descending_compare)
01534       sort (data, idx, nel, std::greater<T> ());
01535   else
01536 #endif
01537     if (compare)
01538       sort (data, idx, nel, compare);
01539 }
01540 
01541 template <class T> template <class Comp>
01542 bool
01543 octave_sort<T>::is_sorted (const T *data, octave_idx_type nel, Comp comp)
01544 {
01545   const T *end = data + nel;
01546   if (data != end)
01547     {
01548       const T *next = data;
01549       while (++next != end)
01550         {
01551           if (comp (*next, *data))
01552             break;
01553           data = next;
01554         }
01555       data = next;
01556     }
01557 
01558   return data == end;
01559 }
01560 
01561 template <class T>
01562 bool
01563 octave_sort<T>::is_sorted (const T *data, octave_idx_type nel)
01564 {
01565   bool retval = false;
01566 #ifdef INLINE_ASCENDING_SORT
01567   if (compare == ascending_compare)
01568     retval = is_sorted (data, nel, std::less<T> ());
01569   else
01570 #endif
01571 #ifdef INLINE_DESCENDING_SORT
01572     if (compare == descending_compare)
01573       retval = is_sorted (data, nel, std::greater<T> ());
01574   else
01575 #endif
01576     if (compare)
01577       retval = is_sorted (data, nel, compare);
01578 
01579   return retval;
01580 }
01581 
01582 // FIXME: is there really no way to make this local to the following function?
01583 struct sortrows_run_t
01584 {
01585   sortrows_run_t (octave_idx_type c, octave_idx_type o, octave_idx_type n)
01586     : col (c), ofs (o), nel (n) { }
01587   octave_idx_type col, ofs, nel;
01588 };
01589 
01590 
01591 template <class T> template <class Comp>
01592 void
01593 octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx,
01594                            octave_idx_type rows, octave_idx_type cols,
01595                            Comp comp)
01596 {
01597   OCTAVE_LOCAL_BUFFER (T, buf, rows);
01598   for (octave_idx_type i = 0; i < rows; i++)
01599     idx[i] = i;
01600 
01601   if (cols == 0 || rows <= 1)
01602     return;
01603 
01604 
01605   // This is a breadth-first traversal.
01606   typedef sortrows_run_t run_t;
01607   std::stack<run_t> runs;
01608 
01609   runs.push (run_t (0, 0, rows));
01610 
01611   while (! runs.empty ())
01612     {
01613       octave_idx_type col  = runs.top ().col;
01614       octave_idx_type ofs  = runs.top ().ofs;
01615       octave_idx_type nel  = runs.top ().nel;
01616       runs.pop ();
01617       assert (nel > 1);
01618 
01619       T *lbuf = buf + ofs;
01620       const T *ldata = data + rows*col;
01621       octave_idx_type *lidx = idx + ofs;
01622 
01623       // Gather.
01624       for (octave_idx_type i = 0; i < nel; i++)
01625         lbuf[i] = ldata[lidx[i]];
01626 
01627       // Sort.
01628       sort (lbuf, lidx, nel, comp);
01629 
01630       // Identify constant runs and schedule subsorts.
01631       if (col < cols-1)
01632         {
01633           octave_idx_type lst = 0;
01634           for (octave_idx_type i = 0; i < nel; i++)
01635             {
01636               if (comp (lbuf[lst], lbuf[i]))
01637                 {
01638                   if (i > lst + 1)
01639                     runs.push (run_t (col+1, ofs + lst, i - lst));
01640                   lst = i;
01641                 }
01642             }
01643           if (nel > lst + 1)
01644             runs.push (run_t (col+1, ofs + lst, nel - lst));
01645         }
01646     }
01647 }
01648 
01649 template <class T>
01650 void
01651 octave_sort<T>::sort_rows (const T *data, octave_idx_type *idx,
01652                            octave_idx_type rows, octave_idx_type cols)
01653 {
01654 #ifdef INLINE_ASCENDING_SORT
01655   if (compare == ascending_compare)
01656     sort_rows (data, idx, rows, cols, std::less<T> ());
01657   else
01658 #endif
01659 #ifdef INLINE_DESCENDING_SORT
01660     if (compare == descending_compare)
01661       sort_rows (data, idx, rows, cols, std::greater<T> ());
01662   else
01663 #endif
01664     if (compare)
01665       sort_rows (data, idx, rows, cols, compare);
01666 }
01667 
01668 template <class T> template <class Comp>
01669 bool
01670 octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows,
01671                                 octave_idx_type cols, Comp comp)
01672 {
01673   if (rows <= 1 || cols == 0)
01674     return true;
01675 
01676   // This is a breadth-first traversal.
01677   const T *lastrow = data + rows*(cols - 1);
01678   typedef std::pair<const T *, octave_idx_type> run_t;
01679   std::stack<run_t> runs;
01680 
01681   bool sorted = true;
01682   runs.push (run_t (data, rows));
01683   while (sorted && ! runs.empty ())
01684     {
01685       const T *lo = runs.top ().first;
01686       octave_idx_type n = runs.top ().second;
01687       runs.pop ();
01688       if (lo < lastrow)
01689         {
01690           // Not the final column.
01691           assert (n > 1);
01692           const T *hi = lo + n, *lst = lo;
01693           for (lo++; lo < hi; lo++)
01694             {
01695               if (comp (*lst, *lo))
01696                 {
01697                   if (lo > lst + 1)
01698                       runs.push (run_t (lst + rows, lo - lst));
01699                   lst = lo;
01700                 }
01701               else if (comp (*lo, *lst))
01702                 break;
01703 
01704             }
01705           if (lo == hi)
01706             {
01707               if (lo > lst + 1)
01708                 runs.push (run_t (lst + rows, lo - lst));
01709             }
01710           else
01711             {
01712               sorted = false;
01713               break;
01714             }
01715         }
01716       else
01717         // The final column - use fast code.
01718         sorted = is_sorted (lo, n, comp);
01719     }
01720 
01721   return sorted;
01722 }
01723 
01724 template <class T>
01725 bool
01726 octave_sort<T>::is_sorted_rows (const T *data, octave_idx_type rows,
01727                                 octave_idx_type cols)
01728 {
01729   bool retval = false;
01730 
01731 #ifdef INLINE_ASCENDING_SORT
01732   if (compare == ascending_compare)
01733     retval = is_sorted_rows (data, rows, cols, std::less<T> ());
01734   else
01735 #endif
01736 #ifdef INLINE_DESCENDING_SORT
01737     if (compare == descending_compare)
01738       retval = is_sorted_rows (data, rows, cols, std::greater<T> ());
01739   else
01740 #endif
01741     if (compare)
01742       retval = is_sorted_rows (data, rows, cols, compare);
01743 
01744   return retval;
01745 }
01746 
01747 // The simple binary lookup.
01748 
01749 template <class T> template <class Comp>
01750 octave_idx_type
01751 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
01752                         const T& value, Comp comp)
01753 {
01754   octave_idx_type lo = 0, hi = nel;
01755 
01756   while (lo < hi)
01757     {
01758       octave_idx_type mid = lo + ((hi-lo) >> 1);
01759       if (comp (value, data[mid]))
01760         hi = mid;
01761       else
01762         lo = mid + 1;
01763     }
01764 
01765   return lo;
01766 }
01767 
01768 template <class T>
01769 octave_idx_type
01770 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
01771                         const T& value)
01772 {
01773   octave_idx_type retval = 0;
01774 
01775 #ifdef INLINE_ASCENDING_SORT
01776   if (compare == ascending_compare)
01777     retval = lookup (data, nel, value, std::less<T> ());
01778   else
01779 #endif
01780 #ifdef INLINE_DESCENDING_SORT
01781     if (compare == descending_compare)
01782       retval = lookup (data, nel, value, std::greater<T> ());
01783   else
01784 #endif
01785     if (compare)
01786       retval = lookup (data, nel, value, std::ptr_fun (compare));
01787 
01788   return retval;
01789 }
01790 
01791 template <class T> template <class Comp>
01792 void
01793 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
01794                         const T *values, octave_idx_type nvalues,
01795                         octave_idx_type *idx, Comp comp)
01796 {
01797   // Use a sequence of binary lookups.
01798   // TODO: Can this be sped up generally? The sorted merge case is dealt with
01799   // elsewhere.
01800   for (octave_idx_type j = 0; j < nvalues; j++)
01801     idx[j] = lookup (data, nel, values[j], comp);
01802 }
01803 
01804 template <class T>
01805 void
01806 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
01807                         const T* values, octave_idx_type nvalues,
01808                         octave_idx_type *idx)
01809 {
01810 #ifdef INLINE_ASCENDING_SORT
01811   if (compare == ascending_compare)
01812     lookup (data, nel, values, nvalues, idx, std::less<T> ());
01813   else
01814 #endif
01815 #ifdef INLINE_DESCENDING_SORT
01816     if (compare == descending_compare)
01817       lookup (data, nel, values, nvalues, idx, std::greater<T> ());
01818   else
01819 #endif
01820     if (compare)
01821       lookup (data, nel, values, nvalues, idx, std::ptr_fun (compare));
01822 }
01823 
01824 template <class T> template <class Comp>
01825 void
01826 octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel,
01827                                const T *values, octave_idx_type nvalues,
01828                                octave_idx_type *idx, bool rev, Comp comp)
01829 {
01830   if (rev)
01831     {
01832       octave_idx_type i = 0, j = nvalues - 1;
01833 
01834       if (nvalues > 0 && nel > 0)
01835         {
01836           while (true)
01837             {
01838               if (comp (values[j], data[i]))
01839                 {
01840                   idx[j] = i;
01841                   if (--j < 0)
01842                     break;
01843                 }
01844               else if (++i == nel)
01845                 break;
01846             }
01847         }
01848 
01849       for (; j >= 0; j--)
01850         idx[j] = i;
01851     }
01852   else
01853     {
01854       octave_idx_type i = 0, j = 0;
01855 
01856       if (nvalues > 0 && nel > 0)
01857         {
01858           while (true)
01859             {
01860               if (comp (values[j], data[i]))
01861                 {
01862                   idx[j] = i;
01863                   if (++j == nvalues)
01864                     break;
01865                 }
01866               else if (++i == nel)
01867                 break;
01868             }
01869         }
01870 
01871       for (; j != nvalues; j++)
01872         idx[j] = i;
01873     }
01874 }
01875 
01876 template <class T>
01877 void
01878 octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel,
01879                                const T* values, octave_idx_type nvalues,
01880                                octave_idx_type *idx, bool rev)
01881 {
01882 #ifdef INLINE_ASCENDING_SORT
01883   if (compare == ascending_compare)
01884     lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
01885   else
01886 #endif
01887 #ifdef INLINE_DESCENDING_SORT
01888     if (compare == descending_compare)
01889       lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
01890   else
01891 #endif
01892     if (compare)
01893       lookup_sorted (data, nel, values, nvalues, idx, rev, std::ptr_fun (compare));
01894 }
01895 
01896 template <class T> template <class Comp>
01897 void
01898 octave_sort<T>::nth_element (T *data, octave_idx_type nel,
01899                              octave_idx_type lo, octave_idx_type up,
01900                              Comp comp)
01901 {
01902   // Simply wrap the STL algorithms.
01903   // FIXME: this will fail if we attempt to inline <,> for Complex.
01904   if (up == lo+1)
01905     std::nth_element (data, data + lo, data + nel, comp);
01906   else if (lo == 0)
01907     std::partial_sort (data, data + up, data + nel, comp);
01908   else
01909     {
01910       std::nth_element (data, data + lo, data + nel, comp);
01911       if (up == lo + 2)
01912         {
01913           // Finding two subsequent elements.
01914           std::swap (data[lo+1],
01915                      *std::min_element (data + lo + 1, data + nel, comp));
01916         }
01917       else
01918         std::partial_sort (data + lo + 1, data + up, data + nel, comp);
01919     }
01920 }
01921 
01922 template <class T>
01923 void
01924 octave_sort<T>::nth_element (T *data, octave_idx_type nel,
01925                              octave_idx_type lo, octave_idx_type up)
01926 {
01927   if (up < 0)
01928     up = lo + 1;
01929 #ifdef INLINE_ASCENDING_SORT
01930   if (compare == ascending_compare)
01931     nth_element (data, nel, lo, up, std::less<T> ());
01932   else
01933 #endif
01934 #ifdef INLINE_DESCENDING_SORT
01935     if (compare == descending_compare)
01936       nth_element (data, nel, lo, up, std::greater<T> ());
01937   else
01938 #endif
01939     if (compare)
01940       nth_element (data, nel, lo, up, std::ptr_fun (compare));
01941 }
01942 
01943 template <class T>
01944 bool
01945 octave_sort<T>::ascending_compare (typename ref_param<T>::type x,
01946                                    typename ref_param<T>::type y)
01947 {
01948   return x < y;
01949 }
01950 
01951 template <class T>
01952 bool
01953 octave_sort<T>::descending_compare (typename ref_param<T>::type x,
01954                                     typename ref_param<T>::type y)
01955 {
01956   return x > y;
01957 }
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines