QuickIndex.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "QuickIndex.h"
00019 #include "Error.h"
00020 
00021 #define __QI_INVALID          0
00022 #define __QI_VECTOR           1
00023 #define __QI_INTARRAY         2
00024 #define __QI_STRINGARRAY      3
00025 
00026 QuickIndex::QuickIndex()
00027 {
00028     source = NULL;
00029     datatype = __QI_INVALID;
00030 }
00031 
00032 void QuickIndex::Index(const IntArray & source_data)
00033 {
00034     source = (const void *) &source_data;
00035     datatype = __QI_INTARRAY;
00036 
00037     Dimension(source_data.Length());
00038     SetSequence();
00039     Sort();
00040 }
00041 
00042 void QuickIndex::Index(const Vector & source_data)
00043 {
00044     source = (const void *) &source_data;
00045     datatype = __QI_VECTOR;
00046 
00047     Dimension(source_data.Length());
00048     SetSequence();
00049     Sort();
00050 }
00051 
00052 void QuickIndex::Index(const StringArray & source_data)
00053 {
00054     source = (const void *) &source_data;
00055     datatype = __QI_STRINGARRAY;
00056 
00057     Dimension(source_data.Length());
00058     SetSequence();
00059     Sort();
00060 }
00061 
00062 void QuickIndex::IndexCounts(const StringIntMap & source_data)
00063 {
00064     IntArray counts(source_data.Length());
00065 
00066     for (int i = 0; i < source_data.Length(); i++)
00067         counts[i] = source_data.GetCount(i);
00068 
00069     Index(counts);
00070 }
00071 
00072 bool QuickIndex::IsBefore(int i, int j)
00073 {
00074     i = (*this)[i];
00075     j = (*this)[j];
00076 
00077     switch (datatype)
00078     {
00079         case __QI_VECTOR :
00080         {
00081             const Vector & data = * (const Vector *) source;
00082             return data[i] < data[j];
00083         }
00084         case __QI_INTARRAY :
00085         {
00086             const IntArray & data = * (const IntArray *) source;
00087             return data[i] < data[j];
00088         }
00089         case __QI_STRINGARRAY :
00090         {
00091             const StringArray & data = * (const StringArray *) source;
00092             return data[i].SlowCompare(data[j]) < 0;
00093         }
00094     }
00095     return 0;
00096 }
00097 
00098 void QuickIndex::Sort()
00099 {
00100     struct __QuickIndexStack
00101     {
00102         int left, right;
00103     };
00104 
00105     if (Length() <= 1)
00106         return;
00107 
00108     // Create a pseudo-stack to avoid recursion
00109     __QuickIndexStack stack[32];
00110 
00111     int stackIdx = 0;
00112 
00113     // Size of minimum partition to median of three
00114     const int Threshold = 7;
00115 
00116     // current partitions
00117     int lsize, rsize;
00118     int l, mid, r;
00119     int scanl, scanr, pivot;
00120 
00121     l = 0;
00122     r = Length() - 1;
00123 
00124     while (1)
00125     {
00126         while (r > l)
00127         {
00128             if (r - l > Threshold)
00129                 // QuickSort : median of three partitioning
00130             {
00131                 mid = (r + l) / 2;
00132 
00133                 // sort l, mid, and r
00134                 if (IsBefore(mid, l))
00135                     Swap(mid, l);
00136 
00137                 if (IsBefore(r, l))
00138                     Swap(r, l);
00139 
00140                 if (IsBefore(r, mid))
00141                     Swap(r, mid);
00142 
00143                 // set up for partitioning...
00144                 pivot = r - 1;
00145 
00146                 Swap(mid, pivot);
00147 
00148                 scanl = l + 1;
00149                 scanr = r - 2;
00150             }
00151             else
00152             {
00153                 // set up random partition -- faster
00154                 pivot = r;
00155                 scanl = l;
00156                 scanr = r - 1;
00157             }
00158 
00159             while (1)
00160             {
00161                 // scan from left for element >= pivot
00162                 while ((scanl < r) && IsBefore(scanl, pivot))
00163                     ++scanl;
00164 
00165                 while ((scanr > l) && IsBefore(pivot, scanr))
00166                     --scanr;
00167 
00168                 // if scans have met, we are done
00169                 if (scanl >= scanr)
00170                     break;
00171 
00172                 Swap(scanl, scanr);
00173 
00174                 if (scanl < r)
00175                     ++scanl;
00176 
00177                 if (scanr > l)
00178                     --scanr;
00179             }
00180 
00181             // Exchange final element
00182             Swap(pivot, scanl);
00183 
00184             // Place largest partition on stack
00185             lsize = scanl - l;
00186             rsize = r - scanl;
00187 
00188             if (lsize > rsize)
00189             {
00190                 // if size is one we are done
00191                 ++ stackIdx;
00192 
00193                 stack[stackIdx].left = l;
00194                 stack[stackIdx].right = scanl - 1;
00195 
00196                 if (rsize != 0)
00197                     l = scanl + 1;
00198                 else
00199                     break;
00200             }
00201             else
00202             {
00203                 // if size is one we are done
00204                 ++ stackIdx;
00205 
00206                 stack[stackIdx].left = scanl + 1;
00207                 stack[stackIdx].right = r;
00208 
00209                 if (lsize != 0)
00210                     r = scanl - 1;
00211                 else
00212                     break;
00213             }
00214         }
00215 
00216         // iterate with values from stack
00217         if (stackIdx)
00218         {
00219             l = stack[stackIdx].left;
00220             r = stack[stackIdx].right;
00221 
00222             --stackIdx;
00223         }
00224         else
00225             break;
00226     }
00227 }
00228 
00229 
00230 
00231 
00232 
00233 
00234 
Generated on Wed Nov 17 15:38:29 2010 for StatGen Software by  doxygen 1.6.3