libStatGen Software  1
QuickIndex.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "QuickIndex.h"
00019 #include "Error.h"
00020 
00021 #define __QI_INVALID          0
00022 #define __QI_VECTOR           1
00023 #define __QI_INTARRAY         2
00024 #define __QI_STRINGARRAY      3
00025 
00026 QuickIndex::QuickIndex()
00027 {
00028     source = NULL;
00029     datatype = __QI_INVALID;
00030 }
00031 
00032 void QuickIndex::Index(const IntArray & source_data)
00033 {
00034     source = (const void *) &source_data;
00035     datatype = __QI_INTARRAY;
00036 
00037     Dimension(source_data.Length());
00038     SetSequence();
00039     Sort();
00040 }
00041 
00042 void QuickIndex::Index(const Vector & source_data)
00043 {
00044     source = (const void *) &source_data;
00045     datatype = __QI_VECTOR;
00046 
00047     Dimension(source_data.Length());
00048     SetSequence();
00049     Sort();
00050 }
00051 
00052 void QuickIndex::Index(const StringArray & source_data)
00053 {
00054     source = (const void *) &source_data;
00055     datatype = __QI_STRINGARRAY;
00056 
00057     Dimension(source_data.Length());
00058     SetSequence();
00059     Sort();
00060 }
00061 
00062 void QuickIndex::IndexCounts(const StringIntMap & source_data)
00063 {
00064     IntArray counts(source_data.Length());
00065 
00066     for (int i = 0; i < source_data.Length(); i++)
00067         counts[i] = source_data.GetCount(i);
00068 
00069     Index(counts);
00070 }
00071 
00072 void QuickIndex::IndexCounts(const StringIntHash & source_data)
00073 {
00074     IntArray counts(source_data.Capacity());
00075 
00076     for (int i = 0; i < source_data.Capacity(); i++)
00077         if (source_data.SlotInUse(i))
00078             counts[i] = source_data.Integer(i);
00079         else
00080             counts[i] = -1;
00081 
00082     Index(counts);
00083 
00084     Reverse();
00085     Dimension(source_data.Entries());
00086     Reverse();
00087 }
00088 
00089 bool QuickIndex::IsBefore(int i, int j)
00090 {
00091     i = (*this)[i];
00092     j = (*this)[j];
00093 
00094     switch (datatype)
00095     {
00096         case __QI_VECTOR :
00097         {
00098             const Vector & data = * (const Vector *) source;
00099             return data[i] < data[j];
00100         }
00101         case __QI_INTARRAY :
00102         {
00103             const IntArray & data = * (const IntArray *) source;
00104             return data[i] < data[j];
00105         }
00106         case __QI_STRINGARRAY :
00107         {
00108             const StringArray & data = * (const StringArray *) source;
00109             return data[i].SlowCompare(data[j]) < 0;
00110         }
00111     }
00112     return 0;
00113 }
00114 
00115 void QuickIndex::Sort()
00116 {
00117     struct __QuickIndexStack
00118     {
00119         int left, right;
00120     };
00121 
00122     if (Length() <= 1)
00123         return;
00124 
00125     // Create a pseudo-stack to avoid recursion
00126     __QuickIndexStack stack[32];
00127 
00128     int stackIdx = 0;
00129 
00130     // Size of minimum partition to median of three
00131     const int Threshold = 7;
00132 
00133     // current partitions
00134     int lsize, rsize;
00135     int l, mid, r;
00136     int scanl, scanr, pivot;
00137 
00138     l = 0;
00139     r = Length() - 1;
00140 
00141     while (1)
00142     {
00143         while (r > l)
00144         {
00145             if (r - l > Threshold)
00146                 // QuickSort : median of three partitioning
00147             {
00148                 mid = (r + l) / 2;
00149 
00150                 // sort l, mid, and r
00151                 if (IsBefore(mid, l))
00152                     Swap(mid, l);
00153 
00154                 if (IsBefore(r, l))
00155                     Swap(r, l);
00156 
00157                 if (IsBefore(r, mid))
00158                     Swap(r, mid);
00159 
00160                 // set up for partitioning...
00161                 pivot = r - 1;
00162 
00163                 Swap(mid, pivot);
00164 
00165                 scanl = l + 1;
00166                 scanr = r - 2;
00167             }
00168             else
00169             {
00170                 // set up random partition -- faster
00171                 pivot = r;
00172                 scanl = l;
00173                 scanr = r - 1;
00174             }
00175 
00176             while (1)
00177             {
00178                 // scan from left for element >= pivot
00179                 while ((scanl < r) && IsBefore(scanl, pivot))
00180                     ++scanl;
00181 
00182                 while ((scanr > l) && IsBefore(pivot, scanr))
00183                     --scanr;
00184 
00185                 // if scans have met, we are done
00186                 if (scanl >= scanr)
00187                     break;
00188 
00189                 Swap(scanl, scanr);
00190 
00191                 if (scanl < r)
00192                     ++scanl;
00193 
00194                 if (scanr > l)
00195                     --scanr;
00196             }
00197 
00198             // Exchange final element
00199             Swap(pivot, scanl);
00200 
00201             // Place largest partition on stack
00202             lsize = scanl - l;
00203             rsize = r - scanl;
00204 
00205             if (lsize > rsize)
00206             {
00207                 // if size is one we are done
00208                 ++ stackIdx;
00209 
00210                 stack[stackIdx].left = l;
00211                 stack[stackIdx].right = scanl - 1;
00212 
00213                 if (rsize != 0)
00214                     l = scanl + 1;
00215                 else
00216                     break;
00217             }
00218             else
00219             {
00220                 // if size is one we are done
00221                 ++ stackIdx;
00222 
00223                 stack[stackIdx].left = scanl + 1;
00224                 stack[stackIdx].right = r;
00225 
00226                 if (lsize != 0)
00227                     r = scanl - 1;
00228                 else
00229                     break;
00230             }
00231         }
00232 
00233         // iterate with values from stack
00234         if (stackIdx)
00235         {
00236             l = stack[stackIdx].left;
00237             r = stack[stackIdx].right;
00238 
00239             --stackIdx;
00240         }
00241         else
00242             break;
00243     }
00244 }
00245 
00246 
00247 
00248 
00249 
00250 
00251 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends