libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "QuickIndex.h" 00019 #include "Error.h" 00020 00021 #define __QI_INVALID 0 00022 #define __QI_VECTOR 1 00023 #define __QI_INTARRAY 2 00024 #define __QI_STRINGARRAY 3 00025 00026 QuickIndex::QuickIndex() 00027 { 00028 source = NULL; 00029 datatype = __QI_INVALID; 00030 } 00031 00032 void QuickIndex::Index(const IntArray & source_data) 00033 { 00034 source = (const void *) &source_data; 00035 datatype = __QI_INTARRAY; 00036 00037 Dimension(source_data.Length()); 00038 SetSequence(); 00039 Sort(); 00040 } 00041 00042 void QuickIndex::Index(const Vector & source_data) 00043 { 00044 source = (const void *) &source_data; 00045 datatype = __QI_VECTOR; 00046 00047 Dimension(source_data.Length()); 00048 SetSequence(); 00049 Sort(); 00050 } 00051 00052 void QuickIndex::Index(const StringArray & source_data) 00053 { 00054 source = (const void *) &source_data; 00055 datatype = __QI_STRINGARRAY; 00056 00057 Dimension(source_data.Length()); 00058 SetSequence(); 00059 Sort(); 00060 } 00061 00062 void QuickIndex::IndexCounts(const StringIntMap & source_data) 00063 { 00064 IntArray counts(source_data.Length()); 00065 00066 for (int i = 0; i < source_data.Length(); i++) 00067 counts[i] = source_data.GetCount(i); 00068 00069 Index(counts); 00070 } 00071 00072 void QuickIndex::IndexCounts(const StringIntHash & source_data) 00073 { 00074 IntArray counts(source_data.Capacity()); 00075 00076 for (int i = 0; i < source_data.Capacity(); i++) 00077 if (source_data.SlotInUse(i)) 00078 counts[i] = source_data.Integer(i); 00079 else 00080 counts[i] = -1; 00081 00082 Index(counts); 00083 00084 Reverse(); 00085 Dimension(source_data.Entries()); 00086 Reverse(); 00087 } 00088 00089 bool QuickIndex::IsBefore(int i, int j) 00090 { 00091 i = (*this)[i]; 00092 j = (*this)[j]; 00093 00094 switch (datatype) 00095 { 00096 case __QI_VECTOR : 00097 { 00098 const Vector & data = * (const Vector *) source; 00099 return data[i] < data[j]; 00100 } 00101 case __QI_INTARRAY : 00102 { 00103 const IntArray & data = * (const IntArray *) source; 00104 return data[i] < data[j]; 00105 } 00106 case __QI_STRINGARRAY : 00107 { 00108 const StringArray & data = * (const StringArray *) source; 00109 return data[i].SlowCompare(data[j]) < 0; 00110 } 00111 } 00112 return 0; 00113 } 00114 00115 void QuickIndex::Sort() 00116 { 00117 struct __QuickIndexStack 00118 { 00119 int left, right; 00120 }; 00121 00122 if (Length() <= 1) 00123 return; 00124 00125 // Create a pseudo-stack to avoid recursion 00126 __QuickIndexStack stack[32]; 00127 00128 int stackIdx = 0; 00129 00130 // Size of minimum partition to median of three 00131 const int Threshold = 7; 00132 00133 // current partitions 00134 int lsize, rsize; 00135 int l, mid, r; 00136 int scanl, scanr, pivot; 00137 00138 l = 0; 00139 r = Length() - 1; 00140 00141 while (1) 00142 { 00143 while (r > l) 00144 { 00145 if (r - l > Threshold) 00146 // QuickSort : median of three partitioning 00147 { 00148 mid = (r + l) / 2; 00149 00150 // sort l, mid, and r 00151 if (IsBefore(mid, l)) 00152 Swap(mid, l); 00153 00154 if (IsBefore(r, l)) 00155 Swap(r, l); 00156 00157 if (IsBefore(r, mid)) 00158 Swap(r, mid); 00159 00160 // set up for partitioning... 00161 pivot = r - 1; 00162 00163 Swap(mid, pivot); 00164 00165 scanl = l + 1; 00166 scanr = r - 2; 00167 } 00168 else 00169 { 00170 // set up random partition -- faster 00171 pivot = r; 00172 scanl = l; 00173 scanr = r - 1; 00174 } 00175 00176 while (1) 00177 { 00178 // scan from left for element >= pivot 00179 while ((scanl < r) && IsBefore(scanl, pivot)) 00180 ++scanl; 00181 00182 while ((scanr > l) && IsBefore(pivot, scanr)) 00183 --scanr; 00184 00185 // if scans have met, we are done 00186 if (scanl >= scanr) 00187 break; 00188 00189 Swap(scanl, scanr); 00190 00191 if (scanl < r) 00192 ++scanl; 00193 00194 if (scanr > l) 00195 --scanr; 00196 } 00197 00198 // Exchange final element 00199 Swap(pivot, scanl); 00200 00201 // Place largest partition on stack 00202 lsize = scanl - l; 00203 rsize = r - scanl; 00204 00205 if (lsize > rsize) 00206 { 00207 // if size is one we are done 00208 ++ stackIdx; 00209 00210 stack[stackIdx].left = l; 00211 stack[stackIdx].right = scanl - 1; 00212 00213 if (rsize != 0) 00214 l = scanl + 1; 00215 else 00216 break; 00217 } 00218 else 00219 { 00220 // if size is one we are done 00221 ++ stackIdx; 00222 00223 stack[stackIdx].left = scanl + 1; 00224 stack[stackIdx].right = r; 00225 00226 if (lsize != 0) 00227 r = scanl - 1; 00228 else 00229 break; 00230 } 00231 } 00232 00233 // iterate with values from stack 00234 if (stackIdx) 00235 { 00236 l = stack[stackIdx].left; 00237 r = stack[stackIdx].right; 00238 00239 --stackIdx; 00240 } 00241 else 00242 break; 00243 } 00244 } 00245 00246 00247 00248 00249 00250 00251