PedigreeAlleleFreq.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "PedigreeAlleleFreq.h"
00019 #include "QuickIndex.h"
00020 #include "Error.h"
00021
00022 #include <math.h>
00023
00024 int CountAlleles(Pedigree & , int marker)
00025 {
00026
00027
00028 MarkerInfo * info = Pedigree::GetMarkerInfo(marker);
00029
00030 return info->CountAlleles();
00031 }
00032
00033 void LumpAlleles(Pedigree & ped, int marker, double threshold, bool reorder)
00034 {
00035
00036 int alleles = ped.CountAlleles(marker);
00037
00038 if (alleles < 2) return;
00039
00040 MarkerInfo * info = PedigreeGlobals::GetMarkerInfo(marker);
00041
00042 if (alleles < info->freq.Length())
00043 alleles = info->freq.Length() - 1;
00044
00045 IntArray counts(alleles + 1);
00046 counts.Zero();
00047
00048
00049 for (int i = 0; i < ped.count; i++)
00050 {
00051 counts[int(ped[i].markers[marker][0])]++;
00052 counts[int(ped[i].markers[marker][1])]++;
00053 }
00054
00055
00056 int total = 0;
00057 for (int i = 1; i <= alleles; i++)
00058 total += counts[i];
00059 int thresh = int(total * threshold);
00060
00061
00062
00063
00064
00065
00066 if (thresh == 0)
00067 for (int i = 1; i < info->freq.Length(); i++)
00068 if (counts[i] == 0 && info->freq[i] > 0.0)
00069 counts[i] = 1, total++;
00070
00071
00072
00073 if (!reorder)
00074 {
00075 QuickIndex index(info->alleleLabels);
00076 index.Reverse();
00077
00078 for (int i = 0; i < index.Length(); i++)
00079 counts[index[i]] = i + 1;
00080
00081 total = counts.Sum(1, counts.Length() - 1);
00082 }
00083
00084
00085
00086 counts[0] = total + 1;
00087 QuickIndex index(counts);
00088 index.Reverse();
00089
00090
00091
00092
00093 int N = 0;
00094 bool rare = false;
00095 for (int i = 0; i <= alleles; i++)
00096 if (counts[index[i]] > thresh)
00097 {
00098 counts[index[i]] = i;
00099 N++;
00100 }
00101 else
00102 {
00103 if (counts[index[i]] > 0)
00104 rare = true;
00105 counts[index[i]] = N;
00106 }
00107
00108
00109 for (int i = 0; i < ped.count; i++)
00110 {
00111 Alleles & current = ped[i].markers[marker];
00112 current[0] = counts[current[0]];
00113 current[1] = counts[current[1]];
00114 }
00115
00116 StringArray oldLabels(info->alleleLabels);
00117 String label;
00118
00119 info->alleleLabels.Clear();
00120 info->alleleNumbers.Clear();
00121
00122 for (int i = 0; i < N; i++)
00123 {
00124 if (oldLabels.Length() <= index[i])
00125 info->alleleLabels.Push(label = index[i]);
00126 else
00127 info->alleleLabels.Push(oldLabels[index[i]]);
00128
00129 if (i) info->alleleNumbers.SetInteger(info->alleleLabels.Last(), i);
00130 }
00131
00132
00133 if (info->freq.Length())
00134 {
00135 Vector freq(info->freq);
00136
00137 info->freq.Dimension(N);
00138 info->freq[0] = 0.0;
00139
00140 for (int i = 1; i < N; i++)
00141 {
00142 info->freq[i] = freq[index[i]];
00143 freq[index[i]] = 0;
00144 }
00145
00146 if ((1.0 - info->freq.Sum()) > 1e-10)
00147 rare = true;
00148
00149 if (rare)
00150 {
00151 info->freq.Dimension(N + 1);
00152 info->freq[N] = 1.0 - info->freq.Sum();
00153 }
00154 }
00155
00156 if (rare)
00157 {
00158 info->alleleLabels.Push("OTHER");
00159 info->alleleNumbers.SetInteger("OTHER", info->alleleLabels.Length());
00160 }
00161 }
00162
00163 bool EstimateFrequencies(Pedigree & ped, int marker, int estimator)
00164 {
00165 int alleleCount = CountAlleles(ped, marker);
00166
00167 IntArray founder(alleleCount + 1);
00168 IntArray all(alleleCount + 1);
00169
00170 founder.Zero();
00171 all.Zero();
00172
00173 for (int i = 0; i < ped.count; i++)
00174 {
00175
00176
00177 all[ped[i].markers[marker][0]]++;
00178 if (!ped.chromosomeX || ped[i].sex != SEX_MALE)
00179 all[ped[i].markers[marker][1]]++;
00180 if (!ped[i].isFounder()) continue;
00181 founder[ped[i].markers[marker][0]]++;
00182 if (!ped.chromosomeX || ped[i].sex != SEX_MALE)
00183 founder[ped[i].markers[marker][1]]++;
00184 }
00185
00186 MarkerInfo * info = ped.GetMarkerInfo(marker);
00187
00188 if (info->freq.dim > 0)
00189 {
00190
00191 if (alleleCount >= info->freq.dim)
00192 error("For marker %s, input files define %d alleles, but at least\n"
00193 "one other allele (named '%s') occurs in the pedigree\n",
00194 (const char *) info->name, info->freq.dim - 1,
00195 (const char *) info->GetAlleleLabel(alleleCount));
00196
00197 for (int i = 1; i <= alleleCount; i++)
00198 if (all[i] > 0 && info->freq[i] <= 0.0)
00199 error("Although allele %s for marker %s has frequency zero,\n"
00200 "it occurs %d times in the pedigree",
00201 (const char *) info->GetAlleleLabel(i), (const char *) info->name, all[i]);
00202
00203 return false;
00204 }
00205 else
00206 {
00207 if (alleleCount < 1)
00208 {
00209
00210
00211 info->freq.Dimension(3);
00212 info->freq[0] = 0.0;
00213 info->freq[1] = 0.99999;
00214 info->freq[2] = 0.00001;
00215 return true;
00216 }
00217
00218 info->freq.Dimension(alleleCount + 1);
00219 info->freq.Zero();
00220
00221 if (estimator == FREQ_FOUNDERS && founder.Sum() > founder[0])
00222 {
00223
00224
00225 for (int i = 1; i <= alleleCount; i++)
00226 if (founder[i] == 0 && all[i] > 0)
00227 founder[i] = 1;
00228
00229
00230 double factor = 1.0 / (founder.Sum() - founder[0]);
00231
00232 for (int i = 1; i <= alleleCount; i++)
00233 info->freq[i] = founder[i] * factor;
00234 }
00235 else if (estimator == FREQ_ALL || estimator == FREQ_FOUNDERS)
00236 {
00237
00238 double factor = 1.0 / (all.Sum() - all[0]);
00239
00240 for (int i = 1; i <= alleleCount; i++)
00241 info->freq[i] = all[i] * factor;
00242 }
00243 else if (estimator == FREQ_EQUAL)
00244
00245 {
00246
00247 all[0] = 0;
00248 int alleles = all.CountIfGreater(0);
00249 double freq = 1.0 / alleles;
00250
00251
00252 for (int i = 0; i <= alleleCount; i++)
00253 info->freq[i] = all[i] ? freq : 0.0;
00254 }
00255 }
00256
00257 return true;
00258 }
00259