libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "GenotypeLists.h" 00019 00020 // When the next line is uncommented, the genotype elimination routines 00021 // produce a lot of output useful for debugging 00022 // #define DEBUG_ELIMINATOR 00023 00024 GenotypeList::GenotypeList() 00025 { 00026 ignore = false; 00027 } 00028 00029 bool GenotypeList::EliminateGenotypes(Pedigree & ped, Family * family, int marker) 00030 { 00031 // First, allocate a genotype list for the family 00032 GenotypeList * list = new GenotypeList [family->count]; 00033 00034 // Next, update the possible allele lists for each individual 00035 InitializeList(list, ped, family, marker); 00036 00037 // Then, do multiple rounds of elimination until a problem is found 00038 // or no changes are made 00039 00040 #ifdef DEBUG_ELIMINATOR 00041 Print(list, ped, family, marker); 00042 #endif 00043 00044 while (PairwiseCheck(list, ped, family) || FamilyCheck(list, ped, family)) 00045 #ifdef DEBUG_ELIMINATOR 00046 Print(list, ped, family, marker) 00047 #endif 00048 ; 00049 00050 for (int i = 0; i < family->count; i++) 00051 if (!list[i].ignore && list[i].allele1.Length() == 0) 00052 { 00053 printf("%s - Family %s has a subtle genotype inconsistency\n", 00054 (const char *) ped.markerNames[marker], (const char *) family->famid); 00055 00056 delete [] list; 00057 return false; 00058 } 00059 00060 delete [] list; 00061 return true; 00062 } 00063 00064 void GenotypeList::InitializeList(GenotypeList * list, Pedigree & ped, Family * family, int marker) 00065 { 00066 for (int i = family->count - 1; i >= 0; i--) 00067 { 00068 Person & person = ped[family->path[i]]; 00069 int id = person.traverse; 00070 bool maleX = person.sex == SEX_MALE && ped.chromosomeX; 00071 00072 #ifdef DEBUG_ELIMINATOR 00073 printf("Initializing genotype list for %s ...\n", (const char *) person.pid); 00074 #endif 00075 00076 // If an individual is genotyped ... 00077 if (person.markers[marker].isKnown()) 00078 { 00079 // Their genotype list starts with just one entry! 00080 list[id].Dimension(1); 00081 list[id].SetGenotype(0, person.markers[marker][0], person.markers[marker][1]); 00082 list[id].alleles.Clear(); 00083 list[id].alleles.Push(person.markers[marker][0]); 00084 list[id].alleles.PushIfNew(person.markers[marker][1]); 00085 list[id].ignore = false; 00086 00087 // "Heterozygous" males have no possible genotypes 00088 if (maleX && person.markers[marker].isHeterozygous()) 00089 list[id].Dimension(0); 00090 } 00091 else if (list[id].alleles.Length()) 00092 if (person.sex == SEX_MALE && ped.chromosomeX) 00093 { 00094 // Males only carry one X chromosome 00095 list[id].Dimension(list[id].alleles.Length() + 1); 00096 00097 for (int i = 0, out = 0; i < list[id].alleles.Length(); i++) 00098 list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[i]); 00099 list[id].SetGenotype(list[id].alleles.Length(), -1, -1); 00100 00101 list[id].ignore = false; 00102 } 00103 else 00104 { 00105 // Build the genotype list based on the available allele lists 00106 int count = list[id].alleles.Length() * (list[id].alleles.Length() + 3) / 2 + 1; 00107 00108 list[id].Dimension(count); 00109 00110 for (int i = 0, out = 0; i < list[id].alleles.Length(); i++) 00111 { 00112 // Allow for all pairs of "transmitted" alleles 00113 for (int j = 0; j <= i; j++) 00114 list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[j]); 00115 00116 // Allow for an unstransmitted allele 00117 list[id].SetGenotype(out++, list[id].alleles[i], -1); 00118 } 00119 00120 // Allow for a pair of untransmitted alleles 00121 list[id].SetGenotype(count - 1, -1, -1); 00122 00123 list[id].ignore = false; 00124 } 00125 else 00126 list[id].ignore = true; 00127 00128 // If the individual is a founder this is all there is to it 00129 if (i < family->founders) continue; 00130 00131 // If the individual is not a founder, update the parental genotype lists... 00132 int fatid = person.father->traverse; 00133 int motid = person.mother->traverse; 00134 00135 for (int i = 0; i < list[id].alleles.Length(); i++) 00136 { 00137 list[motid].alleles.PushIfNew(list[id].alleles[i]); 00138 if (!maleX) list[fatid].alleles.PushIfNew(list[id].alleles[i]); 00139 } 00140 } 00141 } 00142 00143 bool GenotypeList::PairwiseCheck(GenotypeList * list, Pedigree & ped, Family * family) 00144 { 00145 #ifdef DEBUG_ELIMINATOR 00146 printf("Checking Relative Pairs ...\n"); 00147 #endif 00148 00149 bool changed = false; 00150 00151 for (int i = family->count - 1; i >= family->founders; i--) 00152 { 00153 Person & person = ped[family->path[i]]; 00154 00155 int id = person.traverse; 00156 int fatid = person.father->traverse; 00157 int motid = person.mother->traverse; 00158 00159 bool maleX = person.sex == SEX_MALE && ped.chromosomeX; 00160 00161 if (list[id].ignore) continue; 00162 00163 // Check if genotypes are consistent with paternal genotypes 00164 for (int i = 0; i < list[id].allele1.Length(); i++) 00165 { 00166 int al1 = list[id].allele1[i]; 00167 int al2 = list[id].allele2[i]; 00168 00169 // Remove offspring genotypes incompatible with parental genotypes 00170 if ((maleX && !list[motid].Matches(al1) && al1 != -1) || 00171 (!maleX && !(al1 == -1 && al2 == -1) && 00172 !(list[fatid].Matches(al1) && (al2 == -1 || list[motid].Matches(al2))) && 00173 !((al2 == -1 || list[fatid].Matches(al2)) && list[motid].Matches(al1)))) 00174 { 00175 list[id].Delete(i--); 00176 changed = true; 00177 } 00178 } 00179 00180 // The offspring genotype list allows for a wild-card untransmitted allele 00181 // so any single parental genotype is possible 00182 if (list[id].Matches(-1)) 00183 continue; 00184 00185 // Check if genotypes are consistent with offspring genotypes 00186 for (int i = 0; i < list[motid].allele1.Length(); i++) 00187 { 00188 int al1 = list[motid].allele1[i]; 00189 int al2 = list[motid].allele2[i]; 00190 00191 // Remove genotypes incompatible with offspring genotypes 00192 if (!list[id].Matches(al1) && 00193 !list[id].Matches(al2)) 00194 { 00195 list[motid].Delete(i--); 00196 changed = true; 00197 } 00198 } 00199 00200 // Males don't affect genotype lists for their fathers 00201 if (maleX) continue; 00202 00203 // Check if genotypes are consistent with offspring genotypes 00204 for (int i = 0; i < list[fatid].allele1.Length(); i++) 00205 { 00206 int al1 = list[fatid].allele1[i]; 00207 int al2 = list[fatid].allele2[i]; 00208 00209 // Remove genotypes incompatible with offspring genotypes 00210 if (!list[id].Matches(al1) && 00211 !list[id].Matches(al2)) 00212 { 00213 list[fatid].Delete(i--); 00214 changed = true; 00215 } 00216 } 00217 00218 #ifdef DEBUG_ELIMINATOR 00219 printf("Done checking individual %s\n", (const char *) person.pid); 00220 Print(list, ped, family, 0); 00221 #endif 00222 } 00223 00224 return changed; 00225 } 00226 00227 00228 bool GenotypeList::FamilyCheck(GenotypeList * list, Pedigree & ped, Family * family) 00229 { 00230 #ifdef DEBUG_ELIMINATOR 00231 printf("Checking Nuclear Families ...\n"); 00232 #endif 00233 00234 bool changed = false; 00235 00236 for (int i = family->count - 1; i >= family->founders; i--) 00237 { 00238 Person & person = ped[family->path[i]]; 00239 00240 int fatid = person.father->traverse; 00241 int motid = person.mother->traverse; 00242 00243 // Only go through the loop once per sibship 00244 if (person.sibs[0] != &person || list[fatid].ignore || list[motid].ignore) 00245 continue; 00246 00247 #ifdef DEBUG_ELIMINATOR 00248 printf("Checking Sibship with %s ...\n", (const char *) person.pid); 00249 #endif 00250 00251 // Reset checked genotypes for the mother, father and child 00252 list[fatid].checked = 0; 00253 list[motid].checked = 0; 00254 00255 for (int i = 0; i < person.sibCount; i++) 00256 list[person.sibs[i]->traverse].checked = 0; 00257 00258 // Go through each of the paternal genotypes 00259 changed |= TrimParent(list, person, fatid, motid); 00260 00261 // Go through each of maternal genotypes 00262 changed |= TrimParent(list, person, motid, fatid); 00263 00264 // Sort out the unchecked offspring genotypes ... 00265 for (int i = 0; i < person.sibCount; i++) 00266 { 00267 int sibid = person.sibs[i]->traverse; 00268 bool maleX = person.sibs[i]->sex == SEX_MALE && ped.chromosomeX; 00269 00270 // For dealing with male X chromosomes, the pairwise check is all we need 00271 if (maleX) continue; 00272 00273 for (int j = list[sibid].checked; j < list[sibid].allele1.Length(); j++) 00274 changed |= Cleanup(list, person, motid, fatid, sibid, j); 00275 } 00276 00277 #ifdef DEBUG_ELIMINATOR 00278 // Print(list, ped, family, 0); 00279 #endif 00280 } 00281 00282 return changed; 00283 } 00284 00285 bool GenotypeList::Matches(int genotype, int allele) 00286 { 00287 return allele1[genotype] == allele || allele2[genotype] == allele; 00288 } 00289 00290 bool GenotypeList::Matches(int allele) 00291 { 00292 return allele1.Find(allele) != -1 || allele2.Find(allele) != -1; 00293 } 00294 00295 int GenotypeList::SaveGenotype(int genotype) 00296 { 00297 if (checked > genotype) 00298 return genotype; 00299 00300 if (checked != genotype) 00301 { 00302 allele1.Swap(genotype, checked); 00303 allele2.Swap(genotype, checked); 00304 } 00305 00306 return checked++; 00307 } 00308 00309 bool GenotypeList::CheckTrio(GenotypeList * list, int fatid, int motid, int child, 00310 int i, int j, int k) 00311 { 00312 // TODO: add tests for this code... 00313 return (list[fatid].Matches(i, list[child].allele1[k]) && 00314 (list[motid].Matches(j, list[child].allele2[k]) || list[child].allele2[k] == -1)) || 00315 ((list[fatid].Matches(i, list[child].allele2[k]) || list[child].allele2[k] == -1) && 00316 list[motid].Matches(j, list[child].allele1[k])) || 00317 (list[child].allele1[k] == -1 && list[child].allele2[k] == -1); 00318 } 00319 00320 void GenotypeList::Dimension(int genotypes) 00321 { 00322 allele1.Dimension(genotypes); 00323 allele2.Dimension(genotypes); 00324 } 00325 00326 void GenotypeList::SetGenotype(int genotype, int al1, int al2) 00327 { 00328 allele1[genotype] = al1; 00329 allele2[genotype] = al2; 00330 } 00331 00332 void GenotypeList::Delete(int genotype) 00333 { 00334 allele1.Delete(genotype); 00335 allele2.Delete(genotype); 00336 } 00337 00338 bool GenotypeList::TrimParent(GenotypeList * list, Person & person, int motid, int fatid) 00339 { 00340 bool trimmed = false; 00341 00342 while (list[motid].checked < list[motid].allele1.Length()) 00343 { 00344 int current = list[motid].allele1.Length() - 1; 00345 bool saved = false; 00346 00347 // Pair it with each possible paternal genotype 00348 for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--) 00349 { 00350 int matches = 0; 00351 00352 // Find out if the pairing is compatible with at least one genotype for each child 00353 for (int j = 0; j < person.sibCount; j++) 00354 { 00355 int sibid = person.sibs[j]->traverse; 00356 int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX; 00357 00358 // Since we have done the pairwise check, there is nothing more 00359 // to do for males ... 00360 if (list[sibid].ignore || maleX) 00361 { 00362 matches++; 00363 continue; 00364 } 00365 00366 for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--) 00367 if (CheckTrio(list, motid, fatid, sibid, current, i, k)) 00368 { 00369 matches++; 00370 break; 00371 } 00372 00373 if (matches != j + 1) 00374 break; 00375 } 00376 00377 // Save maternal and paternal genotypes, mark all compatible sibling genotypes 00378 if (matches == person.sibCount) 00379 { 00380 for (int j = 0; j < person.sibCount; j++) 00381 { 00382 int sibid = person.sibs[j]->traverse; 00383 00384 for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++) 00385 if (CheckTrio(list, motid, fatid, sibid, current, i, k)) 00386 list[sibid].SaveGenotype(k); 00387 } 00388 00389 list[motid].SaveGenotype(current); 00390 list[fatid].SaveGenotype(i); 00391 00392 saved = true; 00393 00394 break; 00395 } 00396 } 00397 00398 if (!saved) 00399 { 00400 list[motid].Delete(current); 00401 trimmed = true; 00402 } 00403 } 00404 00405 return trimmed; 00406 } 00407 00408 bool GenotypeList::Cleanup(GenotypeList * list, Person & person, int motid, int fatid, int child, int geno) 00409 { 00410 for (int current = 0; current < list[motid].allele1.Length(); current++) 00411 for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--) 00412 if (CheckTrio(list, motid, fatid, child, current, i, geno)) 00413 { 00414 int matches = 0; 00415 00416 // Find out if the pairing is compatible with at least one genotype for each child 00417 for (int j = 0; j < person.sibCount; j++) 00418 { 00419 int sibid = person.sibs[j]->traverse; 00420 int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX; 00421 00422 // After completing the pairwise check, all males are guaranteed 00423 // to be compatible with their mothers 00424 if (list[sibid].ignore || maleX) 00425 { 00426 matches++; 00427 continue; 00428 } 00429 00430 for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--) 00431 if (CheckTrio(list, motid, fatid, sibid, current, i, k)) 00432 { 00433 matches++; 00434 break; 00435 } 00436 00437 if (matches != j + 1) 00438 break; 00439 } 00440 00441 // Update list of compatible sibling genotypes 00442 if (matches == person.sibCount) 00443 for (int j = 0; j < person.sibCount; j++) 00444 { 00445 int sibid = person.sibs[j]->traverse; 00446 00447 for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++) 00448 if (CheckTrio(list, motid, fatid, sibid, current, i, k)) 00449 list[sibid].SaveGenotype(k); 00450 00451 return false; 00452 } 00453 } 00454 00455 list[child].Delete(geno); 00456 00457 return true; 00458 } 00459 00460 void GenotypeList::Print(GenotypeList * list, Pedigree & ped, Family * family, int marker) 00461 { 00462 MarkerInfo * info = ped.GetMarkerInfo(marker); 00463 00464 for (int i = 0; i < family->count; i++) 00465 { 00466 printf("%s - ", (const char *) ped[family->path[i]].pid); 00467 00468 for (int j = 0; j < list[i].allele1.Length(); j++) 00469 { 00470 if (list[i].allele1[j] == -1) 00471 printf("*/"); 00472 else 00473 printf("%s/", (const char *) info->GetAlleleLabel(list[i].allele1[j])); 00474 00475 if (list[i].allele2[j] == -1) 00476 printf("* "); 00477 else 00478 printf("%s ", (const char *) info->GetAlleleLabel(list[i].allele2[j])); 00479 } 00480 00481 printf("\n"); 00482 } 00483 printf("\n"); 00484 } 00485