libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "Pedigree.h" 00019 #include "GenotypeLists.h" 00020 #include "MemoryInfo.h" 00021 #include "Constant.h" 00022 #include "Error.h" 00023 #include "Sort.h" 00024 00025 #include <stdlib.h> 00026 00027 bool Pedigree::sexAsCovariate = false; 00028 String Pedigree::missing("-99.999"); 00029 00030 Pedigree::Pedigree() : pd() 00031 { 00032 haveTwins = count = 0; 00033 size = 10000; 00034 persons = new Person *[size]; 00035 familyCount = 0; 00036 families = new Family * [1]; 00037 multiPd = NULL; 00038 multiFileCount = 0; 00039 } 00040 00041 Pedigree::~Pedigree() 00042 { 00043 for (int i = 0; i < count; i++) 00044 delete persons[i]; 00045 00046 for (int i = 0; i < familyCount; i++) 00047 delete families[i]; 00048 00049 delete [] families; 00050 delete [] persons; 00051 00052 if (multiPd != NULL) 00053 delete [] multiPd; 00054 } 00055 00056 void Pedigree::Sort() 00057 { 00058 QuickSort(persons, count, sizeof(Person *), 00059 COMPAREFUNC Pedigree::ComparePersons); 00060 00061 haveTwins = 0; 00062 00063 // Check for structural problems in input pedigree 00064 bool problem = false; 00065 00066 // Check that we have no duplicates... 00067 for (int i = 1; i < count; i++) 00068 if (ComparePersons((const Person **) &persons[i-1], 00069 (const Person **) &persons[i]) == 0) 00070 { 00071 printf("Family %s: Person %s is duplicated\n", 00072 (const char *) persons[i]->famid, 00073 (const char *) persons[i]->pid); 00074 problem = true; 00075 00076 do 00077 { 00078 i++; 00079 } 00080 while (i < count && 00081 ComparePersons((const Person **) &persons[i-1], 00082 (const Person **) &persons[i]) == 0); 00083 } 00084 00085 // Assign parents... 00086 for (int i = 0; i < count; i++) 00087 { 00088 persons[i]->serial = i; 00089 persons[i]->father = FindPerson(persons[i]->famid, persons[i]->fatid); 00090 persons[i]->mother = FindPerson(persons[i]->famid, persons[i]->motid); 00091 00092 problem |= !persons[i]->CheckParents(); 00093 00094 persons[i]->AssessStatus(); 00095 00096 // Check if we have any twins... 00097 haveTwins |= persons[i]->zygosity; 00098 } 00099 00100 if (problem) 00101 error("Please correct problems with pedigree structure\n"); 00102 00103 MakeSibships(); 00104 MakeFamilies(); 00105 } 00106 00107 void Pedigree::MakeSibships() 00108 { 00109 Person ** sibs = new Person * [count]; 00110 for (int i = 0; i < count; i++) 00111 sibs[i] = persons[i]; 00112 00113 QuickSort(sibs, count, sizeof(Person *), 00114 COMPAREFUNC Pedigree::CompareParents); 00115 00116 for (int first = 0; first < count; first++) 00117 if (!sibs[first]->isFounder()) 00118 { 00119 int last = first + 1; 00120 while (last < count) 00121 if (sibs[first]-> mother != sibs[last]->mother || 00122 sibs[first]-> father != sibs[last]->father) 00123 break; 00124 else last++; 00125 last --; 00126 00127 for (int j = first; j <= last; j++) 00128 { 00129 if (sibs[j]->sibCount) delete [] sibs[j]->sibs; 00130 sibs[j]->sibCount = last - first + 1; 00131 sibs[j]->sibs = new Person * [sibs[j]->sibCount]; 00132 for (int k = first; k <= last; k++) 00133 sibs[j]->sibs[k - first] = sibs[k]; 00134 } 00135 first = last; 00136 } 00137 delete [] sibs; 00138 } 00139 00140 void Pedigree::MakeFamilies() 00141 { 00142 for (int i = 0; i < familyCount; i++) 00143 delete families[i]; 00144 delete [] families; 00145 00146 familyCount = 0; 00147 families = new Family * [count]; 00148 00149 for (int first=0; first < count; first++) 00150 { 00151 int last = first; 00152 while (last < count) 00153 if (SlowCompare(persons[first]->famid, persons[last]->famid) == 0) 00154 last++; 00155 else break; 00156 00157 families[familyCount] = new Family(*this, first, --last, familyCount); 00158 00159 first = last; 00160 familyCount++; 00161 } 00162 } 00163 00164 // Utility functions for finding a person in a pedigree 00165 00166 struct PedigreeKey 00167 { 00168 const char * famid; 00169 const char * pid; 00170 }; 00171 00172 int CompareKeyToPerson(PedigreeKey * key, Person ** p) 00173 { 00174 int result = SlowCompare(key->famid, (**p).famid); 00175 00176 if (result != 0) 00177 return result; 00178 00179 return SlowCompare(key->pid, (**p).pid); 00180 } 00181 00182 int CompareKeyToFamily(PedigreeKey * key, Family ** f) 00183 { 00184 return SlowCompare(key->famid, (**f).famid); 00185 } 00186 00187 Person * Pedigree::FindPerson(const char * famid, const char * pid) 00188 { 00189 PedigreeKey key; 00190 key.famid = famid; 00191 key.pid = pid; 00192 00193 Person ** result = (Person **) BinarySearch 00194 (&key, persons, count, sizeof(Person *), 00195 COMPAREFUNC CompareKeyToPerson); 00196 00197 return (result == NULL) ? (Person *) NULL : *result; 00198 } 00199 00200 Person * Pedigree::FindPerson(const char *famid, const char *pid, int universe) 00201 { 00202 PedigreeKey key; 00203 key.famid = famid; 00204 key.pid = pid; 00205 00206 Person ** result = (Person **) BinarySearch 00207 (&key, persons, universe, sizeof(Person *), 00208 COMPAREFUNC CompareKeyToPerson); 00209 00210 return (result == NULL) ? (Person *) NULL : *result; 00211 } 00212 00213 Family * Pedigree::FindFamily(const char * famid) 00214 { 00215 PedigreeKey key; 00216 key.famid = famid; 00217 00218 Family ** result = (Family **) BinarySearch 00219 (&key, families, familyCount, sizeof(Family *), 00220 COMPAREFUNC CompareKeyToFamily); 00221 00222 return (result == NULL) ? (Family *) NULL : *result; 00223 } 00224 00225 int Pedigree::CountAlleles(int marker) 00226 { 00227 return ::CountAlleles(*this, marker); 00228 } 00229 00230 void Pedigree::LumpAlleles(double min, bool reorder) 00231 { 00232 if (min > 0.0) 00233 printf("Lumping alleles with frequencies of %.2f or less...\n\n", min); 00234 00235 for (int m=0; m < markerCount; m++) 00236 ::LumpAlleles(*this, m, min, reorder); 00237 } 00238 00239 void Pedigree::EstimateFrequencies(int estimator, bool quiet) 00240 { 00241 bool estimated = false; 00242 int line = 3; 00243 00244 const char * estimators[] = 00245 { "using all genotypes", "using founder genotypes", "assumed equal" }; 00246 00247 bool condensed = markerCount > 100; 00248 int grain = markerCount / 50, estimates = 0; 00249 00250 for (int m=0; m < markerCount; m++) 00251 if (::EstimateFrequencies(*this, m, estimator)) 00252 if (!quiet) 00253 { 00254 if (!estimated) 00255 printf("Estimating allele frequencies... [%s]\n ", 00256 estimators[estimator]), estimated = true; 00257 00258 if (condensed) 00259 { 00260 if (estimates++ % grain == 0) 00261 { 00262 printf("."); 00263 fflush(stdout); 00264 } 00265 continue; 00266 } 00267 00268 if (line + markerNames[m].Length() + 1 > 79) 00269 printf("\n "), line = 3; 00270 00271 printf("%s ", (const char *) markerNames[m]); 00272 line += markerNames[m].Length() + 1; 00273 } 00274 00275 if (estimated) 00276 printf(condensed ? "\nDone estimating frequencies for %d markers\n\n" : "\n\n", estimates); 00277 } 00278 00279 int Pedigree::ComparePersons(const Person ** p1, const Person ** p2) 00280 { 00281 int result = SlowCompare((**p1).famid, (**p2).famid); 00282 00283 if (result != 0) return result; 00284 00285 return SlowCompare((**p1).pid, (**p2).pid); 00286 } 00287 00288 int Pedigree::CompareParents(const Person ** p1, const Person ** p2) 00289 { 00290 int result = SlowCompare((**p1).famid, (**p2).famid); 00291 00292 if (result) return result; 00293 00294 result = SlowCompare((**p1).fatid, (**p2).fatid); 00295 00296 if (result) return result; 00297 00298 return SlowCompare((**p1).motid, (**p2).motid); 00299 } 00300 00301 void Pedigree::Grow() 00302 { 00303 size *= 2; 00304 00305 Person ** temp = new Person * [size]; 00306 if (temp == NULL) error("Out of memory"); 00307 00308 for (int i=0; i<count; i++) 00309 temp[i] = persons[i]; 00310 00311 delete [] persons; 00312 persons = temp; 00313 } 00314 00315 void Pedigree::Add(Person & rhs) 00316 { 00317 if (count == size) 00318 Grow(); 00319 00320 persons[count] = new Person(); 00321 persons[count++]->Copy(rhs); 00322 } 00323 00324 void Pedigree::WriteDataFile(FILE * output) 00325 { 00326 // write in the following order: 00327 // markers, traits, affections, covariates 00328 00329 if (haveTwins) 00330 fprintf(output, " Z Zygosity\n"); 00331 00332 for (int m = 0; m < markerCount; m++) 00333 fprintf(output, " M %s\n", (const char *) markerNames[m]); 00334 00335 for (int t = 0; t < traitCount; t++) 00336 fprintf(output, " T %s\n", (const char *) traitNames[t]); 00337 00338 for (int a = 0; a < affectionCount; a++) 00339 fprintf(output, " A %s\n", (const char *) affectionNames[a]); 00340 00341 for (int c = 0; c < covariateCount; c++) 00342 fprintf(output, " C %s\n", (const char *) covariateNames[c]); 00343 00344 for (int s = 0; s < stringCount; s++) 00345 fprintf(output, " $ %s\n", (const char *) stringNames[s]); 00346 00347 fprintf(output, " E END-OF-DATA \n"); 00348 } 00349 00350 void Pedigree::WritePedigreeFile(FILE * output) 00351 { 00352 MarkerInfo ** info = new MarkerInfo * [markerCount]; 00353 00354 for (int i = 0; i < markerCount; i++) 00355 info[i] = GetMarkerInfo(i); 00356 00357 for (int i = 0; i < count; i++) 00358 WriteRecodedPerson(output, i, info); 00359 fprintf(output, "end\n"); 00360 00361 delete [] info; 00362 } 00363 00364 void Pedigree::WritePerson(FILE * output, int person, const char * famid, 00365 const char * pid, const char * fatid, const char * motid) 00366 { 00367 WriteRecodedPerson(output, person, NULL, famid, pid, fatid, motid); 00368 } 00369 00370 void Pedigree::WriteRecodedPerson( 00371 FILE * output, int person, MarkerInfo ** markerInfo, 00372 const char * famid, const char * pid, const char * fatid, 00373 const char * motid) 00374 { 00375 Person * p = persons[person]; 00376 00377 if (famid == NULL) famid = p->famid; 00378 if (pid == NULL) pid = p->pid; 00379 if (fatid == NULL) fatid = p->fatid; 00380 if (motid == NULL) motid = p->motid; 00381 00382 // write in the following order: 00383 // markers, traits, affections, covariates 00384 00385 fprintf(output, "%s\t%s\t%s\t%s\t%d\t", 00386 famid, pid, fatid, motid, p->sex); 00387 00388 const char * twinCodes[] = {"0", "MZ", "DZ"}; 00389 00390 if (haveTwins) 00391 { 00392 if (p->zygosity <= 2) 00393 fprintf(output, "%s\t", twinCodes[p->zygosity]); 00394 else 00395 fprintf(output, "%d\t", p->zygosity); 00396 } 00397 00398 for (int m = 0; m < markerCount; m++) 00399 if (markerInfo == NULL) 00400 fprintf(output, markerCount < 20 ? "%3d/%3d\t" : "%d/%d\t", 00401 p->markers[m][0], p->markers[m][1]); 00402 else 00403 fprintf(output, markerCount < 20 ? "%3s/%3s\t" : "%s/%s\t", 00404 (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][0]), 00405 (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][1])); 00406 00407 for (int t = 0; t < traitCount; t++) 00408 if (p->isPhenotyped(t)) 00409 fprintf(output, "%.3f\t", p->traits[t]); 00410 else 00411 fprintf(output, "x\t"); 00412 00413 for (int a = 0; a < affectionCount; a++) 00414 if (p->isDiagnosed(a)) 00415 fprintf(output, "%d\t", p->affections[a]); 00416 else 00417 fprintf(output, "x\t"); 00418 00419 for (int c = 0; c < covariateCount; c++) 00420 if (p->isControlled(c)) 00421 fprintf(output, "%.3f\t", p->covariates[c]); 00422 else 00423 fprintf(output, "x\t"); 00424 00425 for (int s = 0; s < stringCount; s++) 00426 if (!p->strings[s].IsEmpty()) 00427 fprintf(output, "%s\t", (const char *) p->strings[s]); 00428 else 00429 fprintf(output, ".\t"); 00430 00431 fprintf(output, "\n"); 00432 } 00433 00434 void Pedigree::WriteDataFile(const char * output) 00435 { 00436 FILE * f = fopen(output, "wt"); 00437 if (f == NULL) error("Couldn't open data file %s", output); 00438 WriteDataFile(f); 00439 fclose(f); 00440 } 00441 00442 void Pedigree::WritePedigreeFile(const char * output) 00443 { 00444 FILE * f = fopen(output, "wt"); 00445 if (f == NULL) error("Couldn't open pedigree file %s", output); 00446 WritePedigreeFile(f); 00447 fclose(f); 00448 } 00449 00450 void Pedigree::PrepareDichotomization() 00451 { 00452 00453 for (int t = 0; t < traitCount; t++) 00454 { 00455 String new_affection = traitNames[t] + "*"; 00456 GetAffectionID(new_affection); 00457 } 00458 } 00459 00460 int Pedigree::Dichotomize(int t, double mean) 00461 { 00462 String new_affection = traitNames[t] + "*"; 00463 00464 int af = GetAffectionID(new_affection); 00465 00466 if (mean == _NAN_) 00467 { 00468 mean = 0.0; 00469 double dcount = 0; 00470 for (int i = 0; i < count; i++) 00471 if (persons[i]->isPhenotyped(t) && 00472 !persons[i]->isFounder()) 00473 { 00474 mean += persons[i]->traits[t]; 00475 dcount ++; 00476 } 00477 00478 if (!dcount) return af; 00479 00480 mean /= dcount; 00481 } 00482 00483 printf("Dichotomizing %s around mean of %.3f ...\n", 00484 (const char *) traitNames[t], mean); 00485 00486 for (int i = 0; i < count; i++) 00487 if (persons[i]->isPhenotyped(t) && !persons[i]->isFounder()) 00488 persons[i]->affections[af] = persons[i]->traits[t] > mean ? 2 : 1; 00489 else 00490 persons[i]->affections[af] = 0; 00491 00492 Sort(); 00493 00494 return af; 00495 } 00496 00497 void Pedigree::DichotomizeAll(double mean) 00498 { 00499 for (int t = 0; t < traitCount; t++) 00500 Dichotomize(t, mean); 00501 } 00502 00503 bool Pedigree::InheritanceCheck(bool abortIfInconsistent) 00504 { 00505 bool fail = false; 00506 00507 if (haveTwins) fail |= TwinCheck(); 00508 00509 if (chromosomeX) 00510 fail |= SexLinkedCheck(); 00511 else 00512 fail |= AutosomalCheck(); 00513 00514 if (fail && abortIfInconsistent) 00515 error("Mendelian inheritance errors detected\n"); 00516 00517 return !fail; 00518 } 00519 00520 bool Pedigree::AutosomalCheck() 00521 { 00522 // Arrays indicating which alleles and homozygotes occur 00523 IntArray haplos, genos, counts, failedFamilies; 00524 00525 bool fail = false; 00526 00527 // For each marker ... 00528 for (int m = 0; m < markerCount; m++) 00529 { 00530 MarkerInfo * info = GetMarkerInfo(m); 00531 00532 // Summary for marker 00533 int alleleCount = CountAlleles(m); 00534 int genoCount = alleleCount * (alleleCount + 1) / 2; 00535 00536 // Initialize arrays 00537 haplos.Dimension(alleleCount + 1); 00538 haplos.Set(-1); 00539 00540 genos.Dimension(genoCount + 1); 00541 genos.Set(-1); 00542 00543 failedFamilies.Dimension(familyCount); 00544 failedFamilies.Zero(); 00545 00546 counts.Dimension(alleleCount + 1); 00547 00548 for (int f = 0; f < familyCount; f++) 00549 for (int i = families[f]->first; i <= families[f]->last; i++) 00550 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i]) 00551 { 00552 // This loop runs once per sibship 00553 Alleles fat = persons[i]->father->markers[m]; 00554 Alleles mot = persons[i]->mother->markers[m]; 00555 bool fgeno = fat.isKnown(); 00556 bool mgeno = mot.isKnown(); 00557 00558 // Number of alleles, homozygotes and genotypes in this sibship 00559 int haplo = 0, homo = 0, diplo = 0; 00560 00561 // No. of different genotypes per allele 00562 counts.Zero(); 00563 00564 // In general, there should be no more than 3 genotypes per allele 00565 bool too_many_genos = false; 00566 00567 for (int j = 0; j < persons[i]->sibCount; j++) 00568 if (persons[i]->sibs[j]->isGenotyped(m)) 00569 { 00570 Alleles geno = persons[i]->sibs[j]->markers[m]; 00571 00572 int fat1 = fat.hasAllele(geno.one); 00573 int fat2 = fat.hasAllele(geno.two); 00574 int mot1 = mot.hasAllele(geno.one); 00575 int mot2 = mot.hasAllele(geno.two); 00576 00577 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) || 00578 (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2))) 00579 { 00580 printf("%s - Fam %s: Child %s [%s/%s] has ", 00581 (const char *) markerNames[m], 00582 (const char *) persons[i]->sibs[j]->famid, 00583 (const char *) persons[i]->sibs[j]->pid, 00584 (const char *) info->GetAlleleLabel(geno.one), 00585 (const char *) info->GetAlleleLabel(geno.two)); 00586 00587 if (!fgeno || !mgeno) 00588 printf("%s [%s/%s]\n", 00589 fgeno ? "father" : "mother", 00590 (const char *) info->GetAlleleLabel(fgeno ? fat.one : mot.one), 00591 (const char *) info->GetAlleleLabel(fgeno ? fat.two : mot.two)); 00592 else 00593 printf("parents [%s/%s]*[%s/%s]\n", 00594 (const char *) info->GetAlleleLabel(fat.one), 00595 (const char *) info->GetAlleleLabel(fat.two), 00596 (const char *) info->GetAlleleLabel(mot.one), 00597 (const char *) info->GetAlleleLabel(mot.two)); 00598 00599 fail = true; 00600 failedFamilies[f] = true; 00601 } 00602 else 00603 { 00604 if (haplos[geno.one] != i) 00605 { 00606 haplo++; 00607 haplos[geno.one] = i; 00608 }; 00609 if (haplos[geno.two] != i) 00610 { 00611 haplo++; 00612 haplos[geno.two] = i; 00613 }; 00614 00615 int index = geno.SequenceCoded(); 00616 00617 if (genos[index] != i) 00618 { 00619 genos[index] = i; 00620 diplo++; 00621 counts[geno.one]++; 00622 if (geno.isHomozygous()) 00623 homo++; 00624 else 00625 counts[geno.two]++; 00626 if (counts[geno.one] > 2) too_many_genos = true; 00627 if (counts[geno.two] > 2) too_many_genos = true; 00628 } 00629 } 00630 } 00631 00632 if (fgeno) 00633 { 00634 if (haplos[fat.one] != i) 00635 { 00636 haplo++; 00637 haplos[fat.one] = i; 00638 } 00639 if (haplos[fat.two] != i) 00640 { 00641 haplo++; 00642 haplos[fat.two] = i; 00643 } 00644 homo += fat.isHomozygous(); 00645 } 00646 00647 if (mgeno) 00648 { 00649 if (haplos[mot.one] != i) 00650 { 00651 haplo++; 00652 haplos[mot.one] = i; 00653 } 00654 if (haplos[mot.two] != i) 00655 { 00656 haplo++; 00657 haplos[mot.two] = i; 00658 } 00659 homo += mot.isHomozygous(); 00660 } 00661 00662 if (diplo > 4 || haplo + homo > 4 || (haplo == 4 && too_many_genos)) 00663 { 00664 printf("%s - Fam %s: ", 00665 (const char *) markerNames[m], 00666 (const char *) persons[i]->famid); 00667 if (persons[i]->father->markers[m].isKnown()) 00668 printf("Father %s [%s/%s] has children [", 00669 (const char *) persons[i]->father->pid, 00670 (const char *) info->GetAlleleLabel(fat.one), 00671 (const char *) info->GetAlleleLabel(fat.two)); 00672 else if (persons[i]->mother->markers[m].isKnown()) 00673 printf("Mother %s [%s/%s] has children [", 00674 (const char *) persons[i]->mother->pid, 00675 (const char *) info->GetAlleleLabel(mot.one), 00676 (const char *) info->GetAlleleLabel(mot.two)); 00677 else 00678 printf("Couple %s * %s has children [", 00679 (const char *) persons[i]->mother->pid, 00680 (const char *) persons[i]->father->pid); 00681 00682 for (int j = 0; j < persons[i]->sibCount; j++) 00683 printf("%s%s/%s", j == 0 ? "" : " ", 00684 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one), 00685 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two)); 00686 printf("]\n"); 00687 00688 fail = true; 00689 failedFamilies[f] = true; 00690 } 00691 } 00692 00693 for (int f = 0; f < familyCount; f++) 00694 if (!failedFamilies[f] && 00695 (families[f]->count > families[f]->founders + 1) && 00696 !families[f]->isNuclear()) 00697 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m); 00698 } 00699 00700 if (fail) 00701 printf("\nMendelian inheritance errors detected\n"); 00702 00703 return fail; 00704 } 00705 00706 bool Pedigree::SexLinkedCheck() 00707 { 00708 bool fail = false; 00709 00710 // Keep track of what families fail the basic inheritance check, 00711 // so that we can run later run genotype elimination check on the remainder 00712 IntArray failedFamilies(familyCount); 00713 00714 // For each marker ... 00715 for (int m = 0; m < markerCount; m++) 00716 { 00717 MarkerInfo * info = GetMarkerInfo(m); 00718 00719 failedFamilies.Zero(); 00720 00721 // Check for homozygous males 00722 for (int f = 0; f < familyCount; f++) 00723 for (int i = families[f]->first; i <= families[f]->last; i++) 00724 if (persons[i]->sex == SEX_MALE && persons[i]->markers[m].isKnown() && 00725 !persons[i]->markers[m].isHomozygous()) 00726 { 00727 printf("%s - Fam %s: Male %s has two X alleles [%s/%s]\n", 00728 (const char *) markerNames[m], 00729 (const char *) persons[i]->famid, (const char *) persons[i]->pid, 00730 (const char *) info->GetAlleleLabel(persons[i]->markers[m].one), 00731 (const char *) info->GetAlleleLabel(persons[i]->markers[m].two)); 00732 00733 // Wipe this genotype so we don't get cascading errors below 00734 persons[i]->markers[m][0] = persons[i]->markers[m][1] = 0; 00735 00736 fail = true; 00737 failedFamilies[f] = true; 00738 } 00739 00740 // Check full sibships for errors 00741 // TODO -- We could do better by grouping male half-sibs 00742 for (int f = 0; f < familyCount; f++) 00743 for (int i = families[f]->first; i <= families[f]->last; i++) 00744 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i]) 00745 { 00746 // This loop runs once per sibship 00747 Alleles fat = persons[i]->father->markers[m]; 00748 Alleles mot = persons[i]->mother->markers[m]; 00749 00750 bool fgeno = fat.isKnown(); 00751 bool mgeno = mot.isKnown(); 00752 00753 Alleles inferred_mother = mot; 00754 Alleles first_sister; 00755 Alleles inferred_father; 00756 00757 bool mother_ok = true; 00758 00759 int sisters = 0; 00760 00761 for (int j = 0; j < persons[i]->sibCount; j++) 00762 if (persons[i]->sibs[j]->isGenotyped(m)) 00763 { 00764 Alleles geno = persons[i]->sibs[j]->markers[m]; 00765 00766 bool fat1 = fat.hasAllele(geno.one); 00767 bool fat2 = fat.hasAllele(geno.two); 00768 bool mot1 = mot.hasAllele(geno.one); 00769 bool mot2 = mot.hasAllele(geno.two); 00770 00771 int sex = persons[i]->sibs[j]->sex; 00772 00773 if (sex == SEX_MALE) 00774 { 00775 if (mgeno && !mot1) 00776 { 00777 printf("%s - Fam %s: Child %s [%s/Y] has mother [%s/%s]\n", 00778 (const char *) markerNames[m], 00779 (const char *) persons[i]->famid, 00780 (const char *) persons[i]->sibs[j]->pid, 00781 (const char *) info->GetAlleleLabel(geno.one), 00782 (const char *) info->GetAlleleLabel(mot.one), 00783 (const char *) info->GetAlleleLabel(mot.two)); 00784 fail = true; 00785 failedFamilies[f] = true; 00786 } 00787 else 00788 mother_ok &= inferred_mother.AddAllele(geno.one); 00789 } 00790 if (sex == SEX_FEMALE) 00791 { 00792 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) || 00793 (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2))) 00794 { 00795 printf("%s - Fam %s: Child %s [%s/%s] has ", 00796 (const char *) markerNames[m], 00797 (const char *) persons[i]->famid, 00798 (const char *) persons[i]->sibs[j]->pid, 00799 (const char *) info->GetAlleleLabel(geno.one), 00800 (const char *) info->GetAlleleLabel(geno.two)); 00801 00802 if (!fgeno) 00803 printf("mother [%s/%s]\n", 00804 (const char *) info->GetAlleleLabel(mot.one), 00805 (const char *) info->GetAlleleLabel(mot.two)); 00806 else if (!mgeno) 00807 printf("father [%s/Y]\n", 00808 (const char *) info->GetAlleleLabel(fat.one)); 00809 else 00810 printf("parents [%s/Y]*[%s/%s]\n", 00811 (const char *) info->GetAlleleLabel(fat.one), 00812 (const char *) info->GetAlleleLabel(mot.one), 00813 (const char *) info->GetAlleleLabel(mot.two)); 00814 00815 fail = true; 00816 failedFamilies[f] = true; 00817 } 00818 else 00819 { 00820 if (!sisters++) 00821 inferred_father = first_sister = geno; 00822 else if (first_sister != geno) 00823 { 00824 inferred_father.Intersect(geno); 00825 00826 mother_ok &= inferred_mother.AddAllele( 00827 geno.otherAllele(inferred_father.one)); 00828 mother_ok &= inferred_mother.AddAllele( 00829 first_sister.otherAllele(inferred_father.one)); 00830 } 00831 00832 if (!fgeno && (mot1 ^ mot2)) 00833 inferred_father.Intersect(mot1 ? geno.two : geno.one); 00834 00835 if (!mgeno && (fat1 ^ fat2)) 00836 mother_ok &= inferred_mother.AddAllele(fat1 ? geno.two : geno.one); 00837 } 00838 } 00839 } 00840 00841 if (!mother_ok || (sisters && !inferred_father.isKnown())) 00842 { 00843 printf("%s - Fam %s: ", 00844 (const char *) markerNames[m], 00845 (const char *) persons[i]->famid); 00846 if (fgeno) 00847 printf("Father %s [%s/Y] has children [", 00848 (const char *) persons[i]->father->pid, 00849 (const char *) info->GetAlleleLabel(fat.one)); 00850 else if (mgeno) 00851 printf("Mother %s [%s/%s] has children [", 00852 (const char *) persons[i]->mother->pid, 00853 (const char *) info->GetAlleleLabel(mot.one), 00854 (const char *) info->GetAlleleLabel(mot.two)); 00855 else 00856 printf("Couple %s * %s has children [", 00857 (const char *) persons[i]->mother->pid, 00858 (const char *) persons[i]->father->pid); 00859 00860 for (int j = 0; j < persons[i]->sibCount; j++) 00861 printf( 00862 persons[i]->sibs[j]->sex == SEX_MALE ? "%s%s/Y" : "%s%s/%s", 00863 j == 0 ? "" : " ", 00864 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one), 00865 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two)); 00866 printf("]\n"); 00867 fail = true; 00868 failedFamilies[f] = true; 00869 } 00870 } 00871 00872 for (int f = 0; f < familyCount; f++) 00873 if (!failedFamilies[f] && 00874 (families[f]->count > families[f]->founders + 1) && 00875 !families[f]->isNuclear()) 00876 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m); 00877 } 00878 00879 if (fail) 00880 printf("\nMendelian inheritance errors detected\n"); 00881 00882 return fail; 00883 } 00884 00885 void Pedigree::ExtractFamily(int id, Pedigree & single_fam_ped) 00886 { 00887 for (int i = families[id]->first; i <= families[id]->last; i++) 00888 single_fam_ped.Add(*persons[i]); 00889 00890 single_fam_ped.Sort(); 00891 } 00892 00893 void Pedigree::ExtractOnAffection(int a, Pedigree & new_ped, int target_status) 00894 { 00895 for (int i = 0; i < count; i++) 00896 if (persons[i]->affections[a] == target_status) 00897 new_ped.Add(*persons[i]); 00898 else 00899 { 00900 Person blank_person; 00901 blank_person.CopyIDs(*persons[i]); 00902 new_ped.Add(blank_person); 00903 } 00904 00905 new_ped.Sort(); 00906 } 00907 00908 void Pedigree::Filter(IntArray & filter) 00909 { 00910 if (filter.Length() != count) 00911 error("Pedigree:Size of pedigree filter doesn't match number of persons in pedigree"); 00912 00913 for (int i = 0; i < count; i++) 00914 if (filter[i] == 1) 00915 { 00916 persons[i]->WipePhenotypes(); 00917 persons[i]->filter = true; 00918 } 00919 } 00920 00921 void Pedigree::AddPerson(const char * famid, const char * pid, 00922 const char * fatid, const char * motid, 00923 int sex, bool delay_sort) 00924 { 00925 if (count == size) Grow(); 00926 00927 persons[count] = new Person; 00928 00929 persons[count]->famid = famid; 00930 persons[count]->pid = pid; 00931 persons[count]->fatid = fatid; 00932 persons[count]->motid = motid; 00933 persons[count]->sex = sex; 00934 00935 count++; 00936 00937 if (!delay_sort) Sort(); 00938 } 00939 00940 void Pedigree::ShowMemoryInfo() 00941 { 00942 unsigned int bytes = 0; 00943 00944 for (int i = 0; i < count; i++) 00945 bytes += persons[i]->famid.BufferSize() + persons[i]->pid.BufferSize() + 00946 persons[i]->fatid.BufferSize() + persons[i]->motid.BufferSize(); 00947 00948 bytes += count * (markerCount * sizeof(Alleles) + traitCount * sizeof(double) + 00949 covariateCount * sizeof(double) + affectionCount * sizeof(char) + 00950 sizeof(Person)); 00951 00952 printf(" %40s %s\n", "Pedigree file ...", (const char *) MemoryInfo(bytes)); 00953 } 00954 00955