libStatGen Software  1
Pedigree.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "Pedigree.h"
00019 #include "GenotypeLists.h"
00020 #include "MemoryInfo.h"
00021 #include "Constant.h"
00022 #include "Error.h"
00023 #include "Sort.h"
00024 
00025 #include <stdlib.h>
00026 
00027 bool   Pedigree::sexAsCovariate = false;
00028 String Pedigree::missing("-99.999");
00029 
00030 Pedigree::Pedigree() : pd()
00031 {
00032     haveTwins = count = 0;
00033     size = 10000;
00034     persons = new Person *[size];
00035     familyCount = 0;
00036     families = new Family * [1];
00037     multiPd = NULL;
00038     multiFileCount = 0;
00039 }
00040 
00041 Pedigree::~Pedigree()
00042 {
00043     for (int i = 0; i < count; i++)
00044         delete persons[i];
00045 
00046     for (int i = 0; i < familyCount; i++)
00047         delete families[i];
00048 
00049     delete [] families;
00050     delete [] persons;
00051 
00052     if (multiPd != NULL)
00053         delete [] multiPd;
00054 }
00055 
00056 void Pedigree::Sort()
00057 {
00058     QuickSort(persons, count, sizeof(Person *),
00059               COMPAREFUNC Pedigree::ComparePersons);
00060 
00061     haveTwins = 0;
00062 
00063     // Check for structural problems in input pedigree
00064     bool problem = false;
00065 
00066     // Check that we have no duplicates...
00067     for (int i = 1; i < count; i++)
00068         if (ComparePersons((const Person **) &persons[i-1],
00069                            (const Person **) &persons[i]) == 0)
00070         {
00071             printf("Family %s: Person %s is duplicated\n",
00072                    (const char *) persons[i]->famid,
00073                    (const char *) persons[i]->pid);
00074             problem = true;
00075 
00076             do
00077             {
00078                 i++;
00079             }
00080             while (i < count &&
00081                     ComparePersons((const Person **) &persons[i-1],
00082                                    (const Person **) &persons[i]) == 0);
00083         }
00084 
00085     // Assign parents...
00086     for (int i = 0; i < count; i++)
00087     {
00088         persons[i]->serial = i;
00089         persons[i]->father = FindPerson(persons[i]->famid, persons[i]->fatid);
00090         persons[i]->mother = FindPerson(persons[i]->famid, persons[i]->motid);
00091 
00092         problem |= !persons[i]->CheckParents();
00093 
00094         persons[i]->AssessStatus();
00095 
00096         // Check if we have any twins...
00097         haveTwins |= persons[i]->zygosity;
00098     }
00099 
00100     if (problem)
00101         error("Please correct problems with pedigree structure\n");
00102 
00103     MakeSibships();
00104     MakeFamilies();
00105 }
00106 
00107 void Pedigree::MakeSibships()
00108 {
00109     Person ** sibs = new Person * [count];
00110     for (int i = 0; i < count; i++)
00111         sibs[i] = persons[i];
00112 
00113     QuickSort(sibs, count, sizeof(Person *),
00114               COMPAREFUNC Pedigree::CompareParents);
00115 
00116     for (int first = 0; first < count; first++)
00117         if (!sibs[first]->isFounder())
00118         {
00119             int last = first + 1;
00120             while (last < count)
00121                 if (sibs[first]-> mother != sibs[last]->mother ||
00122                         sibs[first]-> father != sibs[last]->father)
00123                     break;
00124                 else last++;
00125             last --;
00126 
00127             for (int j = first; j <= last; j++)
00128             {
00129                 if (sibs[j]->sibCount) delete [] sibs[j]->sibs;
00130                 sibs[j]->sibCount = last - first + 1;
00131                 sibs[j]->sibs = new Person * [sibs[j]->sibCount];
00132                 for (int k = first; k <= last; k++)
00133                     sibs[j]->sibs[k - first] = sibs[k];
00134             }
00135             first = last;
00136         }
00137     delete [] sibs;
00138 }
00139 
00140 void Pedigree::MakeFamilies()
00141 {
00142     for (int i = 0; i < familyCount; i++)
00143         delete families[i];
00144     delete [] families;
00145 
00146     familyCount = 0;
00147     families = new Family * [count];
00148 
00149     for (int first=0; first < count; first++)
00150     {
00151         int last = first;
00152         while (last < count)
00153             if (SlowCompare(persons[first]->famid, persons[last]->famid) == 0)
00154                 last++;
00155             else break;
00156 
00157         families[familyCount] = new Family(*this, first, --last, familyCount);
00158 
00159         first = last;
00160         familyCount++;
00161     }
00162 }
00163 
00164 // Utility functions for finding a person in a pedigree
00165 
00166 struct PedigreeKey
00167 {
00168     const char * famid;
00169     const char * pid;
00170 };
00171 
00172 int CompareKeyToPerson(PedigreeKey * key, Person ** p)
00173 {
00174     int result = SlowCompare(key->famid, (**p).famid);
00175 
00176     if (result != 0)
00177         return result;
00178 
00179     return SlowCompare(key->pid, (**p).pid);
00180 }
00181 
00182 int CompareKeyToFamily(PedigreeKey * key, Family ** f)
00183 {
00184     return SlowCompare(key->famid, (**f).famid);
00185 }
00186 
00187 Person * Pedigree::FindPerson(const char * famid, const char * pid)
00188 {
00189     PedigreeKey key;
00190     key.famid = famid;
00191     key.pid   = pid;
00192 
00193     Person ** result = (Person **) BinarySearch
00194                        (&key, persons, count, sizeof(Person *),
00195                         COMPAREFUNC CompareKeyToPerson);
00196 
00197     return (result == NULL) ? (Person *) NULL : *result;
00198 }
00199 
00200 Person * Pedigree::FindPerson(const char *famid, const char *pid, int universe)
00201 {
00202     PedigreeKey key;
00203     key.famid = famid;
00204     key.pid   = pid;
00205 
00206     Person ** result = (Person **) BinarySearch
00207                        (&key, persons, universe, sizeof(Person *),
00208                         COMPAREFUNC CompareKeyToPerson);
00209 
00210     return (result == NULL) ? (Person *) NULL : *result;
00211 }
00212 
00213 Family * Pedigree::FindFamily(const char * famid)
00214 {
00215     PedigreeKey key;
00216     key.famid = famid;
00217 
00218     Family ** result = (Family **) BinarySearch
00219                        (&key, families, familyCount, sizeof(Family *),
00220                         COMPAREFUNC CompareKeyToFamily);
00221 
00222     return (result == NULL) ? (Family *) NULL : *result;
00223 }
00224 
00225 int Pedigree::CountAlleles(int marker)
00226 {
00227     return ::CountAlleles(*this, marker);
00228 }
00229 
00230 void Pedigree::LumpAlleles(double min, bool reorder)
00231 {
00232     if (min > 0.0)
00233         printf("Lumping alleles with frequencies of %.2f or less...\n\n", min);
00234 
00235     for (int m=0; m < markerCount; m++)
00236         ::LumpAlleles(*this, m, min, reorder);
00237 }
00238 
00239 void Pedigree::EstimateFrequencies(int estimator, bool quiet)
00240 {
00241     bool estimated = false;
00242     int  line = 3;
00243 
00244     const char * estimators[] = 
00245         { "using all genotypes", "using founder genotypes", "assumed equal" };
00246 
00247     bool condensed = markerCount > 100;
00248     int  grain = markerCount / 50, estimates = 0;
00249 
00250     for (int m=0; m < markerCount; m++)
00251         if (::EstimateFrequencies(*this, m, estimator))
00252             if (!quiet)
00253             {
00254                 if (!estimated)
00255                     printf("Estimating allele frequencies... [%s]\n   ",
00256                            estimators[estimator]), estimated = true;
00257 
00258                 if (condensed)
00259                 {
00260                     if (estimates++ % grain == 0)
00261                     {
00262                         printf(".");
00263                         fflush(stdout);
00264                     }
00265                     continue;
00266                 }
00267 
00268                 if (line + markerNames[m].Length() + 1 > 79)
00269                     printf("\n   "), line = 3;
00270 
00271                 printf("%s ", (const char *) markerNames[m]);
00272                 line += markerNames[m].Length() + 1;
00273             }
00274 
00275     if (estimated)
00276         printf(condensed ? "\nDone estimating frequencies for %d markers\n\n" : "\n\n", estimates);
00277 }
00278 
00279 int Pedigree::ComparePersons(const Person ** p1, const Person ** p2)
00280 {
00281     int result = SlowCompare((**p1).famid, (**p2).famid);
00282 
00283     if (result != 0) return result;
00284 
00285     return SlowCompare((**p1).pid, (**p2).pid);
00286 }
00287 
00288 int Pedigree::CompareParents(const Person ** p1, const Person ** p2)
00289 {
00290     int result = SlowCompare((**p1).famid, (**p2).famid);
00291 
00292     if (result) return result;
00293 
00294     result = SlowCompare((**p1).fatid, (**p2).fatid);
00295 
00296     if (result) return result;
00297 
00298     return SlowCompare((**p1).motid, (**p2).motid);
00299 }
00300 
00301 void Pedigree::Grow()
00302 {
00303     size *= 2;
00304 
00305     Person ** temp = new Person * [size];
00306     if (temp == NULL) error("Out of memory");
00307 
00308     for (int i=0; i<count; i++)
00309         temp[i] = persons[i];
00310 
00311     delete [] persons;
00312     persons = temp;
00313 }
00314 
00315 void Pedigree::Add(Person & rhs)
00316 {
00317     if (count == size)
00318         Grow();
00319 
00320     persons[count] = new Person();
00321     persons[count++]->Copy(rhs);
00322 }
00323 
00324 void Pedigree::WriteDataFile(FILE * output)
00325 {
00326     // write in the following order:
00327     // markers, traits, affections, covariates
00328 
00329     if (haveTwins)
00330         fprintf(output, " Z  Zygosity\n");
00331 
00332     for (int m = 0; m < markerCount; m++)
00333         fprintf(output, " M  %s\n", (const char *) markerNames[m]);
00334 
00335     for (int t = 0; t < traitCount; t++)
00336         fprintf(output, " T  %s\n", (const char *) traitNames[t]);
00337 
00338     for (int a = 0; a < affectionCount; a++)
00339         fprintf(output, " A  %s\n", (const char *) affectionNames[a]);
00340 
00341     for (int c = 0; c < covariateCount; c++)
00342         fprintf(output, " C  %s\n", (const char *) covariateNames[c]);
00343 
00344     for (int s = 0; s < stringCount; s++)
00345         fprintf(output, " $  %s\n", (const char *) stringNames[s]);
00346 
00347     fprintf(output, " E  END-OF-DATA \n");
00348 }
00349 
00350 void Pedigree::WritePedigreeFile(FILE * output)
00351 {
00352     MarkerInfo ** info = new MarkerInfo * [markerCount];
00353 
00354     for (int i = 0; i < markerCount; i++)
00355         info[i] = GetMarkerInfo(i);
00356 
00357     for (int i = 0; i < count; i++)
00358         WriteRecodedPerson(output, i, info);
00359     fprintf(output, "end\n");
00360 
00361     delete [] info;
00362 }
00363 
00364 void Pedigree::WritePerson(FILE * output, int person, const char * famid,
00365                            const char * pid, const char * fatid, const char * motid)
00366 {
00367     WriteRecodedPerson(output, person, NULL, famid, pid, fatid, motid);
00368 }
00369 
00370 void Pedigree::WriteRecodedPerson(
00371     FILE * output, int person, MarkerInfo ** markerInfo,
00372     const char * famid, const char * pid, const char * fatid,
00373     const char * motid)
00374 {
00375     Person * p = persons[person];
00376 
00377     if (famid == NULL) famid = p->famid;
00378     if (pid == NULL)   pid = p->pid;
00379     if (fatid == NULL) fatid = p->fatid;
00380     if (motid == NULL) motid = p->motid;
00381 
00382     // write in the following order:
00383     // markers, traits, affections, covariates
00384 
00385     fprintf(output, "%s\t%s\t%s\t%s\t%d\t",
00386             famid, pid, fatid, motid, p->sex);
00387 
00388     const char * twinCodes[] = {"0", "MZ", "DZ"};
00389 
00390     if (haveTwins)
00391     {
00392         if (p->zygosity <= 2)
00393             fprintf(output, "%s\t", twinCodes[p->zygosity]);
00394         else
00395             fprintf(output, "%d\t", p->zygosity);
00396     }
00397 
00398     for (int m = 0; m < markerCount; m++)
00399         if (markerInfo == NULL)
00400             fprintf(output, markerCount < 20 ? "%3d/%3d\t" : "%d/%d\t",
00401                     p->markers[m][0], p->markers[m][1]);
00402         else
00403             fprintf(output, markerCount < 20 ? "%3s/%3s\t" : "%s/%s\t",
00404                     (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][0]),
00405                     (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][1]));
00406 
00407     for (int t = 0; t < traitCount; t++)
00408         if (p->isPhenotyped(t))
00409             fprintf(output, "%.3f\t", p->traits[t]);
00410         else
00411             fprintf(output, "x\t");
00412 
00413     for (int a = 0; a < affectionCount; a++)
00414         if (p->isDiagnosed(a))
00415             fprintf(output, "%d\t", p->affections[a]);
00416         else
00417             fprintf(output, "x\t");
00418 
00419     for (int c = 0; c < covariateCount; c++)
00420         if (p->isControlled(c))
00421             fprintf(output, "%.3f\t", p->covariates[c]);
00422         else
00423             fprintf(output, "x\t");
00424 
00425     for (int s = 0; s < stringCount; s++)
00426         if (!p->strings[s].IsEmpty())
00427             fprintf(output, "%s\t", (const char *) p->strings[s]);
00428         else
00429             fprintf(output, ".\t");
00430 
00431     fprintf(output, "\n");
00432 }
00433 
00434 void Pedigree::WriteDataFile(const char * output)
00435 {
00436     FILE * f = fopen(output, "wt");
00437     if (f == NULL) error("Couldn't open data file %s", output);
00438     WriteDataFile(f);
00439     fclose(f);
00440 }
00441 
00442 void Pedigree::WritePedigreeFile(const char * output)
00443 {
00444     FILE * f = fopen(output, "wt");
00445     if (f == NULL) error("Couldn't open pedigree file %s", output);
00446     WritePedigreeFile(f);
00447     fclose(f);
00448 }
00449 
00450 void Pedigree::PrepareDichotomization()
00451 {
00452 
00453     for (int t = 0; t < traitCount; t++)
00454     {
00455         String new_affection = traitNames[t] + "*";
00456         GetAffectionID(new_affection);
00457     }
00458 }
00459 
00460 int Pedigree::Dichotomize(int t, double mean)
00461 {
00462     String new_affection = traitNames[t] + "*";
00463 
00464     int af = GetAffectionID(new_affection);
00465 
00466     if (mean == _NAN_)
00467     {
00468         mean  = 0.0;
00469         double dcount = 0;
00470         for (int i = 0; i < count; i++)
00471             if (persons[i]->isPhenotyped(t) &&
00472                     !persons[i]->isFounder())
00473             {
00474                 mean += persons[i]->traits[t];
00475                 dcount ++;
00476             }
00477 
00478         if (!dcount) return af;
00479 
00480         mean /= dcount;
00481     }
00482 
00483     printf("Dichotomizing %s around mean of %.3f ...\n",
00484            (const char *) traitNames[t], mean);
00485 
00486     for (int i = 0; i < count; i++)
00487         if (persons[i]->isPhenotyped(t) && !persons[i]->isFounder())
00488             persons[i]->affections[af] = persons[i]->traits[t] > mean ? 2 : 1;
00489         else
00490             persons[i]->affections[af] = 0;
00491 
00492     Sort();
00493 
00494     return af;
00495 }
00496 
00497 void Pedigree::DichotomizeAll(double mean)
00498 {
00499     for (int t = 0; t < traitCount; t++)
00500         Dichotomize(t, mean);
00501 }
00502 
00503 bool Pedigree::InheritanceCheck(bool abortIfInconsistent)
00504 {
00505     bool fail = false;
00506 
00507     if (haveTwins) fail |= TwinCheck();
00508 
00509     if (chromosomeX)
00510         fail |= SexLinkedCheck();
00511     else
00512         fail |= AutosomalCheck();
00513 
00514     if (fail && abortIfInconsistent)
00515         error("Mendelian inheritance errors detected\n");
00516 
00517     return !fail;
00518 }
00519 
00520 bool Pedigree::AutosomalCheck()
00521 {
00522     // Arrays indicating which alleles and homozygotes occur
00523     IntArray haplos, genos, counts, failedFamilies;
00524 
00525     bool fail = false;
00526 
00527     // For each marker ...
00528     for (int m = 0; m < markerCount; m++)
00529     {
00530         MarkerInfo * info = GetMarkerInfo(m);
00531 
00532         // Summary for marker
00533         int alleleCount = CountAlleles(m);
00534         int genoCount = alleleCount * (alleleCount + 1) / 2;
00535 
00536         // Initialize arrays
00537         haplos.Dimension(alleleCount + 1);
00538         haplos.Set(-1);
00539 
00540         genos.Dimension(genoCount + 1);
00541         genos.Set(-1);
00542 
00543         failedFamilies.Dimension(familyCount);
00544         failedFamilies.Zero();
00545 
00546         counts.Dimension(alleleCount + 1);
00547 
00548         for (int f = 0; f < familyCount; f++)
00549             for (int i = families[f]->first; i <= families[f]->last; i++)
00550                 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
00551                 {
00552                     // This loop runs once per sibship
00553                     Alleles fat = persons[i]->father->markers[m];
00554                     Alleles mot = persons[i]->mother->markers[m];
00555                     bool    fgeno = fat.isKnown();
00556                     bool    mgeno = mot.isKnown();
00557 
00558                     // Number of alleles, homozygotes and genotypes in this sibship
00559                     int haplo = 0, homo = 0, diplo = 0;
00560 
00561                     // No. of different genotypes per allele
00562                     counts.Zero();
00563 
00564                     // In general, there should be no more than 3 genotypes per allele
00565                     bool too_many_genos = false;
00566 
00567                     for (int j = 0; j < persons[i]->sibCount; j++)
00568                         if (persons[i]->sibs[j]->isGenotyped(m))
00569                         {
00570                             Alleles geno = persons[i]->sibs[j]->markers[m];
00571 
00572                             int fat1 = fat.hasAllele(geno.one);
00573                             int fat2 = fat.hasAllele(geno.two);
00574                             int mot1 = mot.hasAllele(geno.one);
00575                             int mot2 = mot.hasAllele(geno.two);
00576 
00577                             if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
00578                                     (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
00579                             {
00580                                 printf("%s - Fam %s: Child %s [%s/%s] has ",
00581                                        (const char *) markerNames[m],
00582                                        (const char *) persons[i]->sibs[j]->famid,
00583                                        (const char *) persons[i]->sibs[j]->pid,
00584                                        (const char *) info->GetAlleleLabel(geno.one),
00585                                        (const char *) info->GetAlleleLabel(geno.two));
00586 
00587                                 if (!fgeno || !mgeno)
00588                                     printf("%s [%s/%s]\n",
00589                                            fgeno ? "father" : "mother",
00590                                            (const char *) info->GetAlleleLabel(fgeno ? fat.one : mot.one),
00591                                            (const char *) info->GetAlleleLabel(fgeno ? fat.two : mot.two));
00592                                 else
00593                                     printf("parents [%s/%s]*[%s/%s]\n",
00594                                            (const char *) info->GetAlleleLabel(fat.one),
00595                                            (const char *) info->GetAlleleLabel(fat.two),
00596                                            (const char *) info->GetAlleleLabel(mot.one),
00597                                            (const char *) info->GetAlleleLabel(mot.two));
00598 
00599                                 fail = true;
00600                                 failedFamilies[f] = true;
00601                             }
00602                             else
00603                             {
00604                                 if (haplos[geno.one] != i)
00605                                 {
00606                                     haplo++;
00607                                     haplos[geno.one] = i;
00608                                 };
00609                                 if (haplos[geno.two] != i)
00610                                 {
00611                                     haplo++;
00612                                     haplos[geno.two] = i;
00613                                 };
00614 
00615                                 int index = geno.SequenceCoded();
00616 
00617                                 if (genos[index] != i)
00618                                 {
00619                                     genos[index] = i;
00620                                     diplo++;
00621                                     counts[geno.one]++;
00622                                     if (geno.isHomozygous())
00623                                         homo++;
00624                                     else
00625                                         counts[geno.two]++;
00626                                     if (counts[geno.one] > 2) too_many_genos = true;
00627                                     if (counts[geno.two] > 2) too_many_genos = true;
00628                                 }
00629                             }
00630                         }
00631 
00632                     if (fgeno)
00633                     {
00634                         if (haplos[fat.one] != i)
00635                         {
00636                             haplo++;
00637                             haplos[fat.one] = i;
00638                         }
00639                         if (haplos[fat.two] != i)
00640                         {
00641                             haplo++;
00642                             haplos[fat.two] = i;
00643                         }
00644                         homo += fat.isHomozygous();
00645                     }
00646 
00647                     if (mgeno)
00648                     {
00649                         if (haplos[mot.one] != i)
00650                         {
00651                             haplo++;
00652                             haplos[mot.one] = i;
00653                         }
00654                         if (haplos[mot.two] != i)
00655                         {
00656                             haplo++;
00657                             haplos[mot.two] = i;
00658                         }
00659                         homo += mot.isHomozygous();
00660                     }
00661 
00662                     if (diplo > 4 || haplo + homo > 4 || (haplo == 4 && too_many_genos))
00663                     {
00664                         printf("%s - Fam %s: ",
00665                                (const char *) markerNames[m],
00666                                (const char *) persons[i]->famid);
00667                         if (persons[i]->father->markers[m].isKnown())
00668                             printf("Father %s [%s/%s] has children [",
00669                                    (const char *) persons[i]->father->pid,
00670                                    (const char *) info->GetAlleleLabel(fat.one),
00671                                    (const char *) info->GetAlleleLabel(fat.two));
00672                         else if (persons[i]->mother->markers[m].isKnown())
00673                             printf("Mother %s [%s/%s] has children [",
00674                                    (const char *) persons[i]->mother->pid,
00675                                    (const char *) info->GetAlleleLabel(mot.one),
00676                                    (const char *) info->GetAlleleLabel(mot.two));
00677                         else
00678                             printf("Couple %s * %s has children [",
00679                                    (const char *) persons[i]->mother->pid,
00680                                    (const char *) persons[i]->father->pid);
00681 
00682                         for (int j = 0; j < persons[i]->sibCount; j++)
00683                             printf("%s%s/%s", j == 0 ? "" : " ",
00684                                    (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
00685                                    (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
00686                         printf("]\n");
00687 
00688                         fail = true;
00689                         failedFamilies[f] = true;
00690                     }
00691                 }
00692 
00693         for (int f = 0; f < familyCount; f++)
00694             if (!failedFamilies[f] &&
00695                     (families[f]->count > families[f]->founders + 1) &&
00696                     !families[f]->isNuclear())
00697                 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
00698     }
00699 
00700     if (fail)
00701         printf("\nMendelian inheritance errors detected\n");
00702 
00703     return fail;
00704 }
00705 
00706 bool Pedigree::SexLinkedCheck()
00707 {
00708     bool fail = false;
00709 
00710     // Keep track of what families fail the basic inheritance check,
00711     // so that we can run later run genotype elimination check on the remainder
00712     IntArray failedFamilies(familyCount);
00713 
00714     // For each marker ...
00715     for (int m = 0; m < markerCount; m++)
00716     {
00717         MarkerInfo * info = GetMarkerInfo(m);
00718 
00719         failedFamilies.Zero();
00720 
00721         // Check for homozygous males
00722         for (int f = 0; f < familyCount; f++)
00723             for (int i = families[f]->first; i <= families[f]->last; i++)
00724                 if (persons[i]->sex == SEX_MALE && persons[i]->markers[m].isKnown() &&
00725                         !persons[i]->markers[m].isHomozygous())
00726                 {
00727                     printf("%s - Fam %s: Male %s has two X alleles [%s/%s]\n",
00728                            (const char *) markerNames[m],
00729                            (const char *) persons[i]->famid, (const char *) persons[i]->pid,
00730                            (const char *) info->GetAlleleLabel(persons[i]->markers[m].one),
00731                            (const char *) info->GetAlleleLabel(persons[i]->markers[m].two));
00732 
00733                     // Wipe this genotype so we don't get cascading errors below
00734                     persons[i]->markers[m][0] = persons[i]->markers[m][1] = 0;
00735 
00736                     fail = true;
00737                     failedFamilies[f] = true;
00738                 }
00739 
00740         // Check full sibships for errors
00741         // TODO -- We could do better by grouping male half-sibs
00742         for (int f = 0; f < familyCount; f++)
00743             for (int i = families[f]->first; i <= families[f]->last; i++)
00744                 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
00745                 {
00746                     // This loop runs once per sibship
00747                     Alleles fat = persons[i]->father->markers[m];
00748                     Alleles mot = persons[i]->mother->markers[m];
00749 
00750                     bool fgeno = fat.isKnown();
00751                     bool mgeno = mot.isKnown();
00752 
00753                     Alleles inferred_mother = mot;
00754                     Alleles first_sister;
00755                     Alleles inferred_father;
00756 
00757                     bool mother_ok = true;
00758 
00759                     int sisters = 0;
00760 
00761                     for (int j = 0; j < persons[i]->sibCount; j++)
00762                         if (persons[i]->sibs[j]->isGenotyped(m))
00763                         {
00764                             Alleles geno = persons[i]->sibs[j]->markers[m];
00765 
00766                             bool fat1 = fat.hasAllele(geno.one);
00767                             bool fat2 = fat.hasAllele(geno.two);
00768                             bool mot1 = mot.hasAllele(geno.one);
00769                             bool mot2 = mot.hasAllele(geno.two);
00770 
00771                             int sex = persons[i]->sibs[j]->sex;
00772 
00773                             if (sex == SEX_MALE)
00774                             {
00775                                 if (mgeno && !mot1)
00776                                 {
00777                                     printf("%s - Fam %s: Child %s [%s/Y] has mother [%s/%s]\n",
00778                                            (const char *) markerNames[m],
00779                                            (const char *) persons[i]->famid,
00780                                            (const char *) persons[i]->sibs[j]->pid,
00781                                            (const char *) info->GetAlleleLabel(geno.one),
00782                                            (const char *) info->GetAlleleLabel(mot.one),
00783                                            (const char *) info->GetAlleleLabel(mot.two));
00784                                     fail = true;
00785                                     failedFamilies[f] = true;
00786                                 }
00787                                 else
00788                                     mother_ok &= inferred_mother.AddAllele(geno.one);
00789                             }
00790                             if (sex == SEX_FEMALE)
00791                             {
00792                                 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
00793                                         (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
00794                                 {
00795                                     printf("%s - Fam %s: Child %s [%s/%s] has ",
00796                                            (const char *) markerNames[m],
00797                                            (const char *) persons[i]->famid,
00798                                            (const char *) persons[i]->sibs[j]->pid,
00799                                            (const char *) info->GetAlleleLabel(geno.one),
00800                                            (const char *) info->GetAlleleLabel(geno.two));
00801 
00802                                     if (!fgeno)
00803                                         printf("mother [%s/%s]\n",
00804                                                (const char *) info->GetAlleleLabel(mot.one),
00805                                                (const char *) info->GetAlleleLabel(mot.two));
00806                                     else if (!mgeno)
00807                                         printf("father [%s/Y]\n",
00808                                                (const char *) info->GetAlleleLabel(fat.one));
00809                                     else
00810                                         printf("parents [%s/Y]*[%s/%s]\n",
00811                                                (const char *) info->GetAlleleLabel(fat.one),
00812                                                (const char *) info->GetAlleleLabel(mot.one),
00813                                                (const char *) info->GetAlleleLabel(mot.two));
00814 
00815                                     fail = true;
00816                                     failedFamilies[f] = true;
00817                                 }
00818                                 else
00819                                 {
00820                                     if (!sisters++)
00821                                         inferred_father = first_sister = geno;
00822                                     else if (first_sister != geno)
00823                                     {
00824                                         inferred_father.Intersect(geno);
00825 
00826                                         mother_ok &= inferred_mother.AddAllele(
00827                                                          geno.otherAllele(inferred_father.one));
00828                                         mother_ok &= inferred_mother.AddAllele(
00829                                                          first_sister.otherAllele(inferred_father.one));
00830                                     }
00831 
00832                                     if (!fgeno && (mot1 ^ mot2))
00833                                         inferred_father.Intersect(mot1 ? geno.two : geno.one);
00834 
00835                                     if (!mgeno && (fat1 ^ fat2))
00836                                         mother_ok &= inferred_mother.AddAllele(fat1 ? geno.two : geno.one);
00837                                 }
00838                             }
00839                         }
00840 
00841                     if (!mother_ok || (sisters && !inferred_father.isKnown()))
00842                     {
00843                         printf("%s - Fam %s: ",
00844                                (const char *) markerNames[m],
00845                                (const char *) persons[i]->famid);
00846                         if (fgeno)
00847                             printf("Father %s [%s/Y] has children [",
00848                                    (const char *) persons[i]->father->pid,
00849                                    (const char *) info->GetAlleleLabel(fat.one));
00850                         else if (mgeno)
00851                             printf("Mother %s [%s/%s] has children [",
00852                                    (const char *) persons[i]->mother->pid,
00853                                    (const char *) info->GetAlleleLabel(mot.one),
00854                                    (const char *) info->GetAlleleLabel(mot.two));
00855                         else
00856                             printf("Couple %s * %s has children [",
00857                                    (const char *) persons[i]->mother->pid,
00858                                    (const char *) persons[i]->father->pid);
00859 
00860                         for (int j = 0; j < persons[i]->sibCount; j++)
00861                             printf(
00862                                 persons[i]->sibs[j]->sex == SEX_MALE ? "%s%s/Y" : "%s%s/%s",
00863                                 j == 0 ? "" : " ",
00864                                 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
00865                                 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
00866                         printf("]\n");
00867                         fail = true;
00868                         failedFamilies[f] = true;
00869                     }
00870                 }
00871 
00872         for (int f = 0; f < familyCount; f++)
00873             if (!failedFamilies[f] &&
00874                     (families[f]->count > families[f]->founders + 1) &&
00875                     !families[f]->isNuclear())
00876                 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
00877     }
00878 
00879     if (fail)
00880         printf("\nMendelian inheritance errors detected\n");
00881 
00882     return fail;
00883 }
00884 
00885 void Pedigree::ExtractFamily(int id, Pedigree & single_fam_ped)
00886 {
00887     for (int i = families[id]->first; i <= families[id]->last; i++)
00888         single_fam_ped.Add(*persons[i]);
00889 
00890     single_fam_ped.Sort();
00891 }
00892 
00893 void Pedigree::ExtractOnAffection(int a, Pedigree & new_ped, int target_status)
00894 {
00895     for (int i = 0; i < count; i++)
00896         if (persons[i]->affections[a] == target_status)
00897             new_ped.Add(*persons[i]);
00898         else
00899         {
00900             Person blank_person;
00901             blank_person.CopyIDs(*persons[i]);
00902             new_ped.Add(blank_person);
00903         }
00904 
00905     new_ped.Sort();
00906 }
00907 
00908 void Pedigree::Filter(IntArray & filter)
00909 {
00910     if (filter.Length() != count)
00911         error("Pedigree:Size of pedigree filter doesn't match number of persons in pedigree");
00912 
00913     for (int i = 0; i < count; i++)
00914         if (filter[i] == 1)
00915         {
00916             persons[i]->WipePhenotypes();
00917             persons[i]->filter = true;
00918         }
00919 }
00920 
00921 void Pedigree::AddPerson(const char * famid, const char * pid,
00922                          const char * fatid, const char * motid,
00923                          int sex, bool delay_sort)
00924 {
00925     if (count == size) Grow();
00926 
00927     persons[count] = new Person;
00928 
00929     persons[count]->famid = famid;
00930     persons[count]->pid = pid;
00931     persons[count]->fatid = fatid;
00932     persons[count]->motid = motid;
00933     persons[count]->sex = sex;
00934 
00935     count++;
00936 
00937     if (!delay_sort) Sort();
00938 }
00939 
00940 void Pedigree::ShowMemoryInfo()
00941 {
00942     unsigned int bytes = 0;
00943 
00944     for (int i = 0; i < count; i++)
00945         bytes += persons[i]->famid.BufferSize() + persons[i]->pid.BufferSize() +
00946                  persons[i]->fatid.BufferSize() + persons[i]->motid.BufferSize();
00947 
00948     bytes += count * (markerCount * sizeof(Alleles) + traitCount * sizeof(double) +
00949                       covariateCount * sizeof(double) + affectionCount * sizeof(char) +
00950                       sizeof(Person));
00951 
00952     printf("   %40s %s\n", "Pedigree file ...", (const char *) MemoryInfo(bytes));
00953 }
00954 
00955 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends