Pedigree.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "Pedigree.h"
00019 #include "GenotypeLists.h"
00020 #include "MemoryInfo.h"
00021 #include "Constant.h"
00022 #include "Error.h"
00023 #include "Sort.h"
00024 
00025 #include <stdlib.h>
00026 
00027 bool   Pedigree::sexAsCovariate = false;
00028 String Pedigree::missing("-99.999");
00029 
00030 Pedigree::Pedigree() : pd()
00031 {
00032     haveTwins = count = 0;
00033     size = 10000;
00034     persons = new Person *[size];
00035     familyCount = 0;
00036     families = new Family * [1];
00037     multiPd = NULL;
00038     multiFileCount = 0;
00039 }
00040 
00041 Pedigree::~Pedigree()
00042 {
00043     for (int i = 0; i < count; i++)
00044         delete persons[i];
00045 
00046     for (int i = 0; i < familyCount; i++)
00047         delete families[i];
00048 
00049     delete [] families;
00050     delete [] persons;
00051 
00052     if (multiPd != NULL)
00053         delete [] multiPd;
00054 }
00055 
00056 void Pedigree::Sort()
00057 {
00058     QuickSort(persons, count, sizeof(Person *),
00059               COMPAREFUNC Pedigree::ComparePersons);
00060 
00061     haveTwins = 0;
00062 
00063     // Check for structural problems in input pedigree
00064     bool problem = false;
00065 
00066     // Check that we have no duplicates...
00067     for (int i = 1; i < count; i++)
00068         if (ComparePersons((const Person **) &persons[i-1],
00069                            (const Person **) &persons[i]) == 0)
00070         {
00071             printf("Family %s: Person %s is duplicated\n",
00072                    (const char *) persons[i]->famid,
00073                    (const char *) persons[i]->pid);
00074             problem = true;
00075 
00076             do
00077             {
00078                 i++;
00079             }
00080             while (i < count &&
00081                     ComparePersons((const Person **) &persons[i-1],
00082                                    (const Person **) &persons[i]) == 0);
00083         }
00084 
00085     // Assign parents...
00086     for (int i = 0; i < count; i++)
00087     {
00088         persons[i]->serial = i;
00089         persons[i]->father = FindPerson(persons[i]->famid, persons[i]->fatid);
00090         persons[i]->mother = FindPerson(persons[i]->famid, persons[i]->motid);
00091 
00092         problem |= !persons[i]->CheckParents();
00093 
00094         persons[i]->AssessStatus();
00095 
00096         // Check if we have any twins...
00097         haveTwins |= persons[i]->zygosity;
00098     }
00099 
00100     if (problem)
00101         error("Please correct problems with pedigree structure\n");
00102 
00103     MakeSibships();
00104     MakeFamilies();
00105 }
00106 
00107 void Pedigree::MakeSibships()
00108 {
00109     Person ** sibs = new Person * [count];
00110     for (int i = 0; i < count; i++)
00111         sibs[i] = persons[i];
00112 
00113     QuickSort(sibs, count, sizeof(Person *),
00114               COMPAREFUNC Pedigree::CompareParents);
00115 
00116     for (int first = 0; first < count; first++)
00117         if (!sibs[first]->isFounder())
00118         {
00119             int last = first + 1;
00120             while (last < count)
00121                 if (sibs[first]-> mother != sibs[last]->mother ||
00122                         sibs[first]-> father != sibs[last]->father)
00123                     break;
00124                 else last++;
00125             last --;
00126 
00127             for (int j = first; j <= last; j++)
00128             {
00129                 if (sibs[j]->sibCount) delete [] sibs[j]->sibs;
00130                 sibs[j]->sibCount = last - first + 1;
00131                 sibs[j]->sibs = new Person * [sibs[j]->sibCount];
00132                 for (int k = first; k <= last; k++)
00133                     sibs[j]->sibs[k - first] = sibs[k];
00134             }
00135             first = last;
00136         }
00137     delete [] sibs;
00138 }
00139 
00140 void Pedigree::MakeFamilies()
00141 {
00142     for (int i = 0; i < familyCount; i++)
00143         delete families[i];
00144     delete [] families;
00145 
00146     familyCount = 0;
00147     families = new Family * [count];
00148 
00149     for (int first=0; first < count; first++)
00150     {
00151         int last = first;
00152         while (last < count)
00153             if (SlowCompare(persons[first]->famid, persons[last]->famid) == 0)
00154                 last++;
00155             else break;
00156 
00157         families[familyCount] = new Family(*this, first, --last, familyCount);
00158 
00159         first = last;
00160         familyCount++;
00161     }
00162 }
00163 
00164 // Utility functions for finding a person in a pedigree
00165 
00166 struct PedigreeKey
00167 {
00168     const char * famid;
00169     const char * pid;
00170 };
00171 
00172 int CompareKeyToPerson(PedigreeKey * key, Person ** p)
00173 {
00174     int result = SlowCompare(key->famid, (**p).famid);
00175 
00176     if (result != 0)
00177         return result;
00178 
00179     return SlowCompare(key->pid, (**p).pid);
00180 }
00181 
00182 int CompareKeyToFamily(PedigreeKey * key, Family ** f)
00183 {
00184     return SlowCompare(key->famid, (**f).famid);
00185 }
00186 
00187 Person * Pedigree::FindPerson(const char * famid, const char * pid)
00188 {
00189     PedigreeKey key;
00190     key.famid = famid;
00191     key.pid   = pid;
00192 
00193     Person ** result = (Person **) BinarySearch
00194                        (&key, persons, count, sizeof(Person *),
00195                         COMPAREFUNC CompareKeyToPerson);
00196 
00197     return (result == NULL) ? (Person *) NULL : *result;
00198 }
00199 
00200 Person * Pedigree::FindPerson(const char *famid, const char *pid, int universe)
00201 {
00202     PedigreeKey key;
00203     key.famid = famid;
00204     key.pid   = pid;
00205 
00206     Person ** result = (Person **) BinarySearch
00207                        (&key, persons, universe, sizeof(Person *),
00208                         COMPAREFUNC CompareKeyToPerson);
00209 
00210     return (result == NULL) ? (Person *) NULL : *result;
00211 }
00212 
00213 Family * Pedigree::FindFamily(const char * famid)
00214 {
00215     PedigreeKey key;
00216     key.famid = famid;
00217 
00218     Family ** result = (Family **) BinarySearch
00219                        (&key, families, familyCount, sizeof(Family *),
00220                         COMPAREFUNC CompareKeyToFamily);
00221 
00222     return (result == NULL) ? (Family *) NULL : *result;
00223 }
00224 
00225 int Pedigree::CountAlleles(int marker)
00226 {
00227     return ::CountAlleles(*this, marker);
00228 }
00229 
00230 void Pedigree::LumpAlleles(double min, bool reorder)
00231 {
00232     if (min > 0.0)
00233         printf("Lumping alleles with frequencies of %.2f or less...\n\n", min);
00234 
00235     for (int m=0; m < markerCount; m++)
00236         ::LumpAlleles(*this, m, min, reorder);
00237 }
00238 
00239 void Pedigree::EstimateFrequencies(int estimator, bool quiet)
00240 {
00241     bool estimated = false;
00242     int  line = 3;
00243 
00244     const char * estimators[] = { "using all genotypes", "using founder genotypes", "assumed equal" };
00245 
00246     bool condensed = markerCount > 100;
00247     int  grain = markerCount / 50, estimates = 0;
00248 
00249     for (int m=0; m < markerCount; m++)
00250         if (::EstimateFrequencies(*this, m, estimator))
00251             if (!quiet)
00252             {
00253                 if (!estimated)
00254                     printf("Estimating allele frequencies... [%s]\n   ",
00255                            estimators[estimator]), estimated = true;
00256 
00257                 if (condensed)
00258                 {
00259                     if (estimates++ % grain == 0)
00260                     {
00261                         printf(".");
00262                         fflush(stdout);
00263                     }
00264                     continue;
00265                 }
00266 
00267                 if (line + markerNames[m].Length() + 1 > 79)
00268                     printf("\n   "), line = 3;
00269 
00270                 printf("%s ", (const char *) markerNames[m]);
00271                 line += markerNames[m].Length() + 1;
00272             }
00273 
00274     if (estimated)
00275         printf(condensed ? "\nDone estimating frequencies for %d markers\n\n" : "\n\n", estimates);
00276 }
00277 
00278 int Pedigree::ComparePersons(const Person ** p1, const Person ** p2)
00279 {
00280     int result = SlowCompare((**p1).famid, (**p2).famid);
00281 
00282     if (result != 0) return result;
00283 
00284     return SlowCompare((**p1).pid, (**p2).pid);
00285 }
00286 
00287 int Pedigree::CompareParents(const Person ** p1, const Person ** p2)
00288 {
00289     int result = SlowCompare((**p1).famid, (**p2).famid);
00290 
00291     if (result) return result;
00292 
00293     result = SlowCompare((**p1).fatid, (**p2).fatid);
00294 
00295     if (result) return result;
00296 
00297     return SlowCompare((**p1).motid, (**p2).motid);
00298 }
00299 
00300 void Pedigree::Grow()
00301 {
00302     size *= 2;
00303 
00304     Person ** temp = new Person * [size];
00305     if (temp == NULL) error("Out of memory");
00306 
00307     for (int i=0; i<count; i++)
00308         temp[i] = persons[i];
00309 
00310     delete [] persons;
00311     persons = temp;
00312 }
00313 
00314 void Pedigree::Add(Person & rhs)
00315 {
00316     if (count == size)
00317         Grow();
00318 
00319     persons[count] = new Person();
00320     persons[count++]->Copy(rhs);
00321 }
00322 
00323 void Pedigree::WriteDataFile(FILE * output)
00324 {
00325     // write in the following order:
00326     // markers, traits, affections, covariates
00327 
00328     if (haveTwins)
00329         fprintf(output, " Z  Zygosity \n");
00330 
00331     for (int m = 0; m < markerCount; m++)
00332         fprintf(output, " M  %s \n", (const char *) markerNames[m]);
00333 
00334     for (int t = 0; t < traitCount; t++)
00335         fprintf(output, " T  %s \n", (const char *) traitNames[t]);
00336 
00337     for (int a = 0; a < affectionCount; a++)
00338         fprintf(output, " A  %s \n", (const char *) affectionNames[a]);
00339 
00340     for (int c = 0; c < covariateCount; c++)
00341         fprintf(output, " C  %s \n", (const char *) covariateNames[c]);
00342 
00343     fprintf(output, " E  END-OF-DATA \n");
00344 }
00345 
00346 void Pedigree::WritePedigreeFile(FILE * output)
00347 {
00348     MarkerInfo ** info = new MarkerInfo * [markerCount];
00349 
00350     for (int i = 0; i < markerCount; i++)
00351         info[i] = GetMarkerInfo(i);
00352 
00353     for (int i = 0; i < count; i++)
00354         WriteRecodedPerson(output, i, info);
00355     fprintf(output, "end\n");
00356 
00357     delete [] info;
00358 }
00359 
00360 void Pedigree::WritePerson(FILE * output, int person, const char * famid,
00361                            const char * pid, const char * fatid, const char * motid)
00362 {
00363     WriteRecodedPerson(output, person, NULL, famid, pid, fatid, motid);
00364 }
00365 
00366 void Pedigree::WriteRecodedPerson(
00367     FILE * output, int person, MarkerInfo ** markerInfo,
00368     const char * famid, const char * pid, const char * fatid,
00369     const char * motid)
00370 {
00371     Person * p = persons[person];
00372 
00373     if (famid == NULL) famid = p->famid;
00374     if (pid == NULL)   pid = p->pid;
00375     if (fatid == NULL) fatid = p->fatid;
00376     if (motid == NULL) motid = p->motid;
00377 
00378     // write in the following order:
00379     // markers, traits, affections, covariates
00380 
00381     fprintf(output, "%s\t%s\t%s\t%s\t%d\t",
00382             famid, pid, fatid, motid, p->sex);
00383 
00384     const char * twinCodes[] = {"0", "MZ", "DZ"};
00385 
00386     if (haveTwins)
00387     {
00388         if (p->zygosity <= 2)
00389             fprintf(output, "%s\t", twinCodes[p->zygosity]);
00390         else
00391             fprintf(output, "%d\t", p->zygosity);
00392     }
00393 
00394     for (int m = 0; m < markerCount; m++)
00395         if (markerInfo == NULL)
00396             fprintf(output, markerCount < 20 ? "%3d/%3d\t" : "%d/%d\t",
00397                     p->markers[m][0], p->markers[m][1]);
00398         else
00399             fprintf(output, markerCount < 20 ? "%3s/%3s\t" : "%s/%s\t",
00400                     (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][0]),
00401                     (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][1]));
00402 
00403     for (int t = 0; t < traitCount; t++)
00404         if (p->isPhenotyped(t))
00405             fprintf(output, "%.3f\t", p->traits[t]);
00406         else
00407             fprintf(output, "x\t");
00408 
00409     for (int a = 0; a < affectionCount; a++)
00410         if (p->isDiagnosed(a))
00411             fprintf(output, "%d\t", p->affections[a]);
00412         else
00413             fprintf(output, "x\t");
00414 
00415     for (int c = 0; c < covariateCount; c++)
00416         if (p->isControlled(c))
00417             fprintf(output, "%.3f\t", p->covariates[c]);
00418         else
00419             fprintf(output, "x\t");
00420 
00421     fprintf(output, "\n");
00422 }
00423 
00424 void Pedigree::WriteDataFile(const char * output)
00425 {
00426     FILE * f = fopen(output, "wt");
00427     if (f == NULL) error("Couldn't open data file %s", output);
00428     WriteDataFile(f);
00429     fclose(f);
00430 }
00431 
00432 void Pedigree::WritePedigreeFile(const char * output)
00433 {
00434     FILE * f = fopen(output, "wt");
00435     if (f == NULL) error("Couldn't open pedigree file %s", output);
00436     WritePedigreeFile(f);
00437     fclose(f);
00438 }
00439 
00440 void Pedigree::PrepareDichotomization()
00441 {
00442 
00443     for (int t = 0; t < traitCount; t++)
00444     {
00445         String new_affection = traitNames[t] + "*";
00446         GetAffectionID(new_affection);
00447     }
00448 }
00449 
00450 int Pedigree::Dichotomize(int t, double mean)
00451 {
00452     String new_affection = traitNames[t] + "*";
00453 
00454     int af = GetAffectionID(new_affection);
00455 
00456     if (mean == _NAN_)
00457     {
00458         mean  = 0.0;
00459         double dcount = 0;
00460         for (int i = 0; i < count; i++)
00461             if (persons[i]->isPhenotyped(t) &&
00462                     !persons[i]->isFounder())
00463             {
00464                 mean += persons[i]->traits[t];
00465                 dcount ++;
00466             }
00467 
00468         if (!dcount) return af;
00469 
00470         mean /= dcount;
00471     }
00472 
00473     printf("Dichotomizing %s around mean of %.3f ...\n",
00474            (const char *) traitNames[t], mean);
00475 
00476     for (int i = 0; i < count; i++)
00477         if (persons[i]->isPhenotyped(t) && !persons[i]->isFounder())
00478             persons[i]->affections[af] = persons[i]->traits[t] > mean ? 2 : 1;
00479         else
00480             persons[i]->affections[af] = 0;
00481 
00482     Sort();
00483 
00484     return af;
00485 }
00486 
00487 void Pedigree::DichotomizeAll(double mean)
00488 {
00489     for (int t = 0; t < traitCount; t++)
00490         Dichotomize(t, mean);
00491 }
00492 
00493 bool Pedigree::InheritanceCheck(bool abortIfInconsistent)
00494 {
00495     bool fail = false;
00496 
00497     if (haveTwins) fail |= TwinCheck();
00498 
00499     if (chromosomeX)
00500         fail |= SexLinkedCheck();
00501     else
00502         fail |= AutosomalCheck();
00503 
00504     if (fail && abortIfInconsistent)
00505         error("Mendelian inheritance errors detected\n");
00506 
00507     return !fail;
00508 }
00509 
00510 bool Pedigree::AutosomalCheck()
00511 {
00512     // Arrays indicating which alleles and homozygotes occur
00513     IntArray haplos, genos, counts, failedFamilies;
00514 
00515     bool fail = false;
00516 
00517     // For each marker ...
00518     for (int m = 0; m < markerCount; m++)
00519     {
00520         MarkerInfo * info = GetMarkerInfo(m);
00521 
00522         // Summary for marker
00523         int alleleCount = CountAlleles(m);
00524         int genoCount = alleleCount * (alleleCount + 1) / 2;
00525 
00526         // Initialize arrays
00527         haplos.Dimension(alleleCount + 1);
00528         haplos.Set(-1);
00529 
00530         genos.Dimension(genoCount + 1);
00531         genos.Set(-1);
00532 
00533         failedFamilies.Dimension(familyCount);
00534         failedFamilies.Zero();
00535 
00536         counts.Dimension(alleleCount + 1);
00537 
00538         for (int f = 0; f < familyCount; f++)
00539             for (int i = families[f]->first; i <= families[f]->last; i++)
00540                 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
00541                 {
00542                     // This loop runs once per sibship
00543                     Alleles fat = persons[i]->father->markers[m];
00544                     Alleles mot = persons[i]->mother->markers[m];
00545                     bool    fgeno = fat.isKnown();
00546                     bool    mgeno = mot.isKnown();
00547 
00548                     // Number of alleles, homozygotes and genotypes in this sibship
00549                     int haplo = 0, homo = 0, diplo = 0;
00550 
00551                     // No. of different genotypes per allele
00552                     counts.Zero();
00553 
00554                     // In general, there should be no more than 3 genotypes per allele
00555                     bool too_many_genos = false;
00556 
00557                     for (int j = 0; j < persons[i]->sibCount; j++)
00558                         if (persons[i]->sibs[j]->isGenotyped(m))
00559                         {
00560                             Alleles geno = persons[i]->sibs[j]->markers[m];
00561 
00562                             int fat1 = fat.hasAllele(geno.one);
00563                             int fat2 = fat.hasAllele(geno.two);
00564                             int mot1 = mot.hasAllele(geno.one);
00565                             int mot2 = mot.hasAllele(geno.two);
00566 
00567                             if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
00568                                     (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
00569                             {
00570                                 printf("%s - Fam %s: Child %s [%s/%s] has ",
00571                                        (const char *) markerNames[m],
00572                                        (const char *) persons[i]->sibs[j]->famid,
00573                                        (const char *) persons[i]->sibs[j]->pid,
00574                                        (const char *) info->GetAlleleLabel(geno.one),
00575                                        (const char *) info->GetAlleleLabel(geno.two));
00576 
00577                                 if (!fgeno || !mgeno)
00578                                     printf("%s [%s/%s]\n",
00579                                            fgeno ? "father" : "mother",
00580                                            (const char *) info->GetAlleleLabel(fgeno ? fat.one : mot.one),
00581                                            (const char *) info->GetAlleleLabel(fgeno ? fat.two : mot.two));
00582                                 else
00583                                     printf("parents [%s/%s]*[%s/%s]\n",
00584                                            (const char *) info->GetAlleleLabel(fat.one),
00585                                            (const char *) info->GetAlleleLabel(fat.two),
00586                                            (const char *) info->GetAlleleLabel(mot.one),
00587                                            (const char *) info->GetAlleleLabel(mot.two));
00588 
00589                                 fail = true;
00590                                 failedFamilies[f] = true;
00591                             }
00592                             else
00593                             {
00594                                 if (haplos[geno.one] != i)
00595                                 {
00596                                     haplo++;
00597                                     haplos[geno.one] = i;
00598                                 };
00599                                 if (haplos[geno.two] != i)
00600                                 {
00601                                     haplo++;
00602                                     haplos[geno.two] = i;
00603                                 };
00604 
00605                                 int index = geno.SequenceCoded();
00606 
00607                                 if (genos[index] != i)
00608                                 {
00609                                     genos[index] = i;
00610                                     diplo++;
00611                                     counts[geno.one]++;
00612                                     if (geno.isHomozygous())
00613                                         homo++;
00614                                     else
00615                                         counts[geno.two]++;
00616                                     if (counts[geno.one] > 2) too_many_genos = true;
00617                                     if (counts[geno.two] > 2) too_many_genos = true;
00618                                 }
00619                             }
00620                         }
00621 
00622                     if (fgeno)
00623                     {
00624                         if (haplos[fat.one] != i)
00625                         {
00626                             haplo++;
00627                             haplos[fat.one] = i;
00628                         }
00629                         if (haplos[fat.two] != i)
00630                         {
00631                             haplo++;
00632                             haplos[fat.two] = i;
00633                         }
00634                         homo += fat.isHomozygous();
00635                     }
00636 
00637                     if (mgeno)
00638                     {
00639                         if (haplos[mot.one] != i)
00640                         {
00641                             haplo++;
00642                             haplos[mot.one] = i;
00643                         }
00644                         if (haplos[mot.two] != i)
00645                         {
00646                             haplo++;
00647                             haplos[mot.two] = i;
00648                         }
00649                         homo += mot.isHomozygous();
00650                     }
00651 
00652                     if (diplo > 4 || haplo + homo > 4 || (haplo == 4 && too_many_genos))
00653                     {
00654                         printf("%s - Fam %s: ",
00655                                (const char *) markerNames[m],
00656                                (const char *) persons[i]->famid);
00657                         if (persons[i]->father->markers[m].isKnown())
00658                             printf("Father %s [%s/%s] has children [",
00659                                    (const char *) persons[i]->father->pid,
00660                                    (const char *) info->GetAlleleLabel(fat.one),
00661                                    (const char *) info->GetAlleleLabel(fat.two));
00662                         else if (persons[i]->mother->markers[m].isKnown())
00663                             printf("Mother %s [%s/%s] has children [",
00664                                    (const char *) persons[i]->mother->pid,
00665                                    (const char *) info->GetAlleleLabel(mot.one),
00666                                    (const char *) info->GetAlleleLabel(mot.two));
00667                         else
00668                             printf("Couple %s * %s has children [",
00669                                    (const char *) persons[i]->mother->pid,
00670                                    (const char *) persons[i]->father->pid);
00671 
00672                         for (int j = 0; j < persons[i]->sibCount; j++)
00673                             printf("%s%s/%s", j == 0 ? "" : " ",
00674                                    (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
00675                                    (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
00676                         printf("]\n");
00677 
00678                         fail = true;
00679                         failedFamilies[f] = true;
00680                     }
00681                 }
00682 
00683         for (int f = 0; f < familyCount; f++)
00684             if (!failedFamilies[f] &&
00685                     (families[f]->count > families[f]->founders + 1) &&
00686                     !families[f]->isNuclear())
00687                 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
00688     }
00689 
00690     if (fail)
00691         printf("\nMendelian inheritance errors detected\n");
00692 
00693     return fail;
00694 }
00695 
00696 bool Pedigree::SexLinkedCheck()
00697 {
00698     bool fail = false;
00699 
00700     // Keep track of what families fail the basic inheritance check,
00701     // so that we can run later run genotype elimination check on the remainder
00702     IntArray failedFamilies(familyCount);
00703 
00704     // For each marker ...
00705     for (int m = 0; m < markerCount; m++)
00706     {
00707         MarkerInfo * info = GetMarkerInfo(m);
00708 
00709         failedFamilies.Zero();
00710 
00711         // Check for homozygous males
00712         for (int f = 0; f < familyCount; f++)
00713             for (int i = families[f]->first; i <= families[f]->last; i++)
00714                 if (persons[i]->sex == SEX_MALE && persons[i]->markers[m].isKnown() &&
00715                         !persons[i]->markers[m].isHomozygous())
00716                 {
00717                     printf("%s - Fam %s: Male %s has two X alleles [%s/%s]\n",
00718                            (const char *) markerNames[m],
00719                            (const char *) persons[i]->famid, (const char *) persons[i]->pid,
00720                            (const char *) info->GetAlleleLabel(persons[i]->markers[m].one),
00721                            (const char *) info->GetAlleleLabel(persons[i]->markers[m].two));
00722 
00723                     // Wipe this genotype so we don't get cascading errors below
00724                     persons[i]->markers[m][0] = persons[i]->markers[m][1] = 0;
00725 
00726                     fail = true;
00727                     failedFamilies[f] = true;
00728                 }
00729 
00730         // Check full sibships for errors
00731         // TODO -- We could do better by grouping male half-sibs
00732         for (int f = 0; f < familyCount; f++)
00733             for (int i = families[f]->first; i <= families[f]->last; i++)
00734                 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
00735                 {
00736                     // This loop runs once per sibship
00737                     Alleles fat = persons[i]->father->markers[m];
00738                     Alleles mot = persons[i]->mother->markers[m];
00739 
00740                     bool fgeno = fat.isKnown();
00741                     bool mgeno = mot.isKnown();
00742 
00743                     Alleles inferred_mother = mot;
00744                     Alleles first_sister;
00745                     Alleles inferred_father;
00746 
00747                     bool mother_ok = true;
00748 
00749                     int sisters = 0;
00750 
00751                     for (int j = 0; j < persons[i]->sibCount; j++)
00752                         if (persons[i]->sibs[j]->isGenotyped(m))
00753                         {
00754                             Alleles geno = persons[i]->sibs[j]->markers[m];
00755 
00756                             bool fat1 = fat.hasAllele(geno.one);
00757                             bool fat2 = fat.hasAllele(geno.two);
00758                             bool mot1 = mot.hasAllele(geno.one);
00759                             bool mot2 = mot.hasAllele(geno.two);
00760 
00761                             int sex = persons[i]->sibs[j]->sex;
00762 
00763                             if (sex == SEX_MALE)
00764                             {
00765                                 if (mgeno && !mot1)
00766                                 {
00767                                     printf("%s - Fam %s: Child %s [%s/Y] has mother [%s/%s]\n",
00768                                            (const char *) markerNames[m],
00769                                            (const char *) persons[i]->famid,
00770                                            (const char *) persons[i]->sibs[j]->pid,
00771                                            (const char *) info->GetAlleleLabel(geno.one),
00772                                            (const char *) info->GetAlleleLabel(mot.one),
00773                                            (const char *) info->GetAlleleLabel(mot.two));
00774                                     fail = true;
00775                                     failedFamilies[f] = true;
00776                                 }
00777                                 else
00778                                     mother_ok &= inferred_mother.AddAllele(geno.one);
00779                             }
00780                             if (sex == SEX_FEMALE)
00781                             {
00782                                 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
00783                                         (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
00784                                 {
00785                                     printf("%s - Fam %s: Child %s [%s/%s] has ",
00786                                            (const char *) markerNames[m],
00787                                            (const char *) persons[i]->famid,
00788                                            (const char *) persons[i]->sibs[j]->pid,
00789                                            (const char *) info->GetAlleleLabel(geno.one),
00790                                            (const char *) info->GetAlleleLabel(geno.two));
00791 
00792                                     if (!fgeno)
00793                                         printf("mother [%s/%s]\n",
00794                                                (const char *) info->GetAlleleLabel(mot.one),
00795                                                (const char *) info->GetAlleleLabel(mot.two));
00796                                     else if (!mgeno)
00797                                         printf("father [%s/Y]\n",
00798                                                (const char *) info->GetAlleleLabel(fat.one));
00799                                     else
00800                                         printf("parents [%s/Y]*[%s/%s]\n",
00801                                                (const char *) info->GetAlleleLabel(fat.one),
00802                                                (const char *) info->GetAlleleLabel(mot.one),
00803                                                (const char *) info->GetAlleleLabel(mot.two));
00804 
00805                                     fail = true;
00806                                     failedFamilies[f] = true;
00807                                 }
00808                                 else
00809                                 {
00810                                     if (!sisters++)
00811                                         inferred_father = first_sister = geno;
00812                                     else if (first_sister != geno)
00813                                     {
00814                                         inferred_father.Intersect(geno);
00815 
00816                                         mother_ok &= inferred_mother.AddAllele(
00817                                                          geno.otherAllele(inferred_father.one));
00818                                         mother_ok &= inferred_mother.AddAllele(
00819                                                          first_sister.otherAllele(inferred_father.one));
00820                                     }
00821 
00822                                     if (!fgeno && (mot1 ^ mot2))
00823                                         inferred_father.Intersect(mot1 ? geno.two : geno.one);
00824 
00825                                     if (!mgeno && (fat1 ^ fat2))
00826                                         mother_ok &= inferred_mother.AddAllele(fat1 ? geno.two : geno.one);
00827                                 }
00828                             }
00829                         }
00830 
00831                     if (!mother_ok || (sisters && !inferred_father.isKnown()))
00832                     {
00833                         printf("%s - Fam %s: ",
00834                                (const char *) markerNames[m],
00835                                (const char *) persons[i]->famid);
00836                         if (fgeno)
00837                             printf("Father %s [%s/Y] has children [",
00838                                    (const char *) persons[i]->father->pid,
00839                                    (const char *) info->GetAlleleLabel(fat.one));
00840                         else if (mgeno)
00841                             printf("Mother %s [%s/%s] has children [",
00842                                    (const char *) persons[i]->mother->pid,
00843                                    (const char *) info->GetAlleleLabel(mot.one),
00844                                    (const char *) info->GetAlleleLabel(mot.two));
00845                         else
00846                             printf("Couple %s * %s has children [",
00847                                    (const char *) persons[i]->mother->pid,
00848                                    (const char *) persons[i]->father->pid);
00849 
00850                         for (int j = 0; j < persons[i]->sibCount; j++)
00851                             printf(
00852                                 persons[i]->sibs[j]->sex == SEX_MALE ? "%s%s/Y" : "%s%s/%s",
00853                                 j == 0 ? "" : " ",
00854                                 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
00855                                 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
00856                         printf("]\n");
00857                         fail = true;
00858                         failedFamilies[f] = true;
00859                     }
00860                 }
00861 
00862         for (int f = 0; f < familyCount; f++)
00863             if (!failedFamilies[f] &&
00864                     (families[f]->count > families[f]->founders + 1) &&
00865                     !families[f]->isNuclear())
00866                 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
00867     }
00868 
00869     if (fail)
00870         printf("\nMendelian inheritance errors detected\n");
00871 
00872     return fail;
00873 }
00874 
00875 void Pedigree::ExtractFamily(int id, Pedigree & single_fam_ped)
00876 {
00877     for (int i = families[id]->first; i <= families[id]->last; i++)
00878         single_fam_ped.Add(*persons[i]);
00879 
00880     single_fam_ped.Sort();
00881 }
00882 
00883 void Pedigree::ExtractOnAffection(int a, Pedigree & new_ped, int target_status)
00884 {
00885     for (int i = 0; i < count; i++)
00886         if (persons[i]->affections[a] == target_status)
00887             new_ped.Add(*persons[i]);
00888         else
00889         {
00890             Person blank_person;
00891             blank_person.CopyIDs(*persons[i]);
00892             new_ped.Add(blank_person);
00893         }
00894 
00895     new_ped.Sort();
00896 }
00897 
00898 void Pedigree::Filter(IntArray & filter)
00899 {
00900     if (filter.Length() != count)
00901         error("Pedigree:Size of pedigree filter doesn't match number of persons in pedigree");
00902 
00903     for (int i = 0; i < count; i++)
00904         if (filter[i] == 1)
00905         {
00906             persons[i]->WipePhenotypes();
00907             persons[i]->filter = true;
00908         }
00909 }
00910 
00911 void Pedigree::AddPerson(const char * famid, const char * pid,
00912                          const char * fatid, const char * motid,
00913                          int sex, bool delay_sort)
00914 {
00915     if (count == size) Grow();
00916 
00917     persons[count] = new Person;
00918 
00919     persons[count]->famid = famid;
00920     persons[count]->pid = pid;
00921     persons[count]->fatid = fatid;
00922     persons[count]->motid = motid;
00923     persons[count]->sex = sex;
00924 
00925     count++;
00926 
00927     if (!delay_sort) Sort();
00928 }
00929 
00930 void Pedigree::ShowMemoryInfo()
00931 {
00932     unsigned int bytes = 0;
00933 
00934     for (int i = 0; i < count; i++)
00935         bytes += persons[i]->famid.BufferSize() + persons[i]->pid.BufferSize() +
00936                  persons[i]->fatid.BufferSize() + persons[i]->motid.BufferSize();
00937 
00938     bytes += count * (markerCount * sizeof(Alleles) + traitCount * sizeof(double) +
00939                       covariateCount * sizeof(double) + affectionCount * sizeof(char) +
00940                       sizeof(Person));
00941 
00942     printf("   %40s %s\n", "Pedigree file ...", (const char *) MemoryInfo(bytes));
00943 }
00944 
00945 
Generated on Wed Nov 17 15:38:29 2010 for StatGen Software by  doxygen 1.6.3