libStatGen Software  1
GenotypeLists.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "GenotypeLists.h"
00019 
00020 // When the next line is uncommented, the genotype elimination routines
00021 // produce a lot of output useful for debugging
00022 // #define DEBUG_ELIMINATOR
00023 
00024 GenotypeList::GenotypeList()
00025 {
00026     ignore = false;
00027 }
00028 
00029 bool GenotypeList::EliminateGenotypes(Pedigree & ped, Family * family, int marker)
00030 {
00031     // First, allocate a genotype list for the family
00032     GenotypeList * list = new GenotypeList [family->count];
00033 
00034     // Next, update the possible allele lists for each individual
00035     InitializeList(list, ped, family, marker);
00036 
00037     // Then, do multiple rounds of elimination until a problem is found
00038     // or no changes are made
00039 
00040 #ifdef DEBUG_ELIMINATOR
00041     Print(list, ped, family, marker);
00042 #endif
00043 
00044     while (PairwiseCheck(list, ped, family) || FamilyCheck(list, ped, family))
00045 #ifdef DEBUG_ELIMINATOR
00046         Print(list, ped, family, marker)
00047 #endif
00048         ;
00049 
00050     for (int i = 0; i < family->count; i++)
00051         if (!list[i].ignore && list[i].allele1.Length() == 0)
00052         {
00053             printf("%s - Family %s has a subtle genotype inconsistency\n",
00054                    (const char *) ped.markerNames[marker], (const char *) family->famid);
00055 
00056             delete [] list;
00057             return false;
00058         }
00059 
00060     delete [] list;
00061     return true;
00062 }
00063 
00064 void GenotypeList::InitializeList(GenotypeList * list, Pedigree & ped, Family * family, int marker)
00065 {
00066     for (int i = family->count - 1; i >= 0; i--)
00067     {
00068         Person & person = ped[family->path[i]];
00069         int id = person.traverse;
00070         bool maleX = person.sex == SEX_MALE && ped.chromosomeX;
00071 
00072 #ifdef DEBUG_ELIMINATOR
00073         printf("Initializing genotype list for %s ...\n", (const char *) person.pid);
00074 #endif
00075 
00076         // If an individual is genotyped ...
00077         if (person.markers[marker].isKnown())
00078         {
00079             // Their genotype list starts with just one entry!
00080             list[id].Dimension(1);
00081             list[id].SetGenotype(0, person.markers[marker][0], person.markers[marker][1]);
00082             list[id].alleles.Clear();
00083             list[id].alleles.Push(person.markers[marker][0]);
00084             list[id].alleles.PushIfNew(person.markers[marker][1]);
00085             list[id].ignore = false;
00086 
00087             // "Heterozygous" males have no possible genotypes
00088             if (maleX && person.markers[marker].isHeterozygous())
00089                 list[id].Dimension(0);
00090         }
00091         else if (list[id].alleles.Length())
00092             if (person.sex == SEX_MALE && ped.chromosomeX)
00093             {
00094                 // Males only carry one X chromosome
00095                 list[id].Dimension(list[id].alleles.Length() + 1);
00096 
00097                 for (int i = 0, out = 0; i < list[id].alleles.Length(); i++)
00098                     list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[i]);
00099                 list[id].SetGenotype(list[id].alleles.Length(), -1, -1);
00100 
00101                 list[id].ignore = false;
00102             }
00103             else
00104             {
00105                 // Build the genotype list based on the available allele lists
00106                 int count = list[id].alleles.Length() * (list[id].alleles.Length() + 3) / 2 + 1;
00107 
00108                 list[id].Dimension(count);
00109 
00110                 for (int i = 0, out = 0; i < list[id].alleles.Length(); i++)
00111                 {
00112                     // Allow for all pairs of "transmitted" alleles
00113                     for (int j = 0; j <= i; j++)
00114                         list[id].SetGenotype(out++, list[id].alleles[i], list[id].alleles[j]);
00115 
00116                     // Allow for an unstransmitted allele
00117                     list[id].SetGenotype(out++, list[id].alleles[i], -1);
00118                 }
00119 
00120                 // Allow for a pair of untransmitted alleles
00121                 list[id].SetGenotype(count - 1, -1, -1);
00122 
00123                 list[id].ignore = false;
00124             }
00125         else
00126             list[id].ignore = true;
00127 
00128         // If the individual is a founder this is all there is to it
00129         if (i < family->founders) continue;
00130 
00131         // If the individual is not a founder, update the parental genotype lists...
00132         int fatid = person.father->traverse;
00133         int motid = person.mother->traverse;
00134 
00135         for (int i = 0; i < list[id].alleles.Length(); i++)
00136         {
00137             list[motid].alleles.PushIfNew(list[id].alleles[i]);
00138             if (!maleX) list[fatid].alleles.PushIfNew(list[id].alleles[i]);
00139         }
00140     }
00141 }
00142 
00143 bool GenotypeList::PairwiseCheck(GenotypeList * list, Pedigree & ped, Family * family)
00144 {
00145 #ifdef DEBUG_ELIMINATOR
00146     printf("Checking Relative Pairs ...\n");
00147 #endif
00148 
00149     bool changed = false;
00150 
00151     for (int i = family->count - 1; i >= family->founders; i--)
00152     {
00153         Person & person = ped[family->path[i]];
00154 
00155         int id = person.traverse;
00156         int fatid = person.father->traverse;
00157         int motid = person.mother->traverse;
00158 
00159         bool maleX = person.sex == SEX_MALE && ped.chromosomeX;
00160 
00161         if (list[id].ignore) continue;
00162 
00163         // Check if genotypes are consistent with paternal genotypes
00164         for (int i = 0; i < list[id].allele1.Length(); i++)
00165         {
00166             int al1 = list[id].allele1[i];
00167             int al2 = list[id].allele2[i];
00168 
00169             // Remove offspring genotypes incompatible with parental genotypes
00170             if ((maleX && !list[motid].Matches(al1) && al1 != -1) ||
00171                     (!maleX && !(al1 == -1 && al2 == -1) &&
00172                      !(list[fatid].Matches(al1) && (al2 == -1 || list[motid].Matches(al2))) &&
00173                      !((al2 == -1 || list[fatid].Matches(al2)) && list[motid].Matches(al1))))
00174             {
00175                 list[id].Delete(i--);
00176                 changed = true;
00177             }
00178         }
00179 
00180         // The offspring genotype list allows for a wild-card untransmitted allele
00181         // so any single parental genotype is possible
00182         if (list[id].Matches(-1))
00183             continue;
00184 
00185         // Check if genotypes are consistent with offspring genotypes
00186         for (int i = 0; i < list[motid].allele1.Length(); i++)
00187         {
00188             int al1 = list[motid].allele1[i];
00189             int al2 = list[motid].allele2[i];
00190 
00191             // Remove genotypes incompatible with offspring genotypes
00192             if (!list[id].Matches(al1) &&
00193                     !list[id].Matches(al2))
00194             {
00195                 list[motid].Delete(i--);
00196                 changed = true;
00197             }
00198         }
00199 
00200         // Males don't affect genotype lists for their fathers
00201         if (maleX) continue;
00202 
00203         // Check if genotypes are consistent with offspring genotypes
00204         for (int i = 0; i < list[fatid].allele1.Length(); i++)
00205         {
00206             int al1 = list[fatid].allele1[i];
00207             int al2 = list[fatid].allele2[i];
00208 
00209             // Remove genotypes incompatible with offspring genotypes
00210             if (!list[id].Matches(al1) &&
00211                     !list[id].Matches(al2))
00212             {
00213                 list[fatid].Delete(i--);
00214                 changed = true;
00215             }
00216         }
00217 
00218 #ifdef DEBUG_ELIMINATOR
00219         printf("Done checking individual %s\n", (const char *) person.pid);
00220         Print(list, ped, family, 0);
00221 #endif
00222     }
00223 
00224     return changed;
00225 }
00226 
00227 
00228 bool GenotypeList::FamilyCheck(GenotypeList * list, Pedigree & ped, Family * family)
00229 {
00230 #ifdef DEBUG_ELIMINATOR
00231     printf("Checking Nuclear Families ...\n");
00232 #endif
00233 
00234     bool changed = false;
00235 
00236     for (int i = family->count - 1; i >= family->founders; i--)
00237     {
00238         Person & person = ped[family->path[i]];
00239 
00240         int fatid = person.father->traverse;
00241         int motid = person.mother->traverse;
00242 
00243         // Only go through the loop once per sibship
00244         if (person.sibs[0] != &person || list[fatid].ignore || list[motid].ignore)
00245             continue;
00246 
00247 #ifdef DEBUG_ELIMINATOR
00248         printf("Checking Sibship with %s ...\n", (const char *) person.pid);
00249 #endif
00250 
00251         // Reset checked genotypes for the mother, father and child
00252         list[fatid].checked = 0;
00253         list[motid].checked = 0;
00254 
00255         for (int i = 0; i < person.sibCount; i++)
00256             list[person.sibs[i]->traverse].checked = 0;
00257 
00258         // Go through each of the paternal genotypes
00259         changed |= TrimParent(list, person, fatid, motid);
00260 
00261         // Go through each of maternal genotypes
00262         changed |= TrimParent(list, person, motid, fatid);
00263 
00264         // Sort out the unchecked offspring genotypes ...
00265         for (int i = 0; i < person.sibCount; i++)
00266         {
00267             int sibid = person.sibs[i]->traverse;
00268             bool maleX = person.sibs[i]->sex == SEX_MALE && ped.chromosomeX;
00269 
00270             // For dealing with male X chromosomes, the pairwise check is all we need
00271             if (maleX) continue;
00272 
00273             for (int j = list[sibid].checked; j < list[sibid].allele1.Length(); j++)
00274                 changed |= Cleanup(list, person, motid, fatid, sibid, j);
00275         }
00276 
00277 #ifdef DEBUG_ELIMINATOR
00278 //      Print(list, ped, family, 0);
00279 #endif
00280     }
00281 
00282     return changed;
00283 }
00284 
00285 bool GenotypeList::Matches(int genotype, int allele)
00286 {
00287     return allele1[genotype] == allele || allele2[genotype] == allele;
00288 }
00289 
00290 bool GenotypeList::Matches(int allele)
00291 {
00292     return allele1.Find(allele) != -1 || allele2.Find(allele) != -1;
00293 }
00294 
00295 int GenotypeList::SaveGenotype(int genotype)
00296 {
00297     if (checked > genotype)
00298         return genotype;
00299 
00300     if (checked != genotype)
00301     {
00302         allele1.Swap(genotype, checked);
00303         allele2.Swap(genotype, checked);
00304     }
00305 
00306     return checked++;
00307 }
00308 
00309 bool GenotypeList::CheckTrio(GenotypeList * list, int fatid, int motid, int child,
00310                              int i, int j, int k)
00311 {
00312     // TODO: add tests for this code...
00313     return   (list[fatid].Matches(i, list[child].allele1[k]) &&
00314              (list[motid].Matches(j, list[child].allele2[k]) || list[child].allele2[k] == -1)) ||
00315              ((list[fatid].Matches(i, list[child].allele2[k]) || list[child].allele2[k] == -1) &&
00316              list[motid].Matches(j, list[child].allele1[k])) ||
00317              (list[child].allele1[k] == -1 && list[child].allele2[k] == -1);
00318 }
00319 
00320 void GenotypeList::Dimension(int genotypes)
00321 {
00322     allele1.Dimension(genotypes);
00323     allele2.Dimension(genotypes);
00324 }
00325 
00326 void GenotypeList::SetGenotype(int genotype, int al1, int al2)
00327 {
00328     allele1[genotype] = al1;
00329     allele2[genotype] = al2;
00330 }
00331 
00332 void GenotypeList::Delete(int genotype)
00333 {
00334     allele1.Delete(genotype);
00335     allele2.Delete(genotype);
00336 }
00337 
00338 bool GenotypeList::TrimParent(GenotypeList * list, Person & person, int motid, int fatid)
00339 {
00340     bool trimmed = false;
00341 
00342     while (list[motid].checked < list[motid].allele1.Length())
00343     {
00344         int  current = list[motid].allele1.Length() - 1;
00345         bool saved = false;
00346 
00347         // Pair it with each possible paternal genotype
00348         for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--)
00349         {
00350             int matches = 0;
00351 
00352             // Find out if the pairing is compatible with at least one genotype for each child
00353             for (int j = 0; j < person.sibCount; j++)
00354             {
00355                 int sibid = person.sibs[j]->traverse;
00356                 int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX;
00357 
00358                 // Since we have done the pairwise check, there is nothing more
00359                 // to do for males ...
00360                 if (list[sibid].ignore || maleX)
00361                 {
00362                     matches++;
00363                     continue;
00364                 }
00365 
00366                 for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--)
00367                     if (CheckTrio(list, motid, fatid, sibid, current, i, k))
00368                     {
00369                         matches++;
00370                         break;
00371                     }
00372 
00373                 if (matches != j + 1)
00374                     break;
00375             }
00376 
00377             // Save maternal and paternal genotypes, mark all compatible sibling genotypes
00378             if (matches == person.sibCount)
00379             {
00380                 for (int j = 0; j < person.sibCount; j++)
00381                 {
00382                     int sibid = person.sibs[j]->traverse;
00383 
00384                     for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++)
00385                         if (CheckTrio(list, motid, fatid, sibid, current, i, k))
00386                             list[sibid].SaveGenotype(k);
00387                 }
00388 
00389                 list[motid].SaveGenotype(current);
00390                 list[fatid].SaveGenotype(i);
00391 
00392                 saved = true;
00393 
00394                 break;
00395             }
00396         }
00397 
00398         if (!saved)
00399         {
00400             list[motid].Delete(current);
00401             trimmed = true;
00402         }
00403     }
00404 
00405     return trimmed;
00406 }
00407 
00408 bool GenotypeList::Cleanup(GenotypeList * list, Person & person, int motid, int fatid, int child, int geno)
00409 {
00410     for (int current = 0; current < list[motid].allele1.Length(); current++)
00411         for (int i = list[fatid].allele1.Length() - 1; i >= 0; i--)
00412             if (CheckTrio(list, motid, fatid, child, current, i, geno))
00413             {
00414                 int matches = 0;
00415 
00416                 // Find out if the pairing is compatible with at least one genotype for each child
00417                 for (int j = 0; j < person.sibCount; j++)
00418                 {
00419                     int sibid = person.sibs[j]->traverse;
00420                     int maleX = person.sibs[j]->sex == SEX_MALE && person.chromosomeX;
00421 
00422                     // After completing the pairwise check, all males are guaranteed
00423                     // to be compatible with their mothers
00424                     if (list[sibid].ignore || maleX)
00425                     {
00426                         matches++;
00427                         continue;
00428                     }
00429 
00430                     for (int k = list[sibid].allele1.Length() - 1; k >= 0; k--)
00431                         if (CheckTrio(list, motid, fatid, sibid, current, i, k))
00432                         {
00433                             matches++;
00434                             break;
00435                         }
00436 
00437                     if (matches != j + 1)
00438                         break;
00439                 }
00440 
00441                 // Update list of compatible sibling genotypes
00442                 if (matches == person.sibCount)
00443                     for (int j = 0; j < person.sibCount; j++)
00444                     {
00445                         int sibid = person.sibs[j]->traverse;
00446 
00447                         for (int k = list[sibid].checked; k < list[sibid].allele1.Length(); k++)
00448                             if (CheckTrio(list, motid, fatid, sibid, current, i, k))
00449                                 list[sibid].SaveGenotype(k);
00450 
00451                         return false;
00452                     }
00453             }
00454 
00455     list[child].Delete(geno);
00456 
00457     return true;
00458 }
00459 
00460 void GenotypeList::Print(GenotypeList * list, Pedigree & ped, Family * family, int marker)
00461 {
00462     MarkerInfo * info = ped.GetMarkerInfo(marker);
00463 
00464     for (int i = 0; i < family->count; i++)
00465     {
00466         printf("%s - ", (const char *) ped[family->path[i]].pid);
00467 
00468         for (int j = 0; j < list[i].allele1.Length(); j++)
00469         {
00470             if (list[i].allele1[j] == -1)
00471                 printf("*/");
00472             else
00473                 printf("%s/", (const char *) info->GetAlleleLabel(list[i].allele1[j]));
00474 
00475             if (list[i].allele2[j] == -1)
00476                 printf("* ");
00477             else
00478                 printf("%s ", (const char *) info->GetAlleleLabel(list[i].allele2[j]));
00479         }
00480 
00481         printf("\n");
00482     }
00483     printf("\n");
00484 }
00485 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends