libStatGen Software  1
PedigreeTrim.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "Pedigree.h"
00019 
00020 void Pedigree::ShowTrimHeader(bool & flag)
00021 {
00022     if (flag)
00023     {
00024         printf("Trimming uninformative individuals...\n");
00025         flag = false;
00026     }
00027 }
00028 
00029 void Pedigree::Trim(bool quiet, int * informative)
00030 {
00031     int       newCount = 0;
00032     Person ** newPersons = new Person * [count];
00033 
00034     // This function applies the following filters to reduce complexity
00035     // of pedigree
00036     //
00037     // RULE 1:         Remove all pedigrees no genotype or phenotype data
00038     // RULE 2:         Remove leaf individuals with no data
00039     // RULE 3:         Remove founder couples with <2 offspring and no data
00040 
00041     bool     showHeader = true;
00042     IntArray discardable, offspring, mates, haveData;
00043 
00044     for (int f = 0; f < familyCount; f++)
00045     {
00046         Family * fam = families[f];
00047 
00048         // Cache for storing indicators about whether each family member is
00049         // informative
00050         haveData.Dimension(fam->count);
00051 
00052         // Check that some data is available in the family
00053         int hasData = false;
00054         for (int i = fam->first; i <= fam->last; i++)
00055             if (informative == NULL)
00056                 hasData |= haveData[persons[i]->traverse] = persons[i]->haveData();
00057             else
00058                 hasData |= haveData[persons[i]->traverse] = informative[i];
00059 
00060         if (!hasData)
00061         {
00062             if (!quiet)
00063             {
00064                 ShowTrimHeader(showHeader);
00065                 printf("   Removing family %s: No data\n", (const char *) fam->famid);
00066             }
00067 
00068             for (int i = fam->first; i <= fam->last; i++)
00069                 delete persons[i];
00070 
00071             continue;
00072         }
00073 
00074         // Assume that we need everyone in the family
00075         discardable.Dimension(fam->count);
00076         discardable.Set(0);
00077 
00078         bool trimming = true;
00079 
00080         while (trimming)
00081         {
00082             trimming = false;
00083 
00084             // Tally the number of offspring for each individual
00085             offspring.Dimension(fam->count);
00086             offspring.Zero();
00087 
00088             // Tally the number of mates for each individual
00089             mates.Dimension(fam->count);
00090             mates.Set(-1);
00091 
00092             // In the first round, we count the number of offspring
00093             // for each individual in the current trimmed version of the
00094             // pedigree
00095             for (int i = fam->count - 1; i >= fam->founders; i--)
00096             {
00097                 if (discardable[i]) continue;
00098 
00099                 Person & p = *(persons[fam->path[i]]);
00100 
00101                 if (discardable[p.father->traverse])
00102                     continue;
00103 
00104                 if (offspring[i] == 0 && !haveData[p.traverse])
00105                 {
00106                     trimming = true;
00107                     discardable[i] = true;
00108                     continue;
00109                 }
00110 
00111                 int father = p.father->traverse;
00112                 int mother = p.mother->traverse;
00113 
00114                 if (mates[father] == -1 && mates[mother] == -1)
00115                 {
00116                     mates[father] = mother,
00117                                     mates[mother] = father;
00118                 }
00119                 else if (mates[father] != mother)
00120                 {
00121                     if (mates[father] >= 0)
00122                         mates[mates[father]] = -2;
00123 
00124                     if (mates[mother] >= 0)
00125                         mates[mates[mother]] = -2;
00126 
00127                     mates[mother] = -2;
00128                     mates[father] = -2;
00129                 }
00130 
00131                 offspring[father]++;
00132                 offspring[mother]++;
00133             }
00134 
00135             // In the second pass, we remove individuals with no
00136             // data who are founders with a single offspring (and
00137             // no multiple matings) or who have no descendants
00138             for (int i = fam->count - 1; i >= 0; i--)
00139             {
00140                 if (discardable[i]) continue;
00141 
00142                 Person & p = *(persons[fam->path[i]]);
00143 
00144                 if (p.isFounder() || discardable[p.father->traverse])
00145                 {
00146                     if (mates[i] == -2 ||
00147                             offspring[i] > 1 ||
00148                             (mates[i] >= fam->founders &&
00149                             !discardable[persons[fam->path[mates[i]]]->father->traverse]) ||
00150                             haveData[p.traverse] ||
00151                             (mates[i] != -1 && haveData[mates[i]]))
00152                         continue;
00153 
00154                     trimming = true;
00155                     discardable[i] = true;
00156                     continue;
00157                 }
00158             }
00159         }
00160 
00161         for (int i = fam->count - 1; i >= 0; i--)
00162             if (discardable[i])
00163             {
00164                 if (!quiet)
00165                 {
00166                     ShowTrimHeader(showHeader);
00167                     printf("   Removing person %s->%s: No data\n",
00168                            (const char *) fam->famid,
00169                            (const char *) persons[fam->path[i]]->pid);
00170                 }
00171                 delete persons[fam->path[i]];
00172             }
00173             else
00174                 newPersons[newCount++] = persons[fam->path[i]];
00175     }
00176 
00177     if (!showHeader)
00178         printf("\n");
00179 
00180     delete [] persons;
00181 
00182     persons = newPersons;
00183     count = newCount;
00184     Sort();
00185 }
00186 
00187 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends