libStatGen Software  1
PedigreeFamily.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "Pedigree.h"
00019 #include "Constant.h"
00020 #include "MathConstant.h"
00021 #include "Error.h"
00022 
00023 #include <stdlib.h>
00024 #include <ctype.h>
00025 #include <string.h>
00026 #include <limits.h>
00027 
00028 Family::Family(Pedigree & pedigree, int _first, int _last, int _serial) :
00029         ped(pedigree)
00030 {
00031     serial = _serial;
00032     first = _first;
00033     last = _last;
00034     count = last - first + 1;
00035     path = new int [count];
00036     famid = ped[first].famid;
00037 
00038     founders = mzTwins = 0;
00039 
00040     for (int i=first; i<=last; i++)
00041         if (ped[i].isFounder())
00042         {
00043             ped[i].traverse = founders;
00044             path[founders++] = ped[i].serial;
00045         }
00046         else
00047         {
00048             ped[i].traverse = -1;
00049             if (ped[i].isMzTwin(ped[i]))
00050                 for (int j = first; j < i; j++)
00051                     if (ped[i].isMzTwin(ped[j]))
00052                     {
00053                         mzTwins++;
00054                         break;
00055                     }
00056         }
00057 
00058     nonFounders = count - founders;
00059     generations = nonFounders == 0 ? 1 : 2;
00060 
00061     int next = founders;
00062     while (next < count)
00063     {
00064         bool check = false;
00065 
00066         // Create traversal where path ancestors precede their offspring
00067         for (int i=first; i<=last; i++)
00068             if (ped[i].traverse == -1)
00069             {
00070                 int fatherSerial = ped[i].father->traverse;
00071                 int motherSerial = ped[i].mother->traverse;
00072 
00073                 if (fatherSerial >= 0 && motherSerial >= 0)
00074                 {
00075                     check = true;
00076 
00077                     ped[i].traverse = next;
00078                     path[next++] = i;
00079 
00080                     if (fatherSerial >= founders || motherSerial >= founders)
00081                         generations = 3;
00082 
00083                     // If this individual is part of a set of MZ twins
00084                     if (ped[i].zygosity & 1)
00085                         for (int j = 0; j < ped[i].sibCount; j++)
00086                         {
00087                             Person & sib = *ped[i].sibs[j];
00088 
00089                             // Insert all co-twins at the same position in traversal
00090                             // order
00091                             if (sib.traverse == -1 && ped[i].zygosity == sib.zygosity)
00092                             {
00093                                 sib.traverse = next;
00094                                 path[next++] = sib.serial;
00095                             }
00096                         }
00097                 }
00098             }
00099 
00100         if (!check) ShowInvalidCycles();
00101     }
00102 }
00103 
00104 Family::~Family()
00105 {
00106     delete [] path;
00107 }
00108 
00109 void Family::ShowInvalidCycles()
00110 {
00111     // Try and identify key individuals responsible for
00112     // pedigree mess-up ... when this function is called
00113     // pedigree has been traversed top-down and individuals
00114     // that are correctly specified have IDs of >= 0.
00115 
00116     // This routine traverses the pedigree bottom up to
00117     // identify a subset of individuals likely to be causing
00118     // the problem
00119     IntArray descendants(ped.count);
00120     descendants.Zero();
00121 
00122     for (int i = first; i <= last; i++)
00123         if (ped[i].traverse == -1)
00124         {
00125             descendants[ped[i].father->serial]++;
00126             descendants[ped[i].mother->serial]++;
00127         }
00128 
00129     IntArray stack;
00130 
00131     for (int i = first; i <= last; i++)
00132         if (ped[i].traverse == -1 && descendants[i] == 0)
00133         {
00134             stack.Push(i);
00135 
00136             do
00137             {
00138                 int j = stack.Pop();
00139 
00140                 if (ped[j].traverse != -1) continue;
00141 
00142                 ped[j].traverse = 9999;
00143 
00144                 if (--descendants[ped[j].father->serial] == 0)
00145                     stack.Push(ped[j].father->serial);
00146                 if (--descendants[ped[j].mother->serial] == 0)
00147                     stack.Push(ped[j].mother->serial);
00148             }
00149             while (stack.Length());
00150         }
00151 
00152     printf("The structure of family %s requires\n"
00153            "an individual to be his own ancestor.\n\n"
00154            "To identify the problem(s), examine the\n"
00155            "following key individuals:\n\n",
00156            (const char *) famid);
00157 
00158     for (int i = first; i <= last; i++)
00159         if (ped[i].traverse == -1)
00160             printf("Problem Person: %s\n", (const char *) ped[i].pid);
00161 
00162     error("Invalid pedigree structure.");
00163 }
00164 
00165 int Family::ConnectedGroups(IntArray * groupMembership)
00166 {
00167     IntArray groups(count);
00168 
00169     // Use the quick union algorithm to identify connected groups
00170     groups.SetSequence(0, 1);
00171     for (int i = count - 1; i >= founders; i--)
00172     {
00173         // Lookup parents
00174         int group0 = i;
00175         int group1 = ped[path[i]].father->traverse;
00176         int group2 = ped[path[i]].mother->traverse;
00177 
00178         // Identify their corresponding groupings
00179         while (groups[group0] != group0) group0 = groups[group0];
00180         while (groups[group1] != group1) group1 = groups[group1];
00181         while (groups[group2] != group2) group2 = groups[group2];
00182 
00183         int group = group1 < group2 ? group1 : group2;
00184         if (group0 < group) group = group0;
00185 
00186         groups[group0] = groups[group1] = groups[group2] = group;
00187     }
00188 
00189     // Count groupings
00190     int groupCount = 0;
00191     for (int i = 0; i < founders; i++)
00192         if (groups[i] == i)
00193             groupCount++;
00194 
00195     if (groupMembership == NULL)
00196         return groupCount;
00197 
00198     // Flatten tree so all items point to root
00199     for (int i = 1; i < count; i++)
00200         groups[i] = groups[groups[i]];
00201 
00202     // Update group membership info
00203     int group = 0;
00204     groupMembership->Dimension(count);
00205     for (int i = 0; i < count; i++)
00206         if (groups[i] == i)
00207             (*groupMembership)[i] = ++group;
00208         else
00209             (*groupMembership)[i] = (*groupMembership)[groups[i]];
00210 
00211 #if 0
00212     // This stretch of code outputs family structure and group membership
00213     // And should usually be commented out!
00214     for (int j = first; j <= last; j++)
00215         printf("%s %s %s %s %d %d\n",
00216                (const char *) famid, (const char *) ped[j].pid,
00217                (const char *) ped[j].fatid, (const char *) ped[j].motid,
00218                ped[j].sex, groups[ped[j].traverse]);
00219 #endif
00220 
00221     return groupCount;
00222 }
00223 
00224 /*
00225 int Family::ConnectedGroups(IntArray * groupMembership)
00226    {
00227    IntArray * stack = new IntArray[count];
00228    IntArray groups(count);
00229 
00230    groups.Zero();
00231 
00232    int group = 0;
00233    int seed = count - 1;
00234 
00235    // Search for connected sets of individuals until everyone is accounted for
00236    while (true)
00237       {
00238       while ((seed >= 0) && (groups[seed] != 0))
00239          seed--;
00240 
00241       if (seed == -1)
00242          break;
00243 
00244       Mark(seed, ++group, stack, groups);
00245 
00246       for (int j = seed; j >= founders; j--)
00247          if (groups[j] == 0)
00248             {
00249             int fat_j = ped[path[j]].father->traverse;
00250             int mot_j = ped[path[j]].mother->traverse;
00251 
00252             if (groups[fat_j] == group || groups[mot_j] == group)
00253                Mark(j, group, stack, groups);
00254             else
00255                stack[mot_j].Push(j),
00256                stack[fat_j].Push(j);
00257             }
00258 
00259       for (int j = 0; j < count; j++)
00260          stack[j].Clear();
00261       }
00262 
00263    if (groupMembership != NULL)
00264       (*groupMembership) = groups;
00265 
00266    // This stretch of code outputs family structure and group membership
00267    // And should usually be commented out!
00268 #if 0
00269    for (int j = first; j <= last; j++)
00270       printf("%s %s %s %s %d %d\n",
00271          (const char *) famid, (const char *) ped[j].pid,
00272          (const char *) ped[j].fatid, (const char *) ped[j].motid,
00273          ped[j].sex, groups[ped[j].traverse]);
00274 #endif
00275 
00276    delete [] stack;
00277 
00278    return group;
00279    }
00280 
00281 void Family::Mark(int j, int group, IntArray * stack, IntArray & groups)
00282    {
00283    if (groups[j] == group) return;
00284 
00285    groups[j] = group;
00286 
00287    while (stack[j].Length())
00288       Mark(stack[j].Pop(), group, stack, groups);
00289 
00290    if (j < founders) return;
00291 
00292    Mark(ped[path[j]].father->traverse, group, stack, groups);
00293    Mark(ped[path[j]].mother->traverse, group, stack, groups);
00294    }
00295 */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends