libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "Pedigree.h" 00019 00020 void Pedigree::ShowTrimHeader(bool & flag) 00021 { 00022 if (flag) 00023 { 00024 printf("Trimming uninformative individuals...\n"); 00025 flag = false; 00026 } 00027 } 00028 00029 void Pedigree::Trim(bool quiet, int * informative) 00030 { 00031 int newCount = 0; 00032 Person ** newPersons = new Person * [count]; 00033 00034 // This function applies the following filters to reduce complexity 00035 // of pedigree 00036 // 00037 // RULE 1: Remove all pedigrees no genotype or phenotype data 00038 // RULE 2: Remove leaf individuals with no data 00039 // RULE 3: Remove founder couples with <2 offspring and no data 00040 00041 bool showHeader = true; 00042 IntArray discardable, offspring, mates, haveData; 00043 00044 for (int f = 0; f < familyCount; f++) 00045 { 00046 Family * fam = families[f]; 00047 00048 // Cache for storing indicators about whether each family member is 00049 // informative 00050 haveData.Dimension(fam->count); 00051 00052 // Check that some data is available in the family 00053 int hasData = false; 00054 for (int i = fam->first; i <= fam->last; i++) 00055 if (informative == NULL) 00056 hasData |= haveData[persons[i]->traverse] = persons[i]->haveData(); 00057 else 00058 hasData |= haveData[persons[i]->traverse] = informative[i]; 00059 00060 if (!hasData) 00061 { 00062 if (!quiet) 00063 { 00064 ShowTrimHeader(showHeader); 00065 printf(" Removing family %s: No data\n", (const char *) fam->famid); 00066 } 00067 00068 for (int i = fam->first; i <= fam->last; i++) 00069 delete persons[i]; 00070 00071 continue; 00072 } 00073 00074 // Assume that we need everyone in the family 00075 discardable.Dimension(fam->count); 00076 discardable.Set(0); 00077 00078 bool trimming = true; 00079 00080 while (trimming) 00081 { 00082 trimming = false; 00083 00084 // Tally the number of offspring for each individual 00085 offspring.Dimension(fam->count); 00086 offspring.Zero(); 00087 00088 // Tally the number of mates for each individual 00089 mates.Dimension(fam->count); 00090 mates.Set(-1); 00091 00092 // In the first round, we count the number of offspring 00093 // for each individual in the current trimmed version of the 00094 // pedigree 00095 for (int i = fam->count - 1; i >= fam->founders; i--) 00096 { 00097 if (discardable[i]) continue; 00098 00099 Person & p = *(persons[fam->path[i]]); 00100 00101 if (discardable[p.father->traverse]) 00102 continue; 00103 00104 if (offspring[i] == 0 && !haveData[p.traverse]) 00105 { 00106 trimming = true; 00107 discardable[i] = true; 00108 continue; 00109 } 00110 00111 int father = p.father->traverse; 00112 int mother = p.mother->traverse; 00113 00114 if (mates[father] == -1 && mates[mother] == -1) 00115 { 00116 mates[father] = mother, 00117 mates[mother] = father; 00118 } 00119 else if (mates[father] != mother) 00120 { 00121 if (mates[father] >= 0) 00122 mates[mates[father]] = -2; 00123 00124 if (mates[mother] >= 0) 00125 mates[mates[mother]] = -2; 00126 00127 mates[mother] = -2; 00128 mates[father] = -2; 00129 } 00130 00131 offspring[father]++; 00132 offspring[mother]++; 00133 } 00134 00135 // In the second pass, we remove individuals with no 00136 // data who are founders with a single offspring (and 00137 // no multiple matings) or who have no descendants 00138 for (int i = fam->count - 1; i >= 0; i--) 00139 { 00140 if (discardable[i]) continue; 00141 00142 Person & p = *(persons[fam->path[i]]); 00143 00144 if (p.isFounder() || discardable[p.father->traverse]) 00145 { 00146 if (mates[i] == -2 || 00147 offspring[i] > 1 || 00148 (mates[i] >= fam->founders && 00149 !discardable[persons[fam->path[mates[i]]]->father->traverse]) || 00150 haveData[p.traverse] || 00151 (mates[i] != -1 && haveData[mates[i]])) 00152 continue; 00153 00154 trimming = true; 00155 discardable[i] = true; 00156 continue; 00157 } 00158 } 00159 } 00160 00161 for (int i = fam->count - 1; i >= 0; i--) 00162 if (discardable[i]) 00163 { 00164 if (!quiet) 00165 { 00166 ShowTrimHeader(showHeader); 00167 printf(" Removing person %s->%s: No data\n", 00168 (const char *) fam->famid, 00169 (const char *) persons[fam->path[i]]->pid); 00170 } 00171 delete persons[fam->path[i]]; 00172 } 00173 else 00174 newPersons[newCount++] = persons[fam->path[i]]; 00175 } 00176 00177 if (!showHeader) 00178 printf("\n"); 00179 00180 delete [] persons; 00181 00182 persons = newPersons; 00183 count = newCount; 00184 Sort(); 00185 } 00186 00187