libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "PedigreeDescription.h" 00019 #include "MapFunction.h" 00020 #include "MathVector.h" 00021 #include "Constant.h" 00022 #include "FortranFormat.h" 00023 #include "Error.h" 00024 00025 #include <stdlib.h> 00026 #include <ctype.h> 00027 #include <string.h> 00028 #include <math.h> 00029 00030 PedigreeDescription::PedigreeDescription() 00031 { 00032 columnCount = 0; 00033 mendelFormat = false; 00034 } 00035 00036 PedigreeDescription::~PedigreeDescription() 00037 { }; 00038 00039 PedigreeDescription & PedigreeDescription::operator = (PedigreeDescription & rhs) 00040 { 00041 columnCount = rhs.columnCount; 00042 00043 columns = rhs.columns; 00044 columnHash = rhs.columnHash; 00045 00046 return *this; 00047 }; 00048 00049 void PedigreeDescription::Load(IFILE & input, bool warnIfLinkage) 00050 { 00051 // Check if we are dealing with a linkage format data file 00052 String buffer; 00053 StringArray tokens; 00054 00055 mendelFormat = false; 00056 00057 ReadLineHelper(input, buffer, tokens); 00058 ifrewind(input); 00059 00060 if (tokens.Length() == 4 && isdigit(tokens[0][0])) 00061 { 00062 if (warnIfLinkage) printf("Data file looks like a LINKAGE format file...\n\n"); 00063 LoadLinkageDataFile(input); 00064 return; 00065 } 00066 00067 if (buffer.Length() > 18 00068 && (buffer.SubStr(8,8).SlowCompare("AUTOSOME") == 0 || 00069 buffer.SubStr(8,8).SlowCompare("X-LINKED") == 0) 00070 && (isdigit(buffer[16]) || isdigit(buffer[17])) 00071 && (isdigit(buffer[18]) || isdigit(buffer[19]) || 00072 (buffer.Length() > 19 && isdigit(buffer[20])))) 00073 { 00074 printf("Data file looks like a MENDEL format file...\n" 00075 " Activating EXPERIMENTAL support for this format\n\n"); 00076 LoadMendelDataFile(input); 00077 return; 00078 } 00079 00080 // Reset things 00081 ifrewind(input); 00082 int done = 0; 00083 int line = 0; 00084 00085 columns.Clear(); 00086 columnHash.Clear(); 00087 columnCount = 0; 00088 00089 while (!ifeof(input) && !done) 00090 { 00091 int i; 00092 00093 buffer.ReadLine(input); 00094 line++; 00095 00096 tokens.Clear(); 00097 tokens.AddTokens(buffer, WHITESPACE); 00098 00099 if (tokens.Length() < 1) continue; 00100 00101 if (tokens.Length() == 1) 00102 error("Problem reading data file:\n" 00103 "Item #%d (of type %s) has no name.", 00104 columnCount+1, (const char *) tokens[0]); 00105 00106 switch (toupper(tokens[0][0])) 00107 { 00108 case 'A' : 00109 columnHash.Push(GetAffectionID(tokens[1])); 00110 columns.Push(pcAffection); 00111 columnCount++; 00112 break; 00113 case 'M' : 00114 columnHash.Push(GetMarkerID(tokens[1])); 00115 columns.Push(pcMarker); 00116 columnCount++; 00117 break; 00118 case 'T' : 00119 columnHash.Push(GetTraitID(tokens[1])); 00120 columns.Push(pcTrait); 00121 columnCount++; 00122 break; 00123 case 'C' : 00124 columnHash.Push(GetCovariateID(tokens[1])); 00125 columns.Push(pcCovariate); 00126 columnCount++; 00127 break; 00128 case '$' : 00129 columnHash.Push(GetStringID(tokens[1])); 00130 columns.Push(pcString); 00131 columnCount++; 00132 break; 00133 case 'S' : 00134 i = (int) tokens[0].SubStr(1); 00135 i = i > 0 ? i : 1; 00136 while (i--) 00137 { 00138 columns.Push(pcSkip); 00139 columnHash.Push(0); 00140 columnCount++; 00141 } 00142 break; 00143 case 'Z' : 00144 columnHash.Push(0); 00145 columns.Push(pcZygosity); 00146 columnCount++; 00147 break; 00148 case 'V' : 00149 GetMarkerID(tokens[1]); 00150 break; 00151 case 'E' : 00152 done = 1; 00153 break; 00154 case 'U' : 00155 if (toupper(tokens[0][1]) == 'T' && toupper(tokens[0][2]) == 'C') 00156 { 00157 int c = GetCovariateID(tokens[1]); 00158 int t = GetTraitID(tokens[1]); 00159 00160 if (c >= 32767 || t >= 32767) 00161 error("Internal error processing data file\n"); 00162 00163 columnHash.Push(t * 32768 + c); 00164 columns.Push(pcUndocumentedTraitCovariate); 00165 columnCount++; 00166 break; 00167 } 00168 default : 00169 error("Problem in data file (line %d):\n%s\n", 00170 line, (const char *) buffer); 00171 } 00172 } 00173 00174 columns.Push(pcEnd); 00175 columnHash.Push(0); 00176 }; 00177 00178 void PedigreeDescription::Load(const char * iFilename, bool warnIfLinkage) 00179 { 00180 IFILE f = ifopen(iFilename, "rb"); 00181 00182 if (f == NULL) 00183 error( 00184 "The datafile %s cannot be opened\n\n" 00185 "Common causes for this problem are:\n" 00186 " * You might not have used the correct options to specify input file names,\n" 00187 " please check the program documentation for information on how to do this\n\n" 00188 " * The file doesn't exist or the filename might have been misspelt\n\n" 00189 " * The file exists but it is being used by another program which you will need\n" 00190 " to close before continuing\n\n" 00191 " * The file is larger than 2GB and you haven't compiled this application with\n" 00192 " large file support.\n\n", 00193 iFilename); 00194 00195 Load(f, warnIfLinkage); 00196 ifclose(f); 00197 00198 filename = iFilename; 00199 }; 00200 00201 void PedigreeDescription::LoadMap(const char * iFilename) 00202 { 00203 IFILE f = ifopen(iFilename, "rb"); 00204 00205 if (f == NULL) 00206 error( 00207 "The mapfile %s cannot be opened\n\n" 00208 "Please check that the file exists and is not being used by another program\n" 00209 "To find out how to set input filenames, check the documentation\n", 00210 iFilename); 00211 00212 LoadMap(f); 00213 ifclose(f); 00214 }; 00215 00216 void PedigreeDescription::LoadMap(IFILE & input) 00217 { 00218 columns.Clear(); 00219 columnHash.Clear(); 00220 columnCount = 0; 00221 00222 int lastposition = 0; 00223 String buffer; 00224 StringArray tokens; 00225 00226 buffer.ReadLine(input); 00227 tokens.AddTokens(buffer, WHITESPACE); 00228 00229 while (tokens.Length() == 0 && !ifeof(input)) 00230 { 00231 buffer.ReadLine(input); 00232 tokens.AddTokens(buffer, WHITESPACE); 00233 } 00234 00235 if (tokens.Length() != 3) 00236 error("Error reading map file header, which has %d columns.\n" 00237 "Three columns were expected, corresponding to\n" 00238 "MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION\n" 00239 "The offending header is transcribed below:\n\n" 00240 "%s", tokens.Length(), (const char *) buffer); 00241 else 00242 printf("Map file column labels\n" 00243 " -- COLUMN 1, Expecting MARKER_ID, Read %s\n" 00244 " -- COLUMN 2, Expecting MARKER_NAME, Read %s\n" 00245 " -- COLUMN 3, Expection BASE_PAIR_POSITION, Read %s\n\n", 00246 (const char *)(tokens[0]), (const char *)(tokens[1]), 00247 (const char *)(tokens[2])); 00248 00249 int line = 1; 00250 while (!ifeof(input)) 00251 { 00252 int serial; 00253 long position; 00254 00255 buffer.ReadLine(input); 00256 line++; 00257 00258 tokens.Clear(); 00259 tokens.AddTokens(buffer, WHITESPACE); 00260 00261 if (tokens.Length() < 1) continue; 00262 if (tokens.Length() != 3) 00263 error("Each line in the map file should have 3 tokens, corresponding\n" 00264 "to MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION respectively\n" 00265 "However, there are %d tokens in line %d, transcribed below:\n\n" 00266 "%s", tokens.Length(), line, (const char *) buffer); 00267 00268 serial = (int) tokens[0]; 00269 if (serial != columnCount + 1) 00270 error("Reading Marker Index from Map File...\n" 00271 "Markers should be indexed consecutively starting at 1\n" 00272 "Marker %d does not fit this pattern\n", columnCount + 1); 00273 00274 position = (int) tokens[2]; 00275 if (position < lastposition) 00276 error("Reading Marker Position from Map File...\n" 00277 "Marker position should be in base-pairs\n" 00278 "and markers should be in map order\n"); 00279 00280 // TODO -- store marker locations somewhere! 00281 lastposition = position; 00282 00283 columnHash.Push(GetMarkerID(tokens[1])); 00284 columns.Push(pcMarker); 00285 columnCount++; 00286 00287 GetMarkerInfo(tokens[1])->position = position * 1e-8; 00288 } 00289 00290 columns.Push(pcEnd); 00291 columnHash.Push(0); 00292 }; 00293 00294 int PedigreeDescription::CountTextColumns() 00295 { 00296 int count = 0; 00297 00298 for (int i = 0; i < columnCount; i++, count++) 00299 if (columns[i] == pcMarker) 00300 count++; 00301 00302 return count; 00303 } 00304 00305 void PedigreeDescription::LoadLinkageDataFile(const char * iFilename) 00306 { 00307 IFILE f = ifopen(iFilename, "rb"); 00308 00309 if (f == NULL) 00310 error( 00311 "The linkage format datafile %s cannot be opened\n\n" 00312 "Please check that the file exists and is not being used by another program\n" 00313 "To find out how to set input filenames, check the documentation\n", 00314 iFilename); 00315 00316 LoadLinkageDataFile(f); 00317 ifclose(f); 00318 00319 filename = iFilename; 00320 }; 00321 00322 void PedigreeDescription::LoadLinkageDataFile(IFILE & input) 00323 { 00324 columns.Clear(); 00325 columnHash.Clear(); 00326 columnCount = 0; 00327 00328 String buffer, label; 00329 StringArray tokens; 00330 00331 ReadLineHelper(input, buffer, tokens); 00332 00333 if (tokens.Length() != 4 || tokens[2].AsInteger() != (int) chromosomeX || 00334 tokens[0].AsInteger() < 0) 00335 error("Cannot handle first line of data file\n\n" 00336 "Expecting four (4) numeric values, which correspond to:\n" 00337 " num-loci -- number of loci in the pedigree\n" 00338 " this value must be positive\n" 00339 " risk-locus -- locus for which risks should be calculated\n" 00340 " this value will be ignored\n" 00341 " sex-link -- are the loci sex linked [0 - No, 1 - Yes]\n" 00342 " %s\n" 00343 " program -- which LINKAGE program do you want to use?\n" 00344 " this value will also be ignored\n\n" 00345 "The actual input read:\n%s\n", 00346 chromosomeX ? "expecting X-linked data, so this value must be ONE (1)" 00347 : "expecting autosomal data, so this must be ZERO (0)", 00348 (const char *) buffer); 00349 00350 int numloci = tokens[0]; 00351 00352 ReadLineHelper(input, buffer, tokens); 00353 00354 if (tokens.Length() != 4 || 00355 tokens[0].AsInteger() != 0 || 00356 tokens[3].AsInteger() != 0) 00357 error("Cannot handle second line of data file\n\n" 00358 "Expecting four (4) numeric values, which correspond to:\n" 00359 " mutation-model -- must be zero, corresponding to no mutation\n" 00360 " male-mutation-rate -- ignored\n" 00361 " female-mutation-rate -- ignored\n" 00362 " linkage-disequilibrium -- must be zero, may be used in the future to\n" 00363 " read haplotype frequencies\n\n" 00364 "The actual input read:\n%s\n", (const char *) buffer); 00365 00366 StringArray markerOrder; 00367 int unknown = 0; 00368 00369 ReadLineHelper(input, buffer, markerOrder); 00370 00371 if (markerOrder.Length() > numloci) 00372 error("The third line of the data file lists marker order\n\n" 00373 "Although %d loci are defined [in the first line],\n" 00374 "this line includes %d values:\n%s\n", 00375 numloci, markerOrder.Length(), (const char *) buffer); 00376 00377 IntArray locus; 00378 bool need_blank_line = false; 00379 00380 while (!ifeof(input) && numloci--) 00381 { 00382 if (ReadLineHelper(input, buffer, tokens) == 0) 00383 error("Linkage data file ends unexpectedly"); 00384 00385 if (tokens.Length() < 2) 00386 error("Incomplete locus information in data file\n" 00387 "Information for each locus should include 2 or more fiels\n" 00388 "The expected fields are:\n" 00389 " field_type -- indicator of locus type (trait, marker,...)\n" 00390 " alleles -- number of alleles\n" 00391 " name -- locus name, preceded by hash (#) sign\n\n" 00392 "The actual input read:\n%s\n", (const char *) buffer); 00393 00394 int locus_type = (int) tokens[0]; 00395 int alleles = (int) tokens[1]; 00396 00397 String locus_name("LOCUS"); 00398 locus_name += ++unknown; 00399 00400 if (tokens.Length() > 2 && tokens[2][0] == '#') 00401 { 00402 if (tokens[2][1] != 0) 00403 locus_name = tokens[2].SubStr(1); 00404 else if (tokens.Length() > 3) 00405 locus_name = tokens[3]; 00406 } 00407 00408 if ((locus_type == 4 && alleles == 0) || 00409 (locus_type == 4 && alleles == 1)) 00410 { 00411 columnHash.Push(GetCovariateID(locus_name)); 00412 columns.Push(pcCovariate); 00413 columnCount++; 00414 continue; 00415 } 00416 00417 if (locus_type == 0 && alleles == 0) 00418 { 00419 columnHash.Push(GetTraitID(locus_name)); 00420 columns.Push(pcTrait); 00421 columnCount++; 00422 continue; 00423 } 00424 00425 if (ReadLineHelper(input, buffer, tokens) != alleles) 00426 error("Expecting %d allele frequencies, but input has %d columns:\n" 00427 "%s\n", alleles, tokens.Length(), (const char *) buffer); 00428 00429 Vector frequencies(alleles + 1); 00430 00431 frequencies[0] = 0.0; 00432 for (int i = 1; i <= alleles; i++) 00433 frequencies[i] = (double) tokens[i - 1]; 00434 00435 double sum = frequencies.Sum(); 00436 00437 if (sum <= 0.0) 00438 error("Allele frequencies at %s sum to %f, which doesn't make sense\n", 00439 (const char *) locus_name, sum); 00440 00441 if (fabs(sum - 1.0) > 1.2e-5) 00442 { 00443 printf("Allele frequencies at %s sum to %f, adjusted to 1.0\n", 00444 (const char *) locus_name, sum); 00445 need_blank_line = true; 00446 } 00447 00448 if (sum != 1.0) 00449 frequencies *= 1.0 / sum; 00450 00451 switch (locus_type) 00452 { 00453 case 1 : 00454 { 00455 // Affection 00456 columnHash.Push(GetAffectionID(locus_name)); 00457 columns.Push(pcAffection); 00458 columnCount++; 00459 00460 // Read number of liability classes 00461 if (ReadLineHelper(input, buffer, tokens) == 0) 00462 error("Linkage data file ends unexpectedly\n"); 00463 00464 // Skip liability class data 00465 int classes = tokens[0]; 00466 if (classes > 1) 00467 { 00468 columnHash.Push(0); 00469 columns.Push(pcSkip); 00470 columnCount++; 00471 } 00472 00473 // Separate liability class rows for males and females for X-linked data 00474 if (chromosomeX) classes *= 2; 00475 00476 while (classes--) 00477 if (ReadLineHelper(input, buffer, tokens) == 0) 00478 error("Linkage data file ends unexpectedly\n"); 00479 00480 // Ignore map location for quantitative variables 00481 locus.Push(-1); 00482 } 00483 break; 00484 case 3 : 00485 { 00486 columnHash.Push(GetMarkerID(locus_name)); 00487 columns.Push(pcMarker); 00488 columnCount++; 00489 00490 // Store allele frequencies 00491 MarkerInfo * info = GetMarkerInfo(locus_name); 00492 00493 info->freq = frequencies; 00494 00495 // Initialize allele labels 00496 info->alleleLabels.Clear(); 00497 for (int i = 0; i < frequencies.Length(); i++) 00498 info->alleleLabels.Push(label = i); 00499 info->IndexAlleles(); 00500 00501 // Store marker id, so that we can track map location 00502 locus.Push(GetMarkerID(locus_name)); 00503 } 00504 break; 00505 case 0 : 00506 { 00507 // Read number of quantitative variables 00508 if (ReadLineHelper(input, buffer, tokens) == 0) 00509 error("Linkage data file ends unexpectedly\n"); 00510 00511 // Add each quantitative variable to pedigree 00512 // Discard information on means 00513 for (int vars = tokens[0], i = 0; i < vars; i++) 00514 { 00515 if (ReadLineHelper(input, buffer, tokens) == 0) 00516 error("Linkage data file ends unexpectedly\n"); 00517 00518 String trait_name(locus_name); 00519 00520 if (i) 00521 { 00522 trait_name += "."; 00523 trait_name += i + 1; 00524 } 00525 00526 columnHash.Push(GetTraitID(trait_name)); 00527 columns.Push(pcTrait); 00528 columnCount++; 00529 } 00530 00531 // Skip var-covar matrix 00532 if (ReadLineHelper(input, buffer, tokens) == 0) 00533 error("Linkage data file ends unexpectedly\n"); 00534 00535 // Skip heterozygote scaling factor for var-covar matrix 00536 if (ReadLineHelper(input, buffer, tokens) == 0) 00537 error("Linkage data file ends unexpectedly\n"); 00538 00539 // Ignore map location for quantitative variables 00540 locus.Push(-1); 00541 } 00542 break; 00543 case 2 : 00544 error("The data file includes binary factors\n" 00545 "Regretably, loci of this type are not supported\n\n"); 00546 break; 00547 default : 00548 error("Unsupported locus type [%d] in data file", locus_type); 00549 break; 00550 } 00551 } 00552 00553 if (need_blank_line) printf("\n"); 00554 00555 columns.Push(pcEnd); 00556 columnHash.Push(0); 00557 00558 ReadLineHelper(input, buffer, tokens); 00559 int sexDifference = tokens.Length() ? tokens[0].AsInteger() : -1; 00560 if (tokens.Length() != 2 || 00561 (sexDifference != 0 && sexDifference != 2) || 00562 tokens[1].AsInteger() != 0) 00563 error("Error retrieving recombination information\n\n" 00564 "Expecting two (2) numeric values, which correspond to:\n" 00565 " sex-difference -- must be zero (no difference) or two (sex specific recombination)\n" 00566 " map-function -- must be zero, that is, no interference\n" 00567 "The actual input read:\n%s\n", (const char *) buffer); 00568 00569 Vector distances[2]; 00570 bool distance_in_centimorgans = false; 00571 00572 for (int r = 0; r <= sexDifference; r += 2) 00573 { 00574 ReadLineHelper(input, buffer, tokens); 00575 if (tokens.Length() != markerOrder.Length() - 1) 00576 error("Error retrieving recombination information\n\n" 00577 "Expecting %d recombination fractions (current map includes %d loci)\n" 00578 "Instead the following line was input:\n%s\n", 00579 markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer); 00580 00581 distances[r >> 1].Dimension(tokens.Length()); 00582 for (int i = 0; i < tokens.Length(); i++) 00583 distances[r >> 1][i] = (double) tokens[i]; 00584 00585 if (distances[r >> 1].Min() < 0.0) 00586 error("Linkage datafile specifies negative recombination fractions"); 00587 00588 bool centimorgans = distances[r >> 1].Max() > 0.5; 00589 00590 if (centimorgans && !distance_in_centimorgans) 00591 printf(" Some recombination fractions in datafile are greater than 0.5,\n" 00592 " so recombination fractions will be interpreted as cM distances\n\n"); 00593 00594 distance_in_centimorgans |= centimorgans; 00595 } 00596 00597 double position = 0.0, positionMale = 0.0; 00598 00599 for (int i = 0, moving = false; i < markerOrder.Length(); i++) 00600 { 00601 int m = markerOrder[i].AsInteger() - 1; 00602 00603 if (m < 0 || m >= locus.Length()) 00604 error("The marker order in the linkage datafile is invalid\n"); 00605 00606 m = locus[m]; 00607 00608 if (m != -1) 00609 { 00610 MarkerInfo * info = GetMarkerInfo(m); 00611 info->chromosome = chromosomeX ? 9999 : 0; 00612 00613 if (sexDifference == 2) 00614 info->position = (position + positionMale) * 0.5, 00615 info->positionFemale = position, 00616 info->positionMale = positionMale; 00617 else 00618 info->position = info->positionMale = info->positionFemale = position; 00619 00620 moving = true; 00621 } 00622 00623 if (i < markerOrder.Length() - 1 && moving) 00624 position += distance_in_centimorgans ? 00625 0.01 * distances[0][i] : RecombinationToDistance(distances[0][i]); 00626 00627 if (sexDifference == 2 && i < markerOrder.Length() - 1 && moving) 00628 positionMale += distance_in_centimorgans ? 00629 0.01 * distances[1][i] : RecombinationToDistance(distances[1][i]); 00630 } 00631 } 00632 00633 int PedigreeDescription::ReadLineHelper(IFILE & input, 00634 String & buffer, 00635 StringArray & tokens) 00636 { 00637 do 00638 { 00639 // Read Line 00640 buffer.ReadLine(input); 00641 buffer.Trim(); 00642 00643 // Strip comments marked with >> 00644 int pos = buffer.FastFind(">>"); 00645 if (pos == -1) pos = buffer.FastFind("<<"); 00646 if (pos == -1) pos = buffer.Length() + 1; 00647 if (buffer[0] == '#') pos = 0; 00648 00649 // Find space/tab delimited tokens 00650 tokens.Clear(); 00651 tokens.AddTokens(buffer.Left(pos - 1), WHITESPACE); 00652 00653 } 00654 while (tokens.Length() == 0 && !ifeof(input)); 00655 00656 return tokens.Length(); 00657 } 00658 00659 void PedigreeDescription::LoadMendelDataFile(const char * iFilename) 00660 { 00661 IFILE f = ifopen(iFilename, "rb"); 00662 00663 if (f == NULL) 00664 error( 00665 "The MENDEL format datafile %s cannot be opened\n\n" 00666 "Please check that the file exists and is not being used by another program\n" 00667 "To find out how to set input filenames, check the documentation\n", 00668 iFilename); 00669 00670 LoadMendelDataFile(f); 00671 ifclose(f); 00672 }; 00673 00674 void PedigreeDescription::LoadMendelDataFile(IFILE & file) 00675 { 00676 // Processes mendel format file 00677 mendelFormat = true; 00678 00679 // Codominant markers are mapped to markers 00680 // Non-codominant markers are mapped into multiple "affection status" 00681 // (Y/N) variables 00682 columns.Clear(); 00683 columnHash.Clear(); 00684 columnCount = 0; 00685 00686 FortranFormat parser; 00687 00688 // Variables for storing parsed input 00689 String locusName; 00690 String locusType; 00691 String alleleLabel; 00692 String alleleFreq; 00693 String phenotype; 00694 String genotype; 00695 int phenoCount; 00696 int alleleCount; 00697 00698 while (!ifeof(file)) 00699 { 00700 // Cycle through headers for each locus 00701 parser.SetInputFile(file); 00702 parser.SetFormat("(2A8,I2,I3)"); 00703 00704 // After retrieving locus name, check that we haven't tried to 00705 // read past the end-of-file 00706 parser.GetNextField(locusName); 00707 parser.GetNextField(locusType); 00708 alleleCount = parser.GetNextInteger(); 00709 phenoCount = parser.GetNextInteger(); 00710 00711 if (locusName.IsEmpty() && locusType.IsEmpty() && alleleCount == 0 && 00712 phenoCount == 0 && ifeof(file)) 00713 break; 00714 00715 // Only recognize autosomal and x-linked loci 00716 if (locusType.Compare("AUTOSOME") != 0 && locusType.Compare("X-LINKED")) 00717 error("Unrecognized locus type '%s' in Mendel data file\n\n" 00718 "Recognized locus types are \"AUTOSOME\" and \"X-LINKED\".", 00719 (const char *) locusType); 00720 00721 if (locusType.Compare("AUTOSOME") == 0 && chromosomeX) 00722 error("The data file indicates that locus %s is AUTOSOMAL, but\n" 00723 "X-LINKED loci were expected as input\n", 00724 (const char *) locusName); 00725 00726 if (locusType.Compare("X-LINKED") == 0 && !chromosomeX) 00727 error("The data file indicates that locus %s is X-LINKED, but\n" 00728 "AUTOSOMAL loci were expected as input\n", 00729 (const char *) locusName); 00730 00731 if (locusName.IsEmpty()) 00732 error("Blank locus name encountered in data file\n"); 00733 00734 if (phenoCount == 0) 00735 { 00736 // Co-dominant marker 00737 columns.Push(pcMarker); 00738 columnHash.Push(GetMarkerID(locusName)); 00739 columnCount++; 00740 00741 // Update marker info with allele labels and frequencies 00742 MarkerInfo * info = GetMarkerInfo(locusName); 00743 00744 info->alleleLabels.Clear(); 00745 info->alleleLabels.Push(""); 00746 info->freq.Clear(); 00747 00748 parser.SetFormat("(2A8)"); 00749 00750 // Mendel allows allele names to be specified with frequencies 00751 // left blank 00752 for (int i = 0; i < alleleCount; i++) 00753 { 00754 parser.GetNextField(alleleLabel); 00755 parser.GetNextField(alleleFreq); 00756 00757 if (alleleLabel.IsEmpty()) 00758 error("Locus %s is missing allele label for allele #%d\n", 00759 (const char *) locusName, i+1); 00760 00761 info->alleleLabels.Push(alleleLabel); 00762 00763 if (!alleleFreq.IsEmpty()) 00764 { 00765 if (info->freq.Length() == 0) 00766 info->freq.Push(0.0); 00767 00768 info->freq.Push(alleleFreq.AsDouble()); 00769 } 00770 } 00771 info->IndexAlleles(); 00772 00773 if (info->alleleLabels.Length() != info->freq.Length() && 00774 info->freq.Length() != 0) 00775 error("Locus %s is missing allele frequency information for %d alleles\n", 00776 (const char *) locusName, 00777 info->alleleLabels.Length() - info->freq.Length()); 00778 } 00779 else 00780 { 00781 // Non-codominant marker, which we decompose into multiple traits... 00782 parser.SetFormat("(2A8)"); 00783 00784 // First skip allele frequency information 00785 for (int i = 0; i < alleleCount; i++) 00786 { 00787 parser.GetNextField(alleleLabel); 00788 parser.GetNextField(alleleFreq); 00789 } 00790 00791 // Then read in each phenotype 00792 for (int i = 0; i < alleleCount; i++) 00793 { 00794 parser.SetFormat("(A8,I3)"); 00795 parser.GetNextField(phenotype); 00796 int genoCount = parser.GetNextInteger(); 00797 00798 parser.SetFormat("(A17)"); 00799 for (int j = 0; j < genoCount; j++) 00800 parser.GetNextField(genotype); 00801 00802 columns.Push(pcAffection); 00803 columnHash.Push(GetAffectionID(locusName + "->" + phenotype)); 00804 columnCount++; 00805 } 00806 } 00807 } 00808 00809 columns.Push(pcEnd); 00810 columnHash.Push(0); 00811 } 00812 00813 int PedigreeDescription::CountColumns(int type) 00814 { 00815 int count = 0; 00816 00817 for (int i = 0; i < columns.Length(); i++) 00818 if (columns[i] == type) 00819 count++; 00820 00821 return count; 00822 } 00823 00824 const char * PedigreeDescription::ColumnSummary(String & string) 00825 { 00826 string.Clear(); 00827 UpdateSummary(string, pcMarker, " markers [x2 cols]"); 00828 UpdateSummary(string, pcTrait, " traits"); 00829 UpdateSummary(string, pcAffection, " discrete traits"); 00830 UpdateSummary(string, pcCovariate, " covariates"); 00831 UpdateSummary(string, pcString, " strings"); 00832 UpdateSummary(string, pcZygosity, " zygosity"); 00833 UpdateSummary(string, pcSkip, " skipped"); 00834 return string; 00835 } 00836 00837 void PedigreeDescription::UpdateSummary(String & string, int type, const char * label) 00838 { 00839 int count = CountColumns(type); 00840 00841 if (count) 00842 { 00843 if (string.Length()) 00844 string += ", "; 00845 string += count; 00846 string += label; 00847 } 00848 } 00849 00850 00851 void PedigreeDescription::AddMarkerColumn(const char * markerName) 00852 { 00853 if (columns.Last() == pcEnd) 00854 { 00855 columns.Pop(); 00856 columnHash.Pop(); 00857 } 00858 00859 columnHash.Push(GetMarkerID(markerName)); 00860 columns.Push(pcMarker); 00861 columnCount++; 00862 } 00863 00864 void PedigreeDescription::AddCovariateColumn(const char * covariateName) 00865 { 00866 if (columns.Last() == pcEnd) 00867 { 00868 columns.Pop(); 00869 columnHash.Pop(); 00870 } 00871 00872 columnHash.Push(GetCovariateID(covariateName)); 00873 columns.Push(pcCovariate); 00874 columnCount++; 00875 } 00876 00877 void PedigreeDescription::AddTraitColumn(const char * traitName) 00878 { 00879 if (columns.Last() == pcEnd) 00880 { 00881 columns.Pop(); 00882 columnHash.Pop(); 00883 } 00884 00885 columnHash.Push(GetCovariateID(traitName)); 00886 columns.Push(pcTrait); 00887 columnCount++; 00888 } 00889 00890 void PedigreeDescription::AddAffectionColumn(const char * affectionName) 00891 { 00892 if (columns.Last() == pcEnd) 00893 { 00894 columns.Pop(); 00895 columnHash.Pop(); 00896 } 00897 00898 columnHash.Push(GetAffectionID(affectionName)); 00899 columns.Push(pcAffection); 00900 columnCount++; 00901 } 00902 00903 void PedigreeDescription::AddStringColumn(const char * stringName) 00904 { 00905 if (columns.Last() == pcEnd) 00906 { 00907 columns.Pop(); 00908 columnHash.Pop(); 00909 } 00910 00911 columnHash.Push(GetStringID(stringName)); 00912 columns.Push(pcString); 00913 columnCount++; 00914 } 00915 00916 void PedigreeDescription::AddZygosityColumn() 00917 { 00918 if (columns.Last() == pcEnd) 00919 { 00920 columns.Pop(); 00921 columnHash.Pop(); 00922 } 00923 00924 columnHash.Push(0); 00925 columns.Push(pcZygosity); 00926 columnCount++; 00927 } 00928 00929 void PedigreeDescription::AddSkippedColumn() 00930 { 00931 if (columns.Last() == pcEnd) 00932 { 00933 columns.Pop(); 00934 columnHash.Pop(); 00935 } 00936 00937 columnHash.Push(0); 00938 columns.Push(pcSkip); 00939 columnCount++; 00940 } 00941