libStatGen Software  1
PedigreeDescription.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "PedigreeDescription.h"
00019 #include "MapFunction.h"
00020 #include "MathVector.h"
00021 #include "Constant.h"
00022 #include "FortranFormat.h"
00023 #include "Error.h"
00024 
00025 #include <stdlib.h>
00026 #include <ctype.h>
00027 #include <string.h>
00028 #include <math.h>
00029 
00030 PedigreeDescription::PedigreeDescription()
00031 {
00032     columnCount = 0;
00033     mendelFormat = false;
00034 }
00035 
00036 PedigreeDescription::~PedigreeDescription()
00037 { };
00038 
00039 PedigreeDescription & PedigreeDescription::operator = (PedigreeDescription & rhs)
00040 {
00041     columnCount = rhs.columnCount;
00042 
00043     columns = rhs.columns;
00044     columnHash = rhs.columnHash;
00045 
00046     return *this;
00047 };
00048 
00049 void PedigreeDescription::Load(IFILE & input, bool warnIfLinkage)
00050 {
00051     // Check if we are dealing with a linkage format data file
00052     String      buffer;
00053     StringArray tokens;
00054 
00055     mendelFormat = false;
00056 
00057     ReadLineHelper(input, buffer, tokens);
00058     ifrewind(input);
00059 
00060     if (tokens.Length() == 4 && isdigit(tokens[0][0]))
00061     {
00062         if (warnIfLinkage) printf("Data file looks like a LINKAGE format file...\n\n");
00063         LoadLinkageDataFile(input);
00064         return;
00065     }
00066 
00067     if (buffer.Length() > 18
00068             && (buffer.SubStr(8,8).SlowCompare("AUTOSOME") == 0 ||
00069                 buffer.SubStr(8,8).SlowCompare("X-LINKED") == 0)
00070             && (isdigit(buffer[16])  || isdigit(buffer[17]))
00071             && (isdigit(buffer[18])  || isdigit(buffer[19]) ||
00072                 (buffer.Length() > 19 && isdigit(buffer[20]))))
00073     {
00074         printf("Data file looks like a MENDEL format file...\n"
00075                "   Activating EXPERIMENTAL support for this format\n\n");
00076         LoadMendelDataFile(input);
00077         return;
00078     }
00079 
00080     // Reset things
00081     ifrewind(input);
00082     int done = 0;
00083     int line = 0;
00084 
00085     columns.Clear();
00086     columnHash.Clear();
00087     columnCount = 0;
00088 
00089     while (!ifeof(input) && !done)
00090     {
00091         int   i;
00092 
00093         buffer.ReadLine(input);
00094         line++;
00095 
00096         tokens.Clear();
00097         tokens.AddTokens(buffer, WHITESPACE);
00098 
00099         if (tokens.Length() < 1) continue;
00100 
00101         if (tokens.Length() == 1)
00102             error("Problem reading data file:\n"
00103                   "Item #%d (of type %s) has no name.",
00104                   columnCount+1, (const char *) tokens[0]);
00105 
00106         switch (toupper(tokens[0][0]))
00107         {
00108             case 'A' :
00109                 columnHash.Push(GetAffectionID(tokens[1]));
00110                 columns.Push(pcAffection);
00111                 columnCount++;
00112                 break;
00113             case 'M' :
00114                 columnHash.Push(GetMarkerID(tokens[1]));
00115                 columns.Push(pcMarker);
00116                 columnCount++;
00117                 break;
00118             case 'T' :
00119                 columnHash.Push(GetTraitID(tokens[1]));
00120                 columns.Push(pcTrait);
00121                 columnCount++;
00122                 break;
00123             case 'C' :
00124                 columnHash.Push(GetCovariateID(tokens[1]));
00125                 columns.Push(pcCovariate);
00126                 columnCount++;
00127                 break;
00128             case '$' :
00129                 columnHash.Push(GetStringID(tokens[1]));
00130                 columns.Push(pcString);
00131                 columnCount++;
00132                 break;
00133             case 'S' :
00134                 i = (int) tokens[0].SubStr(1);
00135                 i = i > 0 ? i : 1;
00136                 while (i--)
00137                 {
00138                     columns.Push(pcSkip);
00139                     columnHash.Push(0);
00140                     columnCount++;
00141                 }
00142                 break;
00143             case 'Z' :
00144                 columnHash.Push(0);
00145                 columns.Push(pcZygosity);
00146                 columnCount++;
00147                 break;
00148             case 'V' :
00149                 GetMarkerID(tokens[1]);
00150                 break;
00151             case 'E' :
00152                 done = 1;
00153                 break;
00154             case 'U' :
00155                 if (toupper(tokens[0][1]) == 'T' && toupper(tokens[0][2]) == 'C')
00156                 {
00157                     int c = GetCovariateID(tokens[1]);
00158                     int t = GetTraitID(tokens[1]);
00159 
00160                     if (c >= 32767 || t >= 32767)
00161                         error("Internal error processing data file\n");
00162 
00163                     columnHash.Push(t * 32768 + c);
00164                     columns.Push(pcUndocumentedTraitCovariate);
00165                     columnCount++;
00166                     break;
00167                 }
00168             default :
00169                 error("Problem in data file (line %d):\n%s\n",
00170                       line, (const char *) buffer);
00171         }
00172     }
00173 
00174     columns.Push(pcEnd);
00175     columnHash.Push(0);
00176 };
00177 
00178 void PedigreeDescription::Load(const char * iFilename, bool warnIfLinkage)
00179 {
00180     IFILE f = ifopen(iFilename, "rb");
00181 
00182     if (f == NULL)
00183         error(
00184             "The datafile %s cannot be opened\n\n"
00185             "Common causes for this problem are:\n"
00186             "  * You might not have used the correct options to specify input file names,\n"
00187             "    please check the program documentation for information on how to do this\n\n"
00188             "  * The file doesn't exist or the filename might have been misspelt\n\n"
00189             "  * The file exists but it is being used by another program which you will need\n"
00190             "    to close before continuing\n\n"
00191             "  * The file is larger than 2GB and you haven't compiled this application with\n"
00192             "    large file support.\n\n",
00193             iFilename);
00194 
00195     Load(f, warnIfLinkage);
00196     ifclose(f);
00197 
00198     filename = iFilename;
00199 };
00200 
00201 void PedigreeDescription::LoadMap(const char * iFilename)
00202 {
00203     IFILE f = ifopen(iFilename, "rb");
00204 
00205     if (f == NULL)
00206         error(
00207             "The mapfile %s cannot be opened\n\n"
00208             "Please check that the file exists and is not being used by another program\n"
00209             "To find out how to set input filenames, check the documentation\n",
00210             iFilename);
00211 
00212     LoadMap(f);
00213     ifclose(f);
00214 };
00215 
00216 void PedigreeDescription::LoadMap(IFILE & input)
00217 {
00218     columns.Clear();
00219     columnHash.Clear();
00220     columnCount = 0;
00221 
00222     int         lastposition = 0;
00223     String      buffer;
00224     StringArray tokens;
00225 
00226     buffer.ReadLine(input);
00227     tokens.AddTokens(buffer, WHITESPACE);
00228 
00229     while (tokens.Length() == 0 && !ifeof(input))
00230     {
00231         buffer.ReadLine(input);
00232         tokens.AddTokens(buffer, WHITESPACE);
00233     }
00234 
00235     if (tokens.Length() != 3)
00236         error("Error reading map file header, which has %d columns.\n"
00237               "Three columns were expected, corresponding to\n"
00238               "MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION\n"
00239               "The offending header is transcribed below:\n\n"
00240               "%s", tokens.Length(), (const char *) buffer);
00241     else
00242         printf("Map file column labels\n"
00243                "  -- COLUMN 1, Expecting MARKER_ID, Read %s\n"
00244                "  -- COLUMN 2, Expecting MARKER_NAME, Read %s\n"
00245                "  -- COLUMN 3, Expection BASE_PAIR_POSITION, Read %s\n\n",
00246                (const char *)(tokens[0]), (const char *)(tokens[1]),
00247                (const char *)(tokens[2]));
00248 
00249     int line = 1;
00250     while (!ifeof(input))
00251     {
00252         int    serial;
00253         long   position;
00254 
00255         buffer.ReadLine(input);
00256         line++;
00257 
00258         tokens.Clear();
00259         tokens.AddTokens(buffer, WHITESPACE);
00260 
00261         if (tokens.Length() < 1) continue;
00262         if (tokens.Length() != 3)
00263             error("Each line in the map file should have 3 tokens, corresponding\n"
00264                   "to MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION respectively\n"
00265                   "However, there are %d tokens in line %d, transcribed below:\n\n"
00266                   "%s", tokens.Length(), line, (const char *) buffer);
00267 
00268         serial = (int) tokens[0];
00269         if (serial != columnCount + 1)
00270             error("Reading Marker Index from Map File...\n"
00271                   "Markers should be indexed consecutively starting at 1\n"
00272                   "Marker %d does not fit this pattern\n", columnCount + 1);
00273 
00274         position = (int) tokens[2];
00275         if (position < lastposition)
00276             error("Reading Marker Position from Map File...\n"
00277                   "Marker position should be in base-pairs\n"
00278                   "and markers should be in map order\n");
00279 
00280         // TODO -- store marker locations somewhere!
00281         lastposition = position;
00282 
00283         columnHash.Push(GetMarkerID(tokens[1]));
00284         columns.Push(pcMarker);
00285         columnCount++;
00286 
00287         GetMarkerInfo(tokens[1])->position = position * 1e-8;
00288     }
00289 
00290     columns.Push(pcEnd);
00291     columnHash.Push(0);
00292 };
00293 
00294 int PedigreeDescription::CountTextColumns()
00295 {
00296     int count = 0;
00297 
00298     for (int i = 0; i < columnCount; i++, count++)
00299         if (columns[i] == pcMarker)
00300             count++;
00301 
00302     return count;
00303 }
00304 
00305 void PedigreeDescription::LoadLinkageDataFile(const char * iFilename)
00306 {
00307     IFILE f = ifopen(iFilename, "rb");
00308 
00309     if (f == NULL)
00310         error(
00311             "The linkage format datafile %s cannot be opened\n\n"
00312             "Please check that the file exists and is not being used by another program\n"
00313             "To find out how to set input filenames, check the documentation\n",
00314             iFilename);
00315 
00316     LoadLinkageDataFile(f);
00317     ifclose(f);
00318 
00319     filename = iFilename;
00320 };
00321 
00322 void PedigreeDescription::LoadLinkageDataFile(IFILE & input)
00323 {
00324     columns.Clear();
00325     columnHash.Clear();
00326     columnCount = 0;
00327 
00328     String      buffer, label;
00329     StringArray tokens;
00330 
00331     ReadLineHelper(input, buffer, tokens);
00332 
00333     if (tokens.Length() != 4 || tokens[2].AsInteger() != (int) chromosomeX ||
00334             tokens[0].AsInteger() < 0)
00335         error("Cannot handle first line of data file\n\n"
00336               "Expecting four (4) numeric values, which correspond to:\n"
00337               "   num-loci   -- number of loci in the pedigree\n"
00338               "                 this value must be positive\n"
00339               "   risk-locus -- locus for which risks should be calculated\n"
00340               "                 this value will be ignored\n"
00341               "   sex-link   -- are the loci sex linked [0 - No, 1 - Yes]\n"
00342               "                 %s\n"
00343               "   program    -- which LINKAGE program do you want to use?\n"
00344               "                 this value will also be ignored\n\n"
00345               "The actual input read:\n%s\n",
00346               chromosomeX ? "expecting X-linked data, so this value must be ONE (1)"
00347               : "expecting autosomal data, so this must be ZERO (0)",
00348               (const char *) buffer);
00349 
00350     int numloci = tokens[0];
00351 
00352     ReadLineHelper(input, buffer, tokens);
00353 
00354     if (tokens.Length() != 4 ||
00355             tokens[0].AsInteger() != 0 ||
00356             tokens[3].AsInteger() != 0)
00357         error("Cannot handle second line of data file\n\n"
00358               "Expecting four (4) numeric values, which correspond to:\n"
00359               "   mutation-model         -- must be zero, corresponding to no mutation\n"
00360               "   male-mutation-rate     -- ignored\n"
00361               "   female-mutation-rate   -- ignored\n"
00362               "   linkage-disequilibrium -- must be zero, may be used in the future to\n"
00363               "                             read haplotype frequencies\n\n"
00364               "The actual input read:\n%s\n", (const char *) buffer);
00365 
00366     StringArray markerOrder;
00367     int         unknown = 0;
00368 
00369     ReadLineHelper(input, buffer, markerOrder);
00370 
00371     if (markerOrder.Length() > numloci)
00372         error("The third line of the data file lists marker order\n\n"
00373               "Although %d loci are defined [in the first line],\n"
00374               "this line includes %d values:\n%s\n",
00375               numloci, markerOrder.Length(), (const char *) buffer);
00376 
00377     IntArray    locus;
00378     bool need_blank_line = false;
00379 
00380     while (!ifeof(input) && numloci--)
00381     {
00382         if (ReadLineHelper(input, buffer, tokens) == 0)
00383             error("Linkage data file ends unexpectedly");
00384 
00385         if (tokens.Length() < 2)
00386             error("Incomplete locus information in data file\n"
00387                   "Information for each locus should include 2 or more fiels\n"
00388                   "The expected fields are:\n"
00389                   "   field_type  -- indicator of locus type (trait, marker,...)\n"
00390                   "   alleles     -- number of alleles\n"
00391                   "   name        -- locus name, preceded by hash (#) sign\n\n"
00392                   "The actual input read:\n%s\n", (const char *) buffer);
00393 
00394         int locus_type = (int) tokens[0];
00395         int alleles    = (int) tokens[1];
00396 
00397         String locus_name("LOCUS");
00398         locus_name += ++unknown;
00399 
00400         if (tokens.Length() > 2 && tokens[2][0] == '#')
00401         {
00402             if (tokens[2][1] != 0)
00403                 locus_name = tokens[2].SubStr(1);
00404             else if (tokens.Length() > 3)
00405                 locus_name = tokens[3];
00406         }
00407 
00408         if ((locus_type == 4 && alleles == 0) ||
00409                 (locus_type == 4 && alleles == 1))
00410         {
00411             columnHash.Push(GetCovariateID(locus_name));
00412             columns.Push(pcCovariate);
00413             columnCount++;
00414             continue;
00415         }
00416 
00417         if (locus_type == 0 && alleles == 0)
00418         {
00419             columnHash.Push(GetTraitID(locus_name));
00420             columns.Push(pcTrait);
00421             columnCount++;
00422             continue;
00423         }
00424 
00425         if (ReadLineHelper(input, buffer, tokens) != alleles)
00426             error("Expecting %d allele frequencies, but input has %d columns:\n"
00427                   "%s\n", alleles, tokens.Length(), (const char *) buffer);
00428 
00429         Vector frequencies(alleles + 1);
00430 
00431         frequencies[0] = 0.0;
00432         for (int i = 1; i <= alleles; i++)
00433             frequencies[i] = (double) tokens[i - 1];
00434 
00435         double sum = frequencies.Sum();
00436 
00437         if (sum <= 0.0)
00438             error("Allele frequencies at %s sum to %f, which doesn't make sense\n",
00439                   (const char *) locus_name, sum);
00440 
00441         if (fabs(sum - 1.0) > 1.2e-5)
00442         {
00443             printf("Allele frequencies at %s sum to %f, adjusted to 1.0\n",
00444                    (const char *) locus_name, sum);
00445             need_blank_line = true;
00446         }
00447 
00448         if (sum != 1.0)
00449             frequencies *= 1.0 / sum;
00450 
00451         switch (locus_type)
00452         {
00453             case 1 :
00454             {
00455                 // Affection
00456                 columnHash.Push(GetAffectionID(locus_name));
00457                 columns.Push(pcAffection);
00458                 columnCount++;
00459 
00460                 // Read number of liability classes
00461                 if (ReadLineHelper(input, buffer, tokens) == 0)
00462                     error("Linkage data file ends unexpectedly\n");
00463 
00464                 // Skip liability class data
00465                 int classes = tokens[0];
00466                 if (classes > 1)
00467                 {
00468                     columnHash.Push(0);
00469                     columns.Push(pcSkip);
00470                     columnCount++;
00471                 }
00472 
00473                 // Separate liability class rows for males and females for X-linked data
00474                 if (chromosomeX) classes *= 2;
00475 
00476                 while (classes--)
00477                     if (ReadLineHelper(input, buffer, tokens) == 0)
00478                         error("Linkage data file ends unexpectedly\n");
00479 
00480                 // Ignore map location for quantitative variables
00481                 locus.Push(-1);
00482             }
00483             break;
00484             case 3 :
00485             {
00486                 columnHash.Push(GetMarkerID(locus_name));
00487                 columns.Push(pcMarker);
00488                 columnCount++;
00489 
00490                 // Store allele frequencies
00491                 MarkerInfo * info = GetMarkerInfo(locus_name);
00492 
00493                 info->freq = frequencies;
00494 
00495                 // Initialize allele labels
00496                 info->alleleLabels.Clear();
00497                 for (int i = 0; i < frequencies.Length(); i++)
00498                     info->alleleLabels.Push(label = i);
00499                 info->IndexAlleles();
00500 
00501                 // Store marker id, so that we can track map location
00502                 locus.Push(GetMarkerID(locus_name));
00503             }
00504             break;
00505             case 0 :
00506             {
00507                 // Read number of quantitative variables
00508                 if (ReadLineHelper(input, buffer, tokens) == 0)
00509                     error("Linkage data file ends unexpectedly\n");
00510 
00511                 // Add each quantitative variable to pedigree
00512                 // Discard information on means
00513                 for (int vars = tokens[0], i = 0; i < vars; i++)
00514                 {
00515                     if (ReadLineHelper(input, buffer, tokens) == 0)
00516                         error("Linkage data file ends unexpectedly\n");
00517 
00518                     String trait_name(locus_name);
00519 
00520                     if (i)
00521                     {
00522                         trait_name += ".";
00523                         trait_name += i + 1;
00524                     }
00525 
00526                     columnHash.Push(GetTraitID(trait_name));
00527                     columns.Push(pcTrait);
00528                     columnCount++;
00529                 }
00530 
00531                 // Skip var-covar matrix
00532                 if (ReadLineHelper(input, buffer, tokens) == 0)
00533                     error("Linkage data file ends unexpectedly\n");
00534 
00535                 // Skip heterozygote scaling factor for var-covar matrix
00536                 if (ReadLineHelper(input, buffer, tokens) == 0)
00537                     error("Linkage data file ends unexpectedly\n");
00538 
00539                 // Ignore map location for quantitative variables
00540                 locus.Push(-1);
00541             }
00542             break;
00543             case 2 :
00544                 error("The data file includes binary factors\n"
00545                       "Regretably, loci of this type are not supported\n\n");
00546                 break;
00547             default :
00548                 error("Unsupported locus type [%d] in data file", locus_type);
00549                 break;
00550         }
00551     }
00552 
00553     if (need_blank_line) printf("\n");
00554 
00555     columns.Push(pcEnd);
00556     columnHash.Push(0);
00557 
00558     ReadLineHelper(input, buffer, tokens);
00559     int sexDifference = tokens.Length() ? tokens[0].AsInteger() : -1;
00560     if (tokens.Length() != 2 ||
00561             (sexDifference != 0 && sexDifference != 2) ||
00562             tokens[1].AsInteger() != 0)
00563         error("Error retrieving recombination information\n\n"
00564               "Expecting two (2) numeric values, which correspond to:\n"
00565               "   sex-difference   -- must be zero (no difference) or two (sex specific recombination)\n"
00566               "   map-function     -- must be zero, that is, no interference\n"
00567               "The actual input read:\n%s\n", (const char *) buffer);
00568 
00569     Vector distances[2];
00570     bool distance_in_centimorgans = false;
00571 
00572     for (int r = 0; r <= sexDifference; r += 2)
00573     {
00574         ReadLineHelper(input, buffer, tokens);
00575         if (tokens.Length() != markerOrder.Length() - 1)
00576             error("Error retrieving recombination information\n\n"
00577                   "Expecting %d recombination fractions (current map includes %d loci)\n"
00578                   "Instead the following line was input:\n%s\n",
00579                   markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer);
00580 
00581         distances[r >> 1].Dimension(tokens.Length());
00582         for (int i = 0; i < tokens.Length(); i++)
00583             distances[r >> 1][i] = (double) tokens[i];
00584 
00585         if (distances[r >> 1].Min() < 0.0)
00586             error("Linkage datafile specifies negative recombination fractions");
00587 
00588         bool centimorgans = distances[r >> 1].Max() > 0.5;
00589 
00590         if (centimorgans && !distance_in_centimorgans)
00591             printf("  Some recombination fractions in datafile are greater than 0.5,\n"
00592                    "  so recombination fractions will be interpreted as cM distances\n\n");
00593 
00594         distance_in_centimorgans |= centimorgans;
00595     }
00596 
00597     double position = 0.0, positionMale = 0.0;
00598 
00599     for (int i = 0, moving = false; i < markerOrder.Length(); i++)
00600     {
00601         int m = markerOrder[i].AsInteger() - 1;
00602 
00603         if (m < 0 || m >= locus.Length())
00604             error("The marker order in the linkage datafile is invalid\n");
00605 
00606         m = locus[m];
00607 
00608         if (m != -1)
00609         {
00610             MarkerInfo * info = GetMarkerInfo(m);
00611             info->chromosome = chromosomeX ? 9999 : 0;
00612 
00613             if (sexDifference == 2)
00614                 info->position = (position + positionMale) * 0.5,
00615                                  info->positionFemale = position,
00616                                                         info->positionMale = positionMale;
00617             else
00618                 info->position = info->positionMale = info->positionFemale = position;
00619 
00620             moving = true;
00621         }
00622 
00623         if (i < markerOrder.Length() - 1 && moving)
00624             position += distance_in_centimorgans ?
00625                         0.01 * distances[0][i] : RecombinationToDistance(distances[0][i]);
00626 
00627         if (sexDifference == 2 && i < markerOrder.Length() - 1 && moving)
00628             positionMale += distance_in_centimorgans ?
00629                             0.01 * distances[1][i] : RecombinationToDistance(distances[1][i]);
00630     }
00631 }
00632 
00633 int PedigreeDescription::ReadLineHelper(IFILE & input,
00634                                         String & buffer,
00635                                         StringArray & tokens)
00636 {
00637     do
00638     {
00639         // Read Line
00640         buffer.ReadLine(input);
00641         buffer.Trim();
00642 
00643         // Strip comments marked with >>
00644         int pos = buffer.FastFind(">>");
00645         if (pos == -1) pos = buffer.FastFind("<<");
00646         if (pos == -1) pos = buffer.Length() + 1;
00647         if (buffer[0] == '#') pos = 0;
00648 
00649         // Find space/tab delimited tokens
00650         tokens.Clear();
00651         tokens.AddTokens(buffer.Left(pos - 1), WHITESPACE);
00652 
00653     }
00654     while (tokens.Length() == 0 && !ifeof(input));
00655 
00656     return tokens.Length();
00657 }
00658 
00659 void PedigreeDescription::LoadMendelDataFile(const char * iFilename)
00660 {
00661     IFILE f = ifopen(iFilename, "rb");
00662 
00663     if (f == NULL)
00664         error(
00665             "The MENDEL format datafile %s cannot be opened\n\n"
00666             "Please check that the file exists and is not being used by another program\n"
00667             "To find out how to set input filenames, check the documentation\n",
00668             iFilename);
00669 
00670     LoadMendelDataFile(f);
00671     ifclose(f);
00672 };
00673 
00674 void PedigreeDescription::LoadMendelDataFile(IFILE & file)
00675 {
00676     // Processes mendel format file
00677     mendelFormat = true;
00678 
00679     // Codominant markers are mapped to markers
00680     // Non-codominant markers are mapped into multiple "affection status"
00681     // (Y/N) variables
00682     columns.Clear();
00683     columnHash.Clear();
00684     columnCount = 0;
00685 
00686     FortranFormat parser;
00687 
00688     // Variables for storing parsed input
00689     String locusName;
00690     String locusType;
00691     String alleleLabel;
00692     String alleleFreq;
00693     String phenotype;
00694     String genotype;
00695     int phenoCount;
00696     int alleleCount;
00697 
00698     while (!ifeof(file))
00699     {
00700         // Cycle through headers for each locus
00701         parser.SetInputFile(file);
00702         parser.SetFormat("(2A8,I2,I3)");
00703 
00704         // After retrieving locus name, check that we haven't tried to
00705         // read past the end-of-file
00706         parser.GetNextField(locusName);
00707         parser.GetNextField(locusType);
00708         alleleCount = parser.GetNextInteger();
00709         phenoCount = parser.GetNextInteger();
00710 
00711         if (locusName.IsEmpty() && locusType.IsEmpty() && alleleCount == 0 &&
00712                 phenoCount == 0 && ifeof(file))
00713             break;
00714 
00715         // Only recognize autosomal and x-linked loci
00716         if (locusType.Compare("AUTOSOME") != 0 && locusType.Compare("X-LINKED"))
00717             error("Unrecognized locus type '%s' in Mendel data file\n\n"
00718                   "Recognized locus types are \"AUTOSOME\" and \"X-LINKED\".",
00719                   (const char *) locusType);
00720 
00721         if (locusType.Compare("AUTOSOME") == 0 && chromosomeX)
00722             error("The data file indicates that locus %s is AUTOSOMAL, but\n"
00723                   "X-LINKED loci were expected as input\n",
00724                   (const char *) locusName);
00725 
00726         if (locusType.Compare("X-LINKED") == 0 && !chromosomeX)
00727             error("The data file indicates that locus %s is X-LINKED, but\n"
00728                   "AUTOSOMAL loci were expected as input\n",
00729                   (const char *) locusName);
00730 
00731         if (locusName.IsEmpty())
00732             error("Blank locus name encountered in data file\n");
00733 
00734         if (phenoCount == 0)
00735         {
00736             // Co-dominant marker
00737             columns.Push(pcMarker);
00738             columnHash.Push(GetMarkerID(locusName));
00739             columnCount++;
00740 
00741             // Update marker info with allele labels and frequencies
00742             MarkerInfo * info = GetMarkerInfo(locusName);
00743 
00744             info->alleleLabels.Clear();
00745             info->alleleLabels.Push("");
00746             info->freq.Clear();
00747 
00748             parser.SetFormat("(2A8)");
00749 
00750             // Mendel allows allele names to be specified with frequencies
00751             // left blank
00752             for (int i = 0; i < alleleCount; i++)
00753             {
00754                 parser.GetNextField(alleleLabel);
00755                 parser.GetNextField(alleleFreq);
00756 
00757                 if (alleleLabel.IsEmpty())
00758                     error("Locus %s is missing allele label for allele #%d\n",
00759                           (const char *) locusName, i+1);
00760 
00761                 info->alleleLabels.Push(alleleLabel);
00762 
00763                 if (!alleleFreq.IsEmpty())
00764                 {
00765                     if (info->freq.Length() == 0)
00766                         info->freq.Push(0.0);
00767 
00768                     info->freq.Push(alleleFreq.AsDouble());
00769                 }
00770             }
00771             info->IndexAlleles();
00772 
00773             if (info->alleleLabels.Length() != info->freq.Length() &&
00774                     info->freq.Length() != 0)
00775                 error("Locus %s is missing allele frequency information for %d alleles\n",
00776                       (const char *) locusName,
00777                       info->alleleLabels.Length() - info->freq.Length());
00778         }
00779         else
00780         {
00781             // Non-codominant marker, which we decompose into multiple traits...
00782             parser.SetFormat("(2A8)");
00783 
00784             // First skip allele frequency information
00785             for (int i = 0; i < alleleCount; i++)
00786             {
00787                 parser.GetNextField(alleleLabel);
00788                 parser.GetNextField(alleleFreq);
00789             }
00790 
00791             // Then read in each phenotype
00792             for (int i = 0; i < alleleCount; i++)
00793             {
00794                 parser.SetFormat("(A8,I3)");
00795                 parser.GetNextField(phenotype);
00796                 int genoCount = parser.GetNextInteger();
00797 
00798                 parser.SetFormat("(A17)");
00799                 for (int j = 0; j < genoCount; j++)
00800                     parser.GetNextField(genotype);
00801 
00802                 columns.Push(pcAffection);
00803                 columnHash.Push(GetAffectionID(locusName + "->" + phenotype));
00804                 columnCount++;
00805             }
00806         }
00807     }
00808 
00809     columns.Push(pcEnd);
00810     columnHash.Push(0);
00811 }
00812 
00813 int PedigreeDescription::CountColumns(int type)
00814 {
00815     int count = 0;
00816 
00817     for (int i = 0; i < columns.Length(); i++)
00818         if (columns[i] == type)
00819             count++;
00820 
00821     return count;
00822 }
00823 
00824 const char * PedigreeDescription::ColumnSummary(String & string)
00825 {
00826     string.Clear();
00827     UpdateSummary(string, pcMarker, " markers [x2 cols]");
00828     UpdateSummary(string, pcTrait, " traits");
00829     UpdateSummary(string, pcAffection, " discrete traits");
00830     UpdateSummary(string, pcCovariate, " covariates");
00831     UpdateSummary(string, pcString, " strings");
00832     UpdateSummary(string, pcZygosity, " zygosity");
00833     UpdateSummary(string, pcSkip, " skipped");
00834     return string;
00835 }
00836 
00837 void PedigreeDescription::UpdateSummary(String & string, int type, const char * label)
00838 {
00839     int count = CountColumns(type);
00840 
00841     if (count)
00842     {
00843         if (string.Length())
00844             string += ", ";
00845         string += count;
00846         string += label;
00847     }
00848 }
00849 
00850 
00851 void PedigreeDescription::AddMarkerColumn(const char * markerName)
00852 {
00853     if (columns.Last() == pcEnd)
00854     {
00855         columns.Pop();
00856         columnHash.Pop();
00857     }
00858 
00859     columnHash.Push(GetMarkerID(markerName));
00860     columns.Push(pcMarker);
00861     columnCount++;
00862 }
00863 
00864 void PedigreeDescription::AddCovariateColumn(const char * covariateName)
00865 {
00866     if (columns.Last() == pcEnd)
00867     {
00868         columns.Pop();
00869         columnHash.Pop();
00870     }
00871 
00872     columnHash.Push(GetCovariateID(covariateName));
00873     columns.Push(pcCovariate);
00874     columnCount++;
00875 }
00876 
00877 void PedigreeDescription::AddTraitColumn(const char * traitName)
00878 {
00879     if (columns.Last() == pcEnd)
00880     {
00881         columns.Pop();
00882         columnHash.Pop();
00883     }
00884 
00885     columnHash.Push(GetCovariateID(traitName));
00886     columns.Push(pcTrait);
00887     columnCount++;
00888 }
00889 
00890 void PedigreeDescription::AddAffectionColumn(const char * affectionName)
00891 {
00892     if (columns.Last() == pcEnd)
00893     {
00894         columns.Pop();
00895         columnHash.Pop();
00896     }
00897 
00898     columnHash.Push(GetAffectionID(affectionName));
00899     columns.Push(pcAffection);
00900     columnCount++;
00901 }
00902 
00903 void PedigreeDescription::AddStringColumn(const char * stringName)
00904 {
00905     if (columns.Last() == pcEnd)
00906     {
00907         columns.Pop();
00908         columnHash.Pop();
00909     }
00910 
00911     columnHash.Push(GetStringID(stringName));
00912     columns.Push(pcString);
00913     columnCount++;
00914 }
00915 
00916 void PedigreeDescription::AddZygosityColumn()
00917 {
00918     if (columns.Last() == pcEnd)
00919     {
00920         columns.Pop();
00921         columnHash.Pop();
00922     }
00923 
00924     columnHash.Push(0);
00925     columns.Push(pcZygosity);
00926     columnCount++;
00927 }
00928 
00929 void PedigreeDescription::AddSkippedColumn()
00930 {
00931     if (columns.Last() == pcEnd)
00932     {
00933         columns.Pop();
00934         columnHash.Pop();
00935     }
00936 
00937     columnHash.Push(0);
00938     columns.Push(pcSkip);
00939     columnCount++;
00940 }
00941 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends