PedigreeDescription.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "PedigreeDescription.h"
00019 #include "MapFunction.h"
00020 #include "MathVector.h"
00021 #include "Constant.h"
00022 #include "FortranFormat.h"
00023 #include "Error.h"
00024 
00025 #include <stdlib.h>
00026 #include <ctype.h>
00027 #include <string.h>
00028 #include <math.h>
00029 
00030 PedigreeDescription::PedigreeDescription()
00031 {
00032     columnCount = 0;
00033     mendelFormat = false;
00034 }
00035 
00036 PedigreeDescription::~PedigreeDescription()
00037 { };
00038 
00039 PedigreeDescription & PedigreeDescription::operator = (PedigreeDescription & rhs)
00040 {
00041     columnCount = rhs.columnCount;
00042 
00043     columns = rhs.columns;
00044     columnHash = rhs.columnHash;
00045 
00046     return *this;
00047 };
00048 
00049 void PedigreeDescription::Load(IFILE & input, bool warnIfLinkage)
00050 {
00051     // Check if we are dealing with a linkage format data file
00052     String      buffer;
00053     StringArray tokens;
00054 
00055     mendelFormat = false;
00056 
00057     ReadLineHelper(input, buffer, tokens);
00058     ifrewind(input);
00059 
00060     if (tokens.Length() == 4 && isdigit(tokens[0][0]))
00061     {
00062         if (warnIfLinkage) printf("Data file looks like a LINKAGE format file...\n\n");
00063         LoadLinkageDataFile(input);
00064         return;
00065     }
00066 
00067     if (buffer.Length() > 18
00068             && (buffer.SubStr(8,8).SlowCompare("AUTOSOME") == 0 ||
00069                 buffer.SubStr(8,8).SlowCompare("X-LINKED") == 0)
00070             && (isdigit(buffer[16])  || isdigit(buffer[17]))
00071             && (isdigit(buffer[18])  || isdigit(buffer[19]) ||
00072                 (buffer.Length() > 19 && isdigit(buffer[20]))))
00073     {
00074         printf("Data file looks like a MENDEL format file...\n"
00075                "   Activating EXPERIMENTAL support for this format\n\n");
00076         LoadMendelDataFile(input);
00077         return;
00078     }
00079 
00080     // Reset things
00081     ifrewind(input);
00082     int done = 0;
00083     int line = 0;
00084 
00085     columns.Clear();
00086     columnHash.Clear();
00087     columnCount = 0;
00088 
00089     while (!ifeof(input) && !done)
00090     {
00091         int   i;
00092 
00093         buffer.ReadLine(input);
00094         line++;
00095 
00096         tokens.Clear();
00097         tokens.AddTokens(buffer, WHITESPACE);
00098 
00099         if (tokens.Length() < 1) continue;
00100 
00101         if (tokens.Length() == 1)
00102             error("Problem reading data file:\n"
00103                   "Item #%d (of type %s) has no name.",
00104                   columnCount+1, (const char *) tokens[0]);
00105 
00106         switch (toupper(tokens[0][0]))
00107         {
00108             case 'A' :
00109                 columnHash.Push(GetAffectionID(tokens[1]));
00110                 columns.Push(pcAffection);
00111                 columnCount++;
00112                 break;
00113             case 'M' :
00114                 columnHash.Push(GetMarkerID(tokens[1]));
00115                 columns.Push(pcMarker);
00116                 columnCount++;
00117                 break;
00118             case 'T' :
00119                 columnHash.Push(GetTraitID(tokens[1]));
00120                 columns.Push(pcTrait);
00121                 columnCount++;
00122                 break;
00123             case 'C' :
00124                 columnHash.Push(GetCovariateID(tokens[1]));
00125                 columns.Push(pcCovariate);
00126                 columnCount++;
00127                 break;
00128             case 'S' :
00129                 i = (int) tokens[0].SubStr(1);
00130                 i = i > 0 ? i : 1;
00131                 while (i--)
00132                 {
00133                     columns.Push(pcSkip);
00134                     columnHash.Push(0);
00135                     columnCount++;
00136                 }
00137                 break;
00138             case 'Z' :
00139                 columnHash.Push(0);
00140                 columns.Push(pcZygosity);
00141                 columnCount++;
00142                 break;
00143             case 'V' :
00144                 GetMarkerID(tokens[1]);
00145                 break;
00146             case 'E' :
00147                 done = 1;
00148                 break;
00149             case 'U' :
00150                 if (toupper(tokens[0][1]) == 'T' && toupper(tokens[0][2]) == 'C')
00151                 {
00152                     int c = GetCovariateID(tokens[1]);
00153                     int t = GetTraitID(tokens[1]);
00154 
00155                     if (c >= 32767 || t >= 32767)
00156                         error("Internal error processing data file\n");
00157 
00158                     columnHash.Push(t * 32768 + c);
00159                     columns.Push(pcUndocumentedTraitCovariate);
00160                     columnCount++;
00161                     break;
00162                 }
00163             default :
00164                 error("Problem in data file (line %d):\n%s\n",
00165                       line, (const char *) buffer);
00166         }
00167     }
00168 
00169     columns.Push(pcEnd);
00170     columnHash.Push(0);
00171 };
00172 
00173 void PedigreeDescription::Load(const char * iFilename, bool warnIfLinkage)
00174 {
00175     IFILE f = ifopen(iFilename, "rb");
00176 
00177     if (f == NULL)
00178         error(
00179             "The datafile %s cannot be opened\n\n"
00180             "Common causes for this problem are:\n"
00181             "  * You might not have used the correct options to specify input file names,\n"
00182             "    please check the program documentation for information on how to do this\n\n"
00183             "  * The file doesn't exist or the filename might have been misspelt\n\n"
00184             "  * The file exists but it is being used by another program which you will need\n"
00185             "    to close before continuing\n\n"
00186             "  * The file is larger than 2GB and you haven't compiled this application with\n"
00187             "    large file support.\n\n",
00188             iFilename);
00189 
00190     Load(f, warnIfLinkage);
00191     ifclose(f);
00192 
00193     filename = iFilename;
00194 };
00195 
00196 void PedigreeDescription::LoadMap(const char * iFilename)
00197 {
00198     IFILE f = ifopen(iFilename, "rb");
00199 
00200     if (f == NULL)
00201         error(
00202             "The mapfile %s cannot be opened\n\n"
00203             "Please check that the file exists and is not being used by another program\n"
00204             "To find out how to set input filenames, check the documentation\n",
00205             iFilename);
00206 
00207     LoadMap(f);
00208     ifclose(f);
00209 };
00210 
00211 void PedigreeDescription::LoadMap(IFILE & input)
00212 {
00213     columns.Clear();
00214     columnHash.Clear();
00215     columnCount = 0;
00216 
00217     int         lastposition = 0;
00218     String      buffer;
00219     StringArray tokens;
00220 
00221     buffer.ReadLine(input);
00222     tokens.AddTokens(buffer, WHITESPACE);
00223 
00224     while (tokens.Length() == 0 && !ifeof(input))
00225     {
00226         buffer.ReadLine(input);
00227         tokens.AddTokens(buffer, WHITESPACE);
00228     }
00229 
00230     if (tokens.Length() != 3)
00231         error("Error reading map file header, which has %d columns.\n"
00232               "Three columns were expected, corresponding to\n"
00233               "MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION\n"
00234               "The offending header is transcribed below:\n\n"
00235               "%s", tokens.Length(), (const char *) buffer);
00236     else
00237         printf("Map file column labels\n"
00238                "  -- COLUMN 1, Expecting MARKER_ID, Read %s\n"
00239                "  -- COLUMN 2, Expecting MARKER_NAME, Read %s\n"
00240                "  -- COLUMN 3, Expection BASE_PAIR_POSITION, Read %s\n\n",
00241                (const char *)(tokens[0]), (const char *)(tokens[1]),
00242                (const char *)(tokens[2]));
00243 
00244     int line = 1;
00245     while (!ifeof(input))
00246     {
00247         int    serial;
00248         long   position;
00249 
00250         buffer.ReadLine(input);
00251         line++;
00252 
00253         tokens.Clear();
00254         tokens.AddTokens(buffer, WHITESPACE);
00255 
00256         if (tokens.Length() < 1) continue;
00257         if (tokens.Length() != 3)
00258             error("Each line in the map file should have 3 tokens, corresponding\n"
00259                   "to MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION respectively\n"
00260                   "However, there are %d tokens in line %d, transcribed below:\n\n"
00261                   "%s", tokens.Length(), line, (const char *) buffer);
00262 
00263         serial = (int) tokens[0];
00264         if (serial != columnCount + 1)
00265             error("Reading Marker Index from Map File...\n"
00266                   "Markers should be indexed consecutively starting at 1\n"
00267                   "Marker %d does not fit this pattern\n", columnCount + 1);
00268 
00269         position = (int) tokens[2];
00270         if (position < lastposition)
00271             error("Reading Marker Position from Map File...\n"
00272                   "Marker position should be in base-pairs\n"
00273                   "and markers should be in map order\n");
00274 
00275         // TODO -- store marker locations somewhere!
00276         lastposition = position;
00277 
00278         columnHash.Push(GetMarkerID(tokens[1]));
00279         columns.Push(pcMarker);
00280         columnCount++;
00281 
00282         GetMarkerInfo(tokens[1])->position = position * 1e-8;
00283     }
00284 
00285     columns.Push(pcEnd);
00286     columnHash.Push(0);
00287 };
00288 
00289 int PedigreeDescription::CountTextColumns()
00290 {
00291     int count = 0;
00292 
00293     for (int i = 0; i < columnCount; i++, count++)
00294         if (columns[i] == pcMarker)
00295             count++;
00296 
00297     return count;
00298 }
00299 
00300 void PedigreeDescription::LoadLinkageDataFile(const char * iFilename)
00301 {
00302     IFILE f = ifopen(iFilename, "rb");
00303 
00304     if (f == NULL)
00305         error(
00306             "The linkage format datafile %s cannot be opened\n\n"
00307             "Please check that the file exists and is not being used by another program\n"
00308             "To find out how to set input filenames, check the documentation\n",
00309             iFilename);
00310 
00311     LoadLinkageDataFile(f);
00312     ifclose(f);
00313 
00314     filename = iFilename;
00315 };
00316 
00317 void PedigreeDescription::LoadLinkageDataFile(IFILE & input)
00318 {
00319     columns.Clear();
00320     columnHash.Clear();
00321     columnCount = 0;
00322 
00323     String      buffer, label;
00324     StringArray tokens;
00325 
00326     ReadLineHelper(input, buffer, tokens);
00327 
00328     if (tokens.Length() != 4 || tokens[2].AsInteger() != (int) chromosomeX ||
00329             tokens[0].AsInteger() < 0)
00330         error("Cannot handle first line of data file\n\n"
00331               "Expecting four (4) numeric values, which correspond to:\n"
00332               "   num-loci   -- number of loci in the pedigree\n"
00333               "                 this value must be positive\n"
00334               "   risk-locus -- locus for which risks should be calculated\n"
00335               "                 this value will be ignored\n"
00336               "   sex-link   -- are the loci sex linked [0 - No, 1 - Yes]\n"
00337               "                 %s\n"
00338               "   program    -- which LINKAGE program do you want to use?\n"
00339               "                 this value will also be ignored\n\n"
00340               "The actual input read:\n%s\n",
00341               chromosomeX ? "expecting X-linked data, so this value must be ONE (1)"
00342               : "expecting autosomal data, so this must be ZERO (0)",
00343               (const char *) buffer);
00344 
00345     int numloci = tokens[0];
00346 
00347     ReadLineHelper(input, buffer, tokens);
00348 
00349     if (tokens.Length() != 4 ||
00350             tokens[0].AsInteger() != 0 ||
00351             tokens[3].AsInteger() != 0)
00352         error("Cannot handle second line of data file\n\n"
00353               "Expecting four (4) numeric values, which correspond to:\n"
00354               "   mutation-model         -- must be zero, corresponding to no mutation\n"
00355               "   male-mutation-rate     -- ignored\n"
00356               "   female-mutation-rate   -- ignored\n"
00357               "   linkage-disequilibrium -- must be zero, may be used in the future to\n"
00358               "                             read haplotype frequencies\n\n"
00359               "The actual input read:\n%s\n", (const char *) buffer);
00360 
00361     StringArray markerOrder;
00362     int         unknown = 0;
00363 
00364     ReadLineHelper(input, buffer, markerOrder);
00365 
00366     if (markerOrder.Length() > numloci)
00367         error("The third line of the data file lists marker order\n\n"
00368               "Although %d loci are defined [in the first line],\n"
00369               "this line includes %d values:\n%s\n",
00370               numloci, markerOrder.Length(), (const char *) buffer);
00371 
00372     IntArray    locus;
00373     bool need_blank_line = false;
00374 
00375     while (!ifeof(input) && numloci--)
00376     {
00377         if (ReadLineHelper(input, buffer, tokens) == 0)
00378             error("Linkage data file ends unexpectedly");
00379 
00380         if (tokens.Length() < 2)
00381             error("Incomplete locus information in data file\n"
00382                   "Information for each locus should include 2 or more fiels\n"
00383                   "The expected fields are:\n"
00384                   "   field_type  -- indicator of locus type (trait, marker,...)\n"
00385                   "   alleles     -- number of alleles\n"
00386                   "   name        -- locus name, preceded by hash (#) sign\n\n"
00387                   "The actual input read:\n%s\n", (const char *) buffer);
00388 
00389         int locus_type = (int) tokens[0];
00390         int alleles    = (int) tokens[1];
00391 
00392         String locus_name("LOCUS");
00393         locus_name += ++unknown;
00394 
00395         if (tokens.Length() > 2 && tokens[2][0] == '#')
00396         {
00397             if (tokens[2][1] != 0)
00398                 locus_name = tokens[2].SubStr(1);
00399             else if (tokens.Length() > 3)
00400                 locus_name = tokens[3];
00401         }
00402 
00403         if ((locus_type == 4 && alleles == 0) ||
00404                 (locus_type == 4 && alleles == 1))
00405         {
00406             columnHash.Push(GetCovariateID(locus_name));
00407             columns.Push(pcCovariate);
00408             columnCount++;
00409             continue;
00410         }
00411 
00412         if (locus_type == 0 && alleles == 0)
00413         {
00414             columnHash.Push(GetTraitID(locus_name));
00415             columns.Push(pcTrait);
00416             columnCount++;
00417             continue;
00418         }
00419 
00420         if (ReadLineHelper(input, buffer, tokens) != alleles)
00421             error("Expecting %d allele frequencies, but input has %d columns:\n"
00422                   "%s\n", alleles, tokens.Length(), (const char *) buffer);
00423 
00424         Vector frequencies(alleles + 1);
00425 
00426         frequencies[0] = 0.0;
00427         for (int i = 1; i <= alleles; i++)
00428             frequencies[i] = (double) tokens[i - 1];
00429 
00430         double sum = frequencies.Sum();
00431 
00432         if (sum <= 0.0)
00433             error("Allele frequencies at %s sum to %f, which doesn't make sense\n",
00434                   (const char *) locus_name, sum);
00435 
00436         if (fabs(sum - 1.0) > 1.2e-5)
00437         {
00438             printf("Allele frequencies at %s sum to %f, adjusted to 1.0\n",
00439                    (const char *) locus_name, sum);
00440             need_blank_line = true;
00441         }
00442 
00443         if (sum != 1.0)
00444             frequencies *= 1.0 / sum;
00445 
00446         switch (locus_type)
00447         {
00448             case 1 :
00449             {
00450                 // Affection
00451                 columnHash.Push(GetAffectionID(locus_name));
00452                 columns.Push(pcAffection);
00453                 columnCount++;
00454 
00455                 // Read number of liability classes
00456                 if (ReadLineHelper(input, buffer, tokens) == 0)
00457                     error("Linkage data file ends unexpectedly\n");
00458 
00459                 // Skip liability class data
00460                 int classes = tokens[0];
00461                 if (classes > 1)
00462                 {
00463                     columnHash.Push(0);
00464                     columns.Push(pcSkip);
00465                     columnCount++;
00466                 }
00467 
00468                 // Separate liability class rows for males and females for X-linked data
00469                 if (chromosomeX) classes *= 2;
00470 
00471                 while (classes--)
00472                     if (ReadLineHelper(input, buffer, tokens) == 0)
00473                         error("Linkage data file ends unexpectedly\n");
00474 
00475                 // Ignore map location for quantitative variables
00476                 locus.Push(-1);
00477             }
00478             break;
00479             case 3 :
00480             {
00481                 columnHash.Push(GetMarkerID(locus_name));
00482                 columns.Push(pcMarker);
00483                 columnCount++;
00484 
00485                 // Store allele frequencies
00486                 MarkerInfo * info = GetMarkerInfo(locus_name);
00487 
00488                 info->freq = frequencies;
00489 
00490                 // Initialize allele labels
00491                 info->alleleLabels.Clear();
00492                 for (int i = 0; i < frequencies.Length(); i++)
00493                     info->alleleLabels.Push(label = i);
00494                 info->IndexAlleles();
00495 
00496                 // Store marker id, so that we can track map location
00497                 locus.Push(GetMarkerID(locus_name));
00498             }
00499             break;
00500             case 0 :
00501             {
00502                 // Read number of quantitative variables
00503                 if (ReadLineHelper(input, buffer, tokens) == 0)
00504                     error("Linkage data file ends unexpectedly\n");
00505 
00506                 // Add each quantitative variable to pedigree
00507                 // Discard information on means
00508                 for (int vars = tokens[0], i = 0; i < vars; i++)
00509                 {
00510                     if (ReadLineHelper(input, buffer, tokens) == 0)
00511                         error("Linkage data file ends unexpectedly\n");
00512 
00513                     String trait_name(locus_name);
00514 
00515                     if (i)
00516                     {
00517                         trait_name += ".";
00518                         trait_name += i + 1;
00519                     }
00520 
00521                     columnHash.Push(GetTraitID(trait_name));
00522                     columns.Push(pcTrait);
00523                     columnCount++;
00524                 }
00525 
00526                 // Skip var-covar matrix
00527                 if (ReadLineHelper(input, buffer, tokens) == 0)
00528                     error("Linkage data file ends unexpectedly\n");
00529 
00530                 // Skip heterozygote scaling factor for var-covar matrix
00531                 if (ReadLineHelper(input, buffer, tokens) == 0)
00532                     error("Linkage data file ends unexpectedly\n");
00533 
00534                 // Ignore map location for quantitative variables
00535                 locus.Push(-1);
00536             }
00537             break;
00538             case 2 :
00539                 error("The data file includes binary factors\n"
00540                       "Regretably, loci of this type are not supported\n\n");
00541                 break;
00542             default :
00543                 error("Unsupported locus type [%d] in data file", locus_type);
00544                 break;
00545         }
00546     }
00547 
00548     if (need_blank_line) printf("\n");
00549 
00550     columns.Push(pcEnd);
00551     columnHash.Push(0);
00552 
00553     ReadLineHelper(input, buffer, tokens);
00554     int sexDifference = tokens.Length() ? tokens[0].AsInteger() : -1;
00555     if (tokens.Length() != 2 ||
00556             (sexDifference != 0 && sexDifference != 2) ||
00557             tokens[1].AsInteger() != 0)
00558         error("Error retrieving recombination information\n\n"
00559               "Expecting two (2) numeric values, which correspond to:\n"
00560               "   sex-difference   -- must be zero (no difference) or two (sex specific recombination)\n"
00561               "   map-function     -- must be zero, that is, no interference\n"
00562               "The actual input read:\n%s\n", (const char *) buffer);
00563 
00564     Vector distances[2];
00565     bool distance_in_centimorgans = false;
00566 
00567     for (int r = 0; r <= sexDifference; r += 2)
00568     {
00569         ReadLineHelper(input, buffer, tokens);
00570         if (tokens.Length() != markerOrder.Length() - 1)
00571             error("Error retrieving recombination information\n\n"
00572                   "Expecting %d recombination fractions (current map includes %d loci)\n"
00573                   "Instead the following line was input:\n%s\n",
00574                   markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer);
00575 
00576         distances[r >> 1].Dimension(tokens.Length());
00577         for (int i = 0; i < tokens.Length(); i++)
00578             distances[r >> 1][i] = (double) tokens[i];
00579 
00580         if (distances[r >> 1].Min() < 0.0)
00581             error("Linkage datafile specifies negative recombination fractions");
00582 
00583         bool centimorgans = distances[r >> 1].Max() > 0.5;
00584 
00585         if (centimorgans && !distance_in_centimorgans)
00586             printf("  Some recombination fractions in datafile are greater than 0.5,\n"
00587                    "  so recombination fractions will be interpreted as cM distances\n\n");
00588 
00589         distance_in_centimorgans |= centimorgans;
00590     }
00591 
00592     double position = 0.0, positionMale = 0.0;
00593 
00594     for (int i = 0, moving = false; i < markerOrder.Length(); i++)
00595     {
00596         int m = markerOrder[i].AsInteger() - 1;
00597 
00598         if (m < 0 || m >= locus.Length())
00599             error("The marker order in the linkage datafile is invalid\n");
00600 
00601         m = locus[m];
00602 
00603         if (m != -1)
00604         {
00605             MarkerInfo * info = GetMarkerInfo(m);
00606             info->chromosome = chromosomeX ? 9999 : 0;
00607 
00608             if (sexDifference == 2)
00609                 info->position = (position + positionMale) * 0.5,
00610                                  info->positionFemale = position,
00611                                                         info->positionMale = positionMale;
00612             else
00613                 info->position = info->positionMale = info->positionFemale = position;
00614 
00615             moving = true;
00616         }
00617 
00618         if (i < markerOrder.Length() - 1 && moving)
00619             position += distance_in_centimorgans ?
00620                         0.01 * distances[0][i] : RecombinationToDistance(distances[0][i]);
00621 
00622         if (sexDifference == 2 && i < markerOrder.Length() - 1 && moving)
00623             positionMale += distance_in_centimorgans ?
00624                             0.01 * distances[1][i] : RecombinationToDistance(distances[1][i]);
00625     }
00626 }
00627 
00628 int PedigreeDescription::ReadLineHelper(IFILE & input,
00629                                         String & buffer,
00630                                         StringArray & tokens)
00631 {
00632     do
00633     {
00634         // Read Line
00635         buffer.ReadLine(input);
00636         buffer.Trim();
00637 
00638         // Strip comments marked with >>
00639         int pos = buffer.FastFind(">>");
00640         if (pos == -1) pos = buffer.FastFind("<<");
00641         if (pos == -1) pos = buffer.Length() + 1;
00642         if (buffer[0] == '#') pos = 0;
00643 
00644         // Find space/tab delimited tokens
00645         tokens.Clear();
00646         tokens.AddTokens(buffer.Left(pos - 1), WHITESPACE);
00647 
00648     }
00649     while (tokens.Length() == 0 && !ifeof(input));
00650 
00651     return tokens.Length();
00652 }
00653 
00654 void PedigreeDescription::LoadMendelDataFile(const char * iFilename)
00655 {
00656     IFILE f = ifopen(iFilename, "rb");
00657 
00658     if (f == NULL)
00659         error(
00660             "The MENDEL format datafile %s cannot be opened\n\n"
00661             "Please check that the file exists and is not being used by another program\n"
00662             "To find out how to set input filenames, check the documentation\n",
00663             iFilename);
00664 
00665     LoadMendelDataFile(f);
00666     ifclose(f);
00667 };
00668 
00669 void PedigreeDescription::LoadMendelDataFile(IFILE & file)
00670 {
00671     // Processes mendel format file
00672     mendelFormat = true;
00673 
00674     // Codominant markers are mapped to markers
00675     // Non-codominant markers are mapped into multiple "affection status"
00676     // (Y/N) variables
00677     columns.Clear();
00678     columnHash.Clear();
00679     columnCount = 0;
00680 
00681     FortranFormat parser;
00682 
00683     // Variables for storing parsed input
00684     String locusName;
00685     String locusType;
00686     String alleleLabel;
00687     String alleleFreq;
00688     String phenotype;
00689     String genotype;
00690     int phenoCount;
00691     int alleleCount;
00692 
00693     while (!ifeof(file))
00694     {
00695         // Cycle through headers for each locus
00696         parser.SetInputFile(file);
00697         parser.SetFormat("(2A8,I2,I3)");
00698 
00699         // After retrieving locus name, check that we haven't tried to
00700         // read past the end-of-file
00701         parser.GetNextField(locusName);
00702         parser.GetNextField(locusType);
00703         alleleCount = parser.GetNextInteger();
00704         phenoCount = parser.GetNextInteger();
00705 
00706         if (locusName.IsEmpty() && locusType.IsEmpty() && alleleCount == 0 &&
00707                 phenoCount == 0 && ifeof(file))
00708             break;
00709 
00710         // Only recognize autosomal and x-linked loci
00711         if (locusType.Compare("AUTOSOME") != 0 && locusType.Compare("X-LINKED"))
00712             error("Unrecognized locus type '%s' in Mendel data file\n\n"
00713                   "Recognized locus types are \"AUTOSOME\" and \"X-LINKED\".",
00714                   (const char *) locusType);
00715 
00716         if (locusType.Compare("AUTOSOME") == 0 && chromosomeX)
00717             error("The data file indicates that locus %s is AUTOSOMAL, but\n"
00718                   "X-LINKED loci were expected as input\n",
00719                   (const char *) locusName);
00720 
00721         if (locusType.Compare("X-LINKED") == 0 && !chromosomeX)
00722             error("The data file indicates that locus %s is X-LINKED, but\n"
00723                   "AUTOSOMAL loci were expected as input\n",
00724                   (const char *) locusName);
00725 
00726         if (locusName.IsEmpty())
00727             error("Blank locus name encountered in data file\n");
00728 
00729         if (phenoCount == 0)
00730         {
00731             // Co-dominant marker
00732             columns.Push(pcMarker);
00733             columnHash.Push(GetMarkerID(locusName));
00734             columnCount++;
00735 
00736             // Update marker info with allele labels and frequencies
00737             MarkerInfo * info = GetMarkerInfo(locusName);
00738 
00739             info->alleleLabels.Clear();
00740             info->alleleLabels.Push("");
00741             info->freq.Clear();
00742 
00743             parser.SetFormat("(2A8)");
00744 
00745             // Mendel allows allele names to be specified with frequencies
00746             // left blank
00747             for (int i = 0; i < alleleCount; i++)
00748             {
00749                 parser.GetNextField(alleleLabel);
00750                 parser.GetNextField(alleleFreq);
00751 
00752                 if (alleleLabel.IsEmpty())
00753                     error("Locus %s is missing allele label for allele #%d\n",
00754                           (const char *) locusName, i+1);
00755 
00756                 info->alleleLabels.Push(alleleLabel);
00757 
00758                 if (!alleleFreq.IsEmpty())
00759                 {
00760                     if (info->freq.Length() == 0)
00761                         info->freq.Push(0.0);
00762 
00763                     info->freq.Push(alleleFreq.AsDouble());
00764                 }
00765             }
00766             info->IndexAlleles();
00767 
00768             if (info->alleleLabels.Length() != info->freq.Length() &&
00769                     info->freq.Length() != 0)
00770                 error("Locus %s is missing allele frequency information for %d alleles\n",
00771                       (const char *) locusName,
00772                       info->alleleLabels.Length() - info->freq.Length());
00773         }
00774         else
00775         {
00776             // Non-codominant marker, which we decompose into multiple traits...
00777             parser.SetFormat("(2A8)");
00778 
00779             // First skip allele frequency information
00780             for (int i = 0; i < alleleCount; i++)
00781             {
00782                 parser.GetNextField(alleleLabel);
00783                 parser.GetNextField(alleleFreq);
00784             }
00785 
00786             // Then read in each phenotype
00787             for (int i = 0; i < alleleCount; i++)
00788             {
00789                 parser.SetFormat("(A8,I3)");
00790                 parser.GetNextField(phenotype);
00791                 int genoCount = parser.GetNextInteger();
00792 
00793                 parser.SetFormat("(A17)");
00794                 for (int j = 0; j < genoCount; j++)
00795                     parser.GetNextField(genotype);
00796 
00797                 columns.Push(pcAffection);
00798                 columnHash.Push(GetAffectionID(locusName + "->" + phenotype));
00799                 columnCount++;
00800             }
00801         }
00802     }
00803 
00804     columns.Push(pcEnd);
00805     columnHash.Push(0);
00806 }
00807 
00808 int PedigreeDescription::CountColumns(int type)
00809 {
00810     int count = 0;
00811 
00812     for (int i = 0; i < columns.Length(); i++)
00813         if (columns[i] == type)
00814             count++;
00815 
00816     return count;
00817 }
00818 
00819 const char * PedigreeDescription::ColumnSummary(String & string)
00820 {
00821     string.Clear();
00822     UpdateSummary(string, pcMarker, " markers [x2 cols]");
00823     UpdateSummary(string, pcTrait, " traits");
00824     UpdateSummary(string, pcAffection, " discrete traits");
00825     UpdateSummary(string, pcCovariate, " covariates");
00826     UpdateSummary(string, pcZygosity, " zygosity");
00827     UpdateSummary(string, pcSkip, " skipped");
00828     return string;
00829 }
00830 
00831 void PedigreeDescription::UpdateSummary(String & string, int type, const char * label)
00832 {
00833     int count = CountColumns(type);
00834 
00835     if (count)
00836     {
00837         if (string.Length())
00838             string += ", ";
00839         string += count;
00840         string += label;
00841     }
00842 }
00843 
Generated on Wed Nov 17 15:38:29 2010 for StatGen Software by  doxygen 1.6.3