00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "PedigreeDescription.h"
00019 #include "MapFunction.h"
00020 #include "MathVector.h"
00021 #include "Constant.h"
00022 #include "FortranFormat.h"
00023 #include "Error.h"
00024
00025 #include <stdlib.h>
00026 #include <ctype.h>
00027 #include <string.h>
00028 #include <math.h>
00029
00030 PedigreeDescription::PedigreeDescription()
00031 {
00032 columnCount = 0;
00033 mendelFormat = false;
00034 }
00035
00036 PedigreeDescription::~PedigreeDescription()
00037 { };
00038
00039 PedigreeDescription & PedigreeDescription::operator = (PedigreeDescription & rhs)
00040 {
00041 columnCount = rhs.columnCount;
00042
00043 columns = rhs.columns;
00044 columnHash = rhs.columnHash;
00045
00046 return *this;
00047 };
00048
00049 void PedigreeDescription::Load(IFILE & input, bool warnIfLinkage)
00050 {
00051
00052 String buffer;
00053 StringArray tokens;
00054
00055 mendelFormat = false;
00056
00057 ReadLineHelper(input, buffer, tokens);
00058 ifrewind(input);
00059
00060 if (tokens.Length() == 4 && isdigit(tokens[0][0]))
00061 {
00062 if (warnIfLinkage) printf("Data file looks like a LINKAGE format file...\n\n");
00063 LoadLinkageDataFile(input);
00064 return;
00065 }
00066
00067 if (buffer.Length() > 18
00068 && (buffer.SubStr(8,8).SlowCompare("AUTOSOME") == 0 ||
00069 buffer.SubStr(8,8).SlowCompare("X-LINKED") == 0)
00070 && (isdigit(buffer[16]) || isdigit(buffer[17]))
00071 && (isdigit(buffer[18]) || isdigit(buffer[19]) ||
00072 (buffer.Length() > 19 && isdigit(buffer[20]))))
00073 {
00074 printf("Data file looks like a MENDEL format file...\n"
00075 " Activating EXPERIMENTAL support for this format\n\n");
00076 LoadMendelDataFile(input);
00077 return;
00078 }
00079
00080
00081 ifrewind(input);
00082 int done = 0;
00083 int line = 0;
00084
00085 columns.Clear();
00086 columnHash.Clear();
00087 columnCount = 0;
00088
00089 while (!ifeof(input) && !done)
00090 {
00091 int i;
00092
00093 buffer.ReadLine(input);
00094 line++;
00095
00096 tokens.Clear();
00097 tokens.AddTokens(buffer, WHITESPACE);
00098
00099 if (tokens.Length() < 1) continue;
00100
00101 if (tokens.Length() == 1)
00102 error("Problem reading data file:\n"
00103 "Item #%d (of type %s) has no name.",
00104 columnCount+1, (const char *) tokens[0]);
00105
00106 switch (toupper(tokens[0][0]))
00107 {
00108 case 'A' :
00109 columnHash.Push(GetAffectionID(tokens[1]));
00110 columns.Push(pcAffection);
00111 columnCount++;
00112 break;
00113 case 'M' :
00114 columnHash.Push(GetMarkerID(tokens[1]));
00115 columns.Push(pcMarker);
00116 columnCount++;
00117 break;
00118 case 'T' :
00119 columnHash.Push(GetTraitID(tokens[1]));
00120 columns.Push(pcTrait);
00121 columnCount++;
00122 break;
00123 case 'C' :
00124 columnHash.Push(GetCovariateID(tokens[1]));
00125 columns.Push(pcCovariate);
00126 columnCount++;
00127 break;
00128 case 'S' :
00129 i = (int) tokens[0].SubStr(1);
00130 i = i > 0 ? i : 1;
00131 while (i--)
00132 {
00133 columns.Push(pcSkip);
00134 columnHash.Push(0);
00135 columnCount++;
00136 }
00137 break;
00138 case 'Z' :
00139 columnHash.Push(0);
00140 columns.Push(pcZygosity);
00141 columnCount++;
00142 break;
00143 case 'V' :
00144 GetMarkerID(tokens[1]);
00145 break;
00146 case 'E' :
00147 done = 1;
00148 break;
00149 case 'U' :
00150 if (toupper(tokens[0][1]) == 'T' && toupper(tokens[0][2]) == 'C')
00151 {
00152 int c = GetCovariateID(tokens[1]);
00153 int t = GetTraitID(tokens[1]);
00154
00155 if (c >= 32767 || t >= 32767)
00156 error("Internal error processing data file\n");
00157
00158 columnHash.Push(t * 32768 + c);
00159 columns.Push(pcUndocumentedTraitCovariate);
00160 columnCount++;
00161 break;
00162 }
00163 default :
00164 error("Problem in data file (line %d):\n%s\n",
00165 line, (const char *) buffer);
00166 }
00167 }
00168
00169 columns.Push(pcEnd);
00170 columnHash.Push(0);
00171 };
00172
00173 void PedigreeDescription::Load(const char * iFilename, bool warnIfLinkage)
00174 {
00175 IFILE f = ifopen(iFilename, "rb");
00176
00177 if (f == NULL)
00178 error(
00179 "The datafile %s cannot be opened\n\n"
00180 "Common causes for this problem are:\n"
00181 " * You might not have used the correct options to specify input file names,\n"
00182 " please check the program documentation for information on how to do this\n\n"
00183 " * The file doesn't exist or the filename might have been misspelt\n\n"
00184 " * The file exists but it is being used by another program which you will need\n"
00185 " to close before continuing\n\n"
00186 " * The file is larger than 2GB and you haven't compiled this application with\n"
00187 " large file support.\n\n",
00188 iFilename);
00189
00190 Load(f, warnIfLinkage);
00191 ifclose(f);
00192
00193 filename = iFilename;
00194 };
00195
00196 void PedigreeDescription::LoadMap(const char * iFilename)
00197 {
00198 IFILE f = ifopen(iFilename, "rb");
00199
00200 if (f == NULL)
00201 error(
00202 "The mapfile %s cannot be opened\n\n"
00203 "Please check that the file exists and is not being used by another program\n"
00204 "To find out how to set input filenames, check the documentation\n",
00205 iFilename);
00206
00207 LoadMap(f);
00208 ifclose(f);
00209 };
00210
00211 void PedigreeDescription::LoadMap(IFILE & input)
00212 {
00213 columns.Clear();
00214 columnHash.Clear();
00215 columnCount = 0;
00216
00217 int lastposition = 0;
00218 String buffer;
00219 StringArray tokens;
00220
00221 buffer.ReadLine(input);
00222 tokens.AddTokens(buffer, WHITESPACE);
00223
00224 while (tokens.Length() == 0 && !ifeof(input))
00225 {
00226 buffer.ReadLine(input);
00227 tokens.AddTokens(buffer, WHITESPACE);
00228 }
00229
00230 if (tokens.Length() != 3)
00231 error("Error reading map file header, which has %d columns.\n"
00232 "Three columns were expected, corresponding to\n"
00233 "MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION\n"
00234 "The offending header is transcribed below:\n\n"
00235 "%s", tokens.Length(), (const char *) buffer);
00236 else
00237 printf("Map file column labels\n"
00238 " -- COLUMN 1, Expecting MARKER_ID, Read %s\n"
00239 " -- COLUMN 2, Expecting MARKER_NAME, Read %s\n"
00240 " -- COLUMN 3, Expection BASE_PAIR_POSITION, Read %s\n\n",
00241 (const char *)(tokens[0]), (const char *)(tokens[1]),
00242 (const char *)(tokens[2]));
00243
00244 int line = 1;
00245 while (!ifeof(input))
00246 {
00247 int serial;
00248 long position;
00249
00250 buffer.ReadLine(input);
00251 line++;
00252
00253 tokens.Clear();
00254 tokens.AddTokens(buffer, WHITESPACE);
00255
00256 if (tokens.Length() < 1) continue;
00257 if (tokens.Length() != 3)
00258 error("Each line in the map file should have 3 tokens, corresponding\n"
00259 "to MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION respectively\n"
00260 "However, there are %d tokens in line %d, transcribed below:\n\n"
00261 "%s", tokens.Length(), line, (const char *) buffer);
00262
00263 serial = (int) tokens[0];
00264 if (serial != columnCount + 1)
00265 error("Reading Marker Index from Map File...\n"
00266 "Markers should be indexed consecutively starting at 1\n"
00267 "Marker %d does not fit this pattern\n", columnCount + 1);
00268
00269 position = (int) tokens[2];
00270 if (position < lastposition)
00271 error("Reading Marker Position from Map File...\n"
00272 "Marker position should be in base-pairs\n"
00273 "and markers should be in map order\n");
00274
00275
00276 lastposition = position;
00277
00278 columnHash.Push(GetMarkerID(tokens[1]));
00279 columns.Push(pcMarker);
00280 columnCount++;
00281
00282 GetMarkerInfo(tokens[1])->position = position * 1e-8;
00283 }
00284
00285 columns.Push(pcEnd);
00286 columnHash.Push(0);
00287 };
00288
00289 int PedigreeDescription::CountTextColumns()
00290 {
00291 int count = 0;
00292
00293 for (int i = 0; i < columnCount; i++, count++)
00294 if (columns[i] == pcMarker)
00295 count++;
00296
00297 return count;
00298 }
00299
00300 void PedigreeDescription::LoadLinkageDataFile(const char * iFilename)
00301 {
00302 IFILE f = ifopen(iFilename, "rb");
00303
00304 if (f == NULL)
00305 error(
00306 "The linkage format datafile %s cannot be opened\n\n"
00307 "Please check that the file exists and is not being used by another program\n"
00308 "To find out how to set input filenames, check the documentation\n",
00309 iFilename);
00310
00311 LoadLinkageDataFile(f);
00312 ifclose(f);
00313
00314 filename = iFilename;
00315 };
00316
00317 void PedigreeDescription::LoadLinkageDataFile(IFILE & input)
00318 {
00319 columns.Clear();
00320 columnHash.Clear();
00321 columnCount = 0;
00322
00323 String buffer, label;
00324 StringArray tokens;
00325
00326 ReadLineHelper(input, buffer, tokens);
00327
00328 if (tokens.Length() != 4 || tokens[2].AsInteger() != (int) chromosomeX ||
00329 tokens[0].AsInteger() < 0)
00330 error("Cannot handle first line of data file\n\n"
00331 "Expecting four (4) numeric values, which correspond to:\n"
00332 " num-loci -- number of loci in the pedigree\n"
00333 " this value must be positive\n"
00334 " risk-locus -- locus for which risks should be calculated\n"
00335 " this value will be ignored\n"
00336 " sex-link -- are the loci sex linked [0 - No, 1 - Yes]\n"
00337 " %s\n"
00338 " program -- which LINKAGE program do you want to use?\n"
00339 " this value will also be ignored\n\n"
00340 "The actual input read:\n%s\n",
00341 chromosomeX ? "expecting X-linked data, so this value must be ONE (1)"
00342 : "expecting autosomal data, so this must be ZERO (0)",
00343 (const char *) buffer);
00344
00345 int numloci = tokens[0];
00346
00347 ReadLineHelper(input, buffer, tokens);
00348
00349 if (tokens.Length() != 4 ||
00350 tokens[0].AsInteger() != 0 ||
00351 tokens[3].AsInteger() != 0)
00352 error("Cannot handle second line of data file\n\n"
00353 "Expecting four (4) numeric values, which correspond to:\n"
00354 " mutation-model -- must be zero, corresponding to no mutation\n"
00355 " male-mutation-rate -- ignored\n"
00356 " female-mutation-rate -- ignored\n"
00357 " linkage-disequilibrium -- must be zero, may be used in the future to\n"
00358 " read haplotype frequencies\n\n"
00359 "The actual input read:\n%s\n", (const char *) buffer);
00360
00361 StringArray markerOrder;
00362 int unknown = 0;
00363
00364 ReadLineHelper(input, buffer, markerOrder);
00365
00366 if (markerOrder.Length() > numloci)
00367 error("The third line of the data file lists marker order\n\n"
00368 "Although %d loci are defined [in the first line],\n"
00369 "this line includes %d values:\n%s\n",
00370 numloci, markerOrder.Length(), (const char *) buffer);
00371
00372 IntArray locus;
00373 bool need_blank_line = false;
00374
00375 while (!ifeof(input) && numloci--)
00376 {
00377 if (ReadLineHelper(input, buffer, tokens) == 0)
00378 error("Linkage data file ends unexpectedly");
00379
00380 if (tokens.Length() < 2)
00381 error("Incomplete locus information in data file\n"
00382 "Information for each locus should include 2 or more fiels\n"
00383 "The expected fields are:\n"
00384 " field_type -- indicator of locus type (trait, marker,...)\n"
00385 " alleles -- number of alleles\n"
00386 " name -- locus name, preceded by hash (#) sign\n\n"
00387 "The actual input read:\n%s\n", (const char *) buffer);
00388
00389 int locus_type = (int) tokens[0];
00390 int alleles = (int) tokens[1];
00391
00392 String locus_name("LOCUS");
00393 locus_name += ++unknown;
00394
00395 if (tokens.Length() > 2 && tokens[2][0] == '#')
00396 {
00397 if (tokens[2][1] != 0)
00398 locus_name = tokens[2].SubStr(1);
00399 else if (tokens.Length() > 3)
00400 locus_name = tokens[3];
00401 }
00402
00403 if ((locus_type == 4 && alleles == 0) ||
00404 (locus_type == 4 && alleles == 1))
00405 {
00406 columnHash.Push(GetCovariateID(locus_name));
00407 columns.Push(pcCovariate);
00408 columnCount++;
00409 continue;
00410 }
00411
00412 if (locus_type == 0 && alleles == 0)
00413 {
00414 columnHash.Push(GetTraitID(locus_name));
00415 columns.Push(pcTrait);
00416 columnCount++;
00417 continue;
00418 }
00419
00420 if (ReadLineHelper(input, buffer, tokens) != alleles)
00421 error("Expecting %d allele frequencies, but input has %d columns:\n"
00422 "%s\n", alleles, tokens.Length(), (const char *) buffer);
00423
00424 Vector frequencies(alleles + 1);
00425
00426 frequencies[0] = 0.0;
00427 for (int i = 1; i <= alleles; i++)
00428 frequencies[i] = (double) tokens[i - 1];
00429
00430 double sum = frequencies.Sum();
00431
00432 if (sum <= 0.0)
00433 error("Allele frequencies at %s sum to %f, which doesn't make sense\n",
00434 (const char *) locus_name, sum);
00435
00436 if (fabs(sum - 1.0) > 1.2e-5)
00437 {
00438 printf("Allele frequencies at %s sum to %f, adjusted to 1.0\n",
00439 (const char *) locus_name, sum);
00440 need_blank_line = true;
00441 }
00442
00443 if (sum != 1.0)
00444 frequencies *= 1.0 / sum;
00445
00446 switch (locus_type)
00447 {
00448 case 1 :
00449 {
00450
00451 columnHash.Push(GetAffectionID(locus_name));
00452 columns.Push(pcAffection);
00453 columnCount++;
00454
00455
00456 if (ReadLineHelper(input, buffer, tokens) == 0)
00457 error("Linkage data file ends unexpectedly\n");
00458
00459
00460 int classes = tokens[0];
00461 if (classes > 1)
00462 {
00463 columnHash.Push(0);
00464 columns.Push(pcSkip);
00465 columnCount++;
00466 }
00467
00468
00469 if (chromosomeX) classes *= 2;
00470
00471 while (classes--)
00472 if (ReadLineHelper(input, buffer, tokens) == 0)
00473 error("Linkage data file ends unexpectedly\n");
00474
00475
00476 locus.Push(-1);
00477 }
00478 break;
00479 case 3 :
00480 {
00481 columnHash.Push(GetMarkerID(locus_name));
00482 columns.Push(pcMarker);
00483 columnCount++;
00484
00485
00486 MarkerInfo * info = GetMarkerInfo(locus_name);
00487
00488 info->freq = frequencies;
00489
00490
00491 info->alleleLabels.Clear();
00492 for (int i = 0; i < frequencies.Length(); i++)
00493 info->alleleLabels.Push(label = i);
00494 info->IndexAlleles();
00495
00496
00497 locus.Push(GetMarkerID(locus_name));
00498 }
00499 break;
00500 case 0 :
00501 {
00502
00503 if (ReadLineHelper(input, buffer, tokens) == 0)
00504 error("Linkage data file ends unexpectedly\n");
00505
00506
00507
00508 for (int vars = tokens[0], i = 0; i < vars; i++)
00509 {
00510 if (ReadLineHelper(input, buffer, tokens) == 0)
00511 error("Linkage data file ends unexpectedly\n");
00512
00513 String trait_name(locus_name);
00514
00515 if (i)
00516 {
00517 trait_name += ".";
00518 trait_name += i + 1;
00519 }
00520
00521 columnHash.Push(GetTraitID(trait_name));
00522 columns.Push(pcTrait);
00523 columnCount++;
00524 }
00525
00526
00527 if (ReadLineHelper(input, buffer, tokens) == 0)
00528 error("Linkage data file ends unexpectedly\n");
00529
00530
00531 if (ReadLineHelper(input, buffer, tokens) == 0)
00532 error("Linkage data file ends unexpectedly\n");
00533
00534
00535 locus.Push(-1);
00536 }
00537 break;
00538 case 2 :
00539 error("The data file includes binary factors\n"
00540 "Regretably, loci of this type are not supported\n\n");
00541 break;
00542 default :
00543 error("Unsupported locus type [%d] in data file", locus_type);
00544 break;
00545 }
00546 }
00547
00548 if (need_blank_line) printf("\n");
00549
00550 columns.Push(pcEnd);
00551 columnHash.Push(0);
00552
00553 ReadLineHelper(input, buffer, tokens);
00554 int sexDifference = tokens.Length() ? tokens[0].AsInteger() : -1;
00555 if (tokens.Length() != 2 ||
00556 (sexDifference != 0 && sexDifference != 2) ||
00557 tokens[1].AsInteger() != 0)
00558 error("Error retrieving recombination information\n\n"
00559 "Expecting two (2) numeric values, which correspond to:\n"
00560 " sex-difference -- must be zero (no difference) or two (sex specific recombination)\n"
00561 " map-function -- must be zero, that is, no interference\n"
00562 "The actual input read:\n%s\n", (const char *) buffer);
00563
00564 Vector distances[2];
00565 bool distance_in_centimorgans = false;
00566
00567 for (int r = 0; r <= sexDifference; r += 2)
00568 {
00569 ReadLineHelper(input, buffer, tokens);
00570 if (tokens.Length() != markerOrder.Length() - 1)
00571 error("Error retrieving recombination information\n\n"
00572 "Expecting %d recombination fractions (current map includes %d loci)\n"
00573 "Instead the following line was input:\n%s\n",
00574 markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer);
00575
00576 distances[r >> 1].Dimension(tokens.Length());
00577 for (int i = 0; i < tokens.Length(); i++)
00578 distances[r >> 1][i] = (double) tokens[i];
00579
00580 if (distances[r >> 1].Min() < 0.0)
00581 error("Linkage datafile specifies negative recombination fractions");
00582
00583 bool centimorgans = distances[r >> 1].Max() > 0.5;
00584
00585 if (centimorgans && !distance_in_centimorgans)
00586 printf(" Some recombination fractions in datafile are greater than 0.5,\n"
00587 " so recombination fractions will be interpreted as cM distances\n\n");
00588
00589 distance_in_centimorgans |= centimorgans;
00590 }
00591
00592 double position = 0.0, positionMale = 0.0;
00593
00594 for (int i = 0, moving = false; i < markerOrder.Length(); i++)
00595 {
00596 int m = markerOrder[i].AsInteger() - 1;
00597
00598 if (m < 0 || m >= locus.Length())
00599 error("The marker order in the linkage datafile is invalid\n");
00600
00601 m = locus[m];
00602
00603 if (m != -1)
00604 {
00605 MarkerInfo * info = GetMarkerInfo(m);
00606 info->chromosome = chromosomeX ? 9999 : 0;
00607
00608 if (sexDifference == 2)
00609 info->position = (position + positionMale) * 0.5,
00610 info->positionFemale = position,
00611 info->positionMale = positionMale;
00612 else
00613 info->position = info->positionMale = info->positionFemale = position;
00614
00615 moving = true;
00616 }
00617
00618 if (i < markerOrder.Length() - 1 && moving)
00619 position += distance_in_centimorgans ?
00620 0.01 * distances[0][i] : RecombinationToDistance(distances[0][i]);
00621
00622 if (sexDifference == 2 && i < markerOrder.Length() - 1 && moving)
00623 positionMale += distance_in_centimorgans ?
00624 0.01 * distances[1][i] : RecombinationToDistance(distances[1][i]);
00625 }
00626 }
00627
00628 int PedigreeDescription::ReadLineHelper(IFILE & input,
00629 String & buffer,
00630 StringArray & tokens)
00631 {
00632 do
00633 {
00634
00635 buffer.ReadLine(input);
00636 buffer.Trim();
00637
00638
00639 int pos = buffer.FastFind(">>");
00640 if (pos == -1) pos = buffer.FastFind("<<");
00641 if (pos == -1) pos = buffer.Length() + 1;
00642 if (buffer[0] == '#') pos = 0;
00643
00644
00645 tokens.Clear();
00646 tokens.AddTokens(buffer.Left(pos - 1), WHITESPACE);
00647
00648 }
00649 while (tokens.Length() == 0 && !ifeof(input));
00650
00651 return tokens.Length();
00652 }
00653
00654 void PedigreeDescription::LoadMendelDataFile(const char * iFilename)
00655 {
00656 IFILE f = ifopen(iFilename, "rb");
00657
00658 if (f == NULL)
00659 error(
00660 "The MENDEL format datafile %s cannot be opened\n\n"
00661 "Please check that the file exists and is not being used by another program\n"
00662 "To find out how to set input filenames, check the documentation\n",
00663 iFilename);
00664
00665 LoadMendelDataFile(f);
00666 ifclose(f);
00667 };
00668
00669 void PedigreeDescription::LoadMendelDataFile(IFILE & file)
00670 {
00671
00672 mendelFormat = true;
00673
00674
00675
00676
00677 columns.Clear();
00678 columnHash.Clear();
00679 columnCount = 0;
00680
00681 FortranFormat parser;
00682
00683
00684 String locusName;
00685 String locusType;
00686 String alleleLabel;
00687 String alleleFreq;
00688 String phenotype;
00689 String genotype;
00690 int phenoCount;
00691 int alleleCount;
00692
00693 while (!ifeof(file))
00694 {
00695
00696 parser.SetInputFile(file);
00697 parser.SetFormat("(2A8,I2,I3)");
00698
00699
00700
00701 parser.GetNextField(locusName);
00702 parser.GetNextField(locusType);
00703 alleleCount = parser.GetNextInteger();
00704 phenoCount = parser.GetNextInteger();
00705
00706 if (locusName.IsEmpty() && locusType.IsEmpty() && alleleCount == 0 &&
00707 phenoCount == 0 && ifeof(file))
00708 break;
00709
00710
00711 if (locusType.Compare("AUTOSOME") != 0 && locusType.Compare("X-LINKED"))
00712 error("Unrecognized locus type '%s' in Mendel data file\n\n"
00713 "Recognized locus types are \"AUTOSOME\" and \"X-LINKED\".",
00714 (const char *) locusType);
00715
00716 if (locusType.Compare("AUTOSOME") == 0 && chromosomeX)
00717 error("The data file indicates that locus %s is AUTOSOMAL, but\n"
00718 "X-LINKED loci were expected as input\n",
00719 (const char *) locusName);
00720
00721 if (locusType.Compare("X-LINKED") == 0 && !chromosomeX)
00722 error("The data file indicates that locus %s is X-LINKED, but\n"
00723 "AUTOSOMAL loci were expected as input\n",
00724 (const char *) locusName);
00725
00726 if (locusName.IsEmpty())
00727 error("Blank locus name encountered in data file\n");
00728
00729 if (phenoCount == 0)
00730 {
00731
00732 columns.Push(pcMarker);
00733 columnHash.Push(GetMarkerID(locusName));
00734 columnCount++;
00735
00736
00737 MarkerInfo * info = GetMarkerInfo(locusName);
00738
00739 info->alleleLabels.Clear();
00740 info->alleleLabels.Push("");
00741 info->freq.Clear();
00742
00743 parser.SetFormat("(2A8)");
00744
00745
00746
00747 for (int i = 0; i < alleleCount; i++)
00748 {
00749 parser.GetNextField(alleleLabel);
00750 parser.GetNextField(alleleFreq);
00751
00752 if (alleleLabel.IsEmpty())
00753 error("Locus %s is missing allele label for allele #%d\n",
00754 (const char *) locusName, i+1);
00755
00756 info->alleleLabels.Push(alleleLabel);
00757
00758 if (!alleleFreq.IsEmpty())
00759 {
00760 if (info->freq.Length() == 0)
00761 info->freq.Push(0.0);
00762
00763 info->freq.Push(alleleFreq.AsDouble());
00764 }
00765 }
00766 info->IndexAlleles();
00767
00768 if (info->alleleLabels.Length() != info->freq.Length() &&
00769 info->freq.Length() != 0)
00770 error("Locus %s is missing allele frequency information for %d alleles\n",
00771 (const char *) locusName,
00772 info->alleleLabels.Length() - info->freq.Length());
00773 }
00774 else
00775 {
00776
00777 parser.SetFormat("(2A8)");
00778
00779
00780 for (int i = 0; i < alleleCount; i++)
00781 {
00782 parser.GetNextField(alleleLabel);
00783 parser.GetNextField(alleleFreq);
00784 }
00785
00786
00787 for (int i = 0; i < alleleCount; i++)
00788 {
00789 parser.SetFormat("(A8,I3)");
00790 parser.GetNextField(phenotype);
00791 int genoCount = parser.GetNextInteger();
00792
00793 parser.SetFormat("(A17)");
00794 for (int j = 0; j < genoCount; j++)
00795 parser.GetNextField(genotype);
00796
00797 columns.Push(pcAffection);
00798 columnHash.Push(GetAffectionID(locusName + "->" + phenotype));
00799 columnCount++;
00800 }
00801 }
00802 }
00803
00804 columns.Push(pcEnd);
00805 columnHash.Push(0);
00806 }
00807
00808 int PedigreeDescription::CountColumns(int type)
00809 {
00810 int count = 0;
00811
00812 for (int i = 0; i < columns.Length(); i++)
00813 if (columns[i] == type)
00814 count++;
00815
00816 return count;
00817 }
00818
00819 const char * PedigreeDescription::ColumnSummary(String & string)
00820 {
00821 string.Clear();
00822 UpdateSummary(string, pcMarker, " markers [x2 cols]");
00823 UpdateSummary(string, pcTrait, " traits");
00824 UpdateSummary(string, pcAffection, " discrete traits");
00825 UpdateSummary(string, pcCovariate, " covariates");
00826 UpdateSummary(string, pcZygosity, " zygosity");
00827 UpdateSummary(string, pcSkip, " skipped");
00828 return string;
00829 }
00830
00831 void PedigreeDescription::UpdateSummary(String & string, int type, const char * label)
00832 {
00833 int count = CountColumns(type);
00834
00835 if (count)
00836 {
00837 if (string.Length())
00838 string += ", ";
00839 string += count;
00840 string += label;
00841 }
00842 }
00843