00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "PedigreeDescription.h"
00019 #include "MapFunction.h"
00020 #include "MathVector.h"
00021 #include "Constant.h"
00022 #include "FortranFormat.h"
00023 #include "Error.h"
00024
00025 #include <stdlib.h>
00026 #include <ctype.h>
00027 #include <string.h>
00028 #include <math.h>
00029
00030 PedigreeDescription::PedigreeDescription()
00031 {
00032 columnCount = 0;
00033 mendelFormat = false;
00034 }
00035
00036 PedigreeDescription::~PedigreeDescription()
00037 { };
00038
00039 PedigreeDescription & PedigreeDescription::operator = (PedigreeDescription & rhs)
00040 {
00041 columnCount = rhs.columnCount;
00042
00043 columns = rhs.columns;
00044 columnHash = rhs.columnHash;
00045
00046 return *this;
00047 };
00048
00049 void PedigreeDescription::Load(IFILE & input, bool warnIfLinkage)
00050 {
00051
00052 String buffer;
00053 StringArray tokens;
00054
00055 mendelFormat = false;
00056
00057 ReadLineHelper(input, buffer, tokens);
00058 ifrewind(input);
00059
00060 if (tokens.Length() == 4 && isdigit(tokens[0][0]))
00061 {
00062 if (warnIfLinkage) printf("Data file looks like a LINKAGE format file...\n\n");
00063 LoadLinkageDataFile(input);
00064 return;
00065 }
00066
00067 if (buffer.Length() > 18
00068 && (buffer.SubStr(8,8).SlowCompare("AUTOSOME") == 0 ||
00069 buffer.SubStr(8,8).SlowCompare("X-LINKED") == 0)
00070 && (isdigit(buffer[16]) || isdigit(buffer[17]))
00071 && (isdigit(buffer[18]) || isdigit(buffer[19]) ||
00072 (buffer.Length() > 19 && isdigit(buffer[20]))))
00073 {
00074 printf("Data file looks like a MENDEL format file...\n"
00075 " Activating EXPERIMENTAL support for this format\n\n");
00076 LoadMendelDataFile(input);
00077 return;
00078 }
00079
00080
00081 ifrewind(input);
00082 int done = 0;
00083 int line = 0;
00084
00085 columns.Clear();
00086 columnHash.Clear();
00087 columnCount = 0;
00088
00089 while (!ifeof(input) && !done)
00090 {
00091 int i;
00092
00093 buffer.ReadLine(input);
00094 line++;
00095
00096 tokens.Clear();
00097 tokens.AddTokens(buffer, WHITESPACE);
00098
00099 if (tokens.Length() < 1) continue;
00100
00101 if (tokens.Length() == 1)
00102 error("Problem reading data file:\n"
00103 "Item #%d (of type %s) has no name.",
00104 columnCount+1, (const char *) tokens[0]);
00105
00106 switch (toupper(tokens[0][0]))
00107 {
00108 case 'A' :
00109 columnHash.Push(GetAffectionID(tokens[1]));
00110 columns.Push(pcAffection);
00111 columnCount++;
00112 break;
00113 case 'M' :
00114 columnHash.Push(GetMarkerID(tokens[1]));
00115 columns.Push(pcMarker);
00116 columnCount++;
00117 break;
00118 case 'T' :
00119 columnHash.Push(GetTraitID(tokens[1]));
00120 columns.Push(pcTrait);
00121 columnCount++;
00122 break;
00123 case 'C' :
00124 columnHash.Push(GetCovariateID(tokens[1]));
00125 columns.Push(pcCovariate);
00126 columnCount++;
00127 break;
00128 case '$' :
00129 columnHash.Push(GetStringID(tokens[1]));
00130 columns.Push(pcString);
00131 columnCount++;
00132 break;
00133 case 'S' :
00134 i = (int) tokens[0].SubStr(1);
00135 i = i > 0 ? i : 1;
00136 while (i--)
00137 {
00138 columns.Push(pcSkip);
00139 columnHash.Push(0);
00140 columnCount++;
00141 }
00142 break;
00143 case 'Z' :
00144 columnHash.Push(0);
00145 columns.Push(pcZygosity);
00146 columnCount++;
00147 break;
00148 case 'V' :
00149 GetMarkerID(tokens[1]);
00150 break;
00151 case 'E' :
00152 done = 1;
00153 break;
00154 case 'U' :
00155 if (toupper(tokens[0][1]) == 'T' && toupper(tokens[0][2]) == 'C')
00156 {
00157 int c = GetCovariateID(tokens[1]);
00158 int t = GetTraitID(tokens[1]);
00159
00160 if (c >= 32767 || t >= 32767)
00161 error("Internal error processing data file\n");
00162
00163 columnHash.Push(t * 32768 + c);
00164 columns.Push(pcUndocumentedTraitCovariate);
00165 columnCount++;
00166 break;
00167 }
00168 default :
00169 error("Problem in data file (line %d):\n%s\n",
00170 line, (const char *) buffer);
00171 }
00172 }
00173
00174 columns.Push(pcEnd);
00175 columnHash.Push(0);
00176 };
00177
00178 void PedigreeDescription::Load(const char * iFilename, bool warnIfLinkage)
00179 {
00180 IFILE f = ifopen(iFilename, "rb");
00181
00182 if (f == NULL)
00183 error(
00184 "The datafile %s cannot be opened\n\n"
00185 "Common causes for this problem are:\n"
00186 " * You might not have used the correct options to specify input file names,\n"
00187 " please check the program documentation for information on how to do this\n\n"
00188 " * The file doesn't exist or the filename might have been misspelt\n\n"
00189 " * The file exists but it is being used by another program which you will need\n"
00190 " to close before continuing\n\n"
00191 " * The file is larger than 2GB and you haven't compiled this application with\n"
00192 " large file support.\n\n",
00193 iFilename);
00194
00195 Load(f, warnIfLinkage);
00196 ifclose(f);
00197
00198 filename = iFilename;
00199 };
00200
00201 void PedigreeDescription::LoadMap(const char * iFilename)
00202 {
00203 IFILE f = ifopen(iFilename, "rb");
00204
00205 if (f == NULL)
00206 error(
00207 "The mapfile %s cannot be opened\n\n"
00208 "Please check that the file exists and is not being used by another program\n"
00209 "To find out how to set input filenames, check the documentation\n",
00210 iFilename);
00211
00212 LoadMap(f);
00213 ifclose(f);
00214 };
00215
00216 void PedigreeDescription::LoadMap(IFILE & input)
00217 {
00218 columns.Clear();
00219 columnHash.Clear();
00220 columnCount = 0;
00221
00222 int lastposition = 0;
00223 String buffer;
00224 StringArray tokens;
00225
00226 buffer.ReadLine(input);
00227 tokens.AddTokens(buffer, WHITESPACE);
00228
00229 while (tokens.Length() == 0 && !ifeof(input))
00230 {
00231 buffer.ReadLine(input);
00232 tokens.AddTokens(buffer, WHITESPACE);
00233 }
00234
00235 if (tokens.Length() != 3)
00236 error("Error reading map file header, which has %d columns.\n"
00237 "Three columns were expected, corresponding to\n"
00238 "MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION\n"
00239 "The offending header is transcribed below:\n\n"
00240 "%s", tokens.Length(), (const char *) buffer);
00241 else
00242 printf("Map file column labels\n"
00243 " -- COLUMN 1, Expecting MARKER_ID, Read %s\n"
00244 " -- COLUMN 2, Expecting MARKER_NAME, Read %s\n"
00245 " -- COLUMN 3, Expection BASE_PAIR_POSITION, Read %s\n\n",
00246 (const char *)(tokens[0]), (const char *)(tokens[1]),
00247 (const char *)(tokens[2]));
00248
00249 int line = 1;
00250 while (!ifeof(input))
00251 {
00252 int serial;
00253 long position;
00254
00255 buffer.ReadLine(input);
00256 line++;
00257
00258 tokens.Clear();
00259 tokens.AddTokens(buffer, WHITESPACE);
00260
00261 if (tokens.Length() < 1) continue;
00262 if (tokens.Length() != 3)
00263 error("Each line in the map file should have 3 tokens, corresponding\n"
00264 "to MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION respectively\n"
00265 "However, there are %d tokens in line %d, transcribed below:\n\n"
00266 "%s", tokens.Length(), line, (const char *) buffer);
00267
00268 serial = (int) tokens[0];
00269 if (serial != columnCount + 1)
00270 error("Reading Marker Index from Map File...\n"
00271 "Markers should be indexed consecutively starting at 1\n"
00272 "Marker %d does not fit this pattern\n", columnCount + 1);
00273
00274 position = (int) tokens[2];
00275 if (position < lastposition)
00276 error("Reading Marker Position from Map File...\n"
00277 "Marker position should be in base-pairs\n"
00278 "and markers should be in map order\n");
00279
00280
00281 lastposition = position;
00282
00283 columnHash.Push(GetMarkerID(tokens[1]));
00284 columns.Push(pcMarker);
00285 columnCount++;
00286
00287 GetMarkerInfo(tokens[1])->position = position * 1e-8;
00288 }
00289
00290 columns.Push(pcEnd);
00291 columnHash.Push(0);
00292 };
00293
00294 int PedigreeDescription::CountTextColumns()
00295 {
00296 int count = 0;
00297
00298 for (int i = 0; i < columnCount; i++, count++)
00299 if (columns[i] == pcMarker)
00300 count++;
00301
00302 return count;
00303 }
00304
00305 void PedigreeDescription::LoadLinkageDataFile(const char * iFilename)
00306 {
00307 IFILE f = ifopen(iFilename, "rb");
00308
00309 if (f == NULL)
00310 error(
00311 "The linkage format datafile %s cannot be opened\n\n"
00312 "Please check that the file exists and is not being used by another program\n"
00313 "To find out how to set input filenames, check the documentation\n",
00314 iFilename);
00315
00316 LoadLinkageDataFile(f);
00317 ifclose(f);
00318
00319 filename = iFilename;
00320 };
00321
00322 void PedigreeDescription::LoadLinkageDataFile(IFILE & input)
00323 {
00324 columns.Clear();
00325 columnHash.Clear();
00326 columnCount = 0;
00327
00328 String buffer, label;
00329 StringArray tokens;
00330
00331 ReadLineHelper(input, buffer, tokens);
00332
00333 if (tokens.Length() != 4 || tokens[2].AsInteger() != (int) chromosomeX ||
00334 tokens[0].AsInteger() < 0)
00335 error("Cannot handle first line of data file\n\n"
00336 "Expecting four (4) numeric values, which correspond to:\n"
00337 " num-loci -- number of loci in the pedigree\n"
00338 " this value must be positive\n"
00339 " risk-locus -- locus for which risks should be calculated\n"
00340 " this value will be ignored\n"
00341 " sex-link -- are the loci sex linked [0 - No, 1 - Yes]\n"
00342 " %s\n"
00343 " program -- which LINKAGE program do you want to use?\n"
00344 " this value will also be ignored\n\n"
00345 "The actual input read:\n%s\n",
00346 chromosomeX ? "expecting X-linked data, so this value must be ONE (1)"
00347 : "expecting autosomal data, so this must be ZERO (0)",
00348 (const char *) buffer);
00349
00350 int numloci = tokens[0];
00351
00352 ReadLineHelper(input, buffer, tokens);
00353
00354 if (tokens.Length() != 4 ||
00355 tokens[0].AsInteger() != 0 ||
00356 tokens[3].AsInteger() != 0)
00357 error("Cannot handle second line of data file\n\n"
00358 "Expecting four (4) numeric values, which correspond to:\n"
00359 " mutation-model -- must be zero, corresponding to no mutation\n"
00360 " male-mutation-rate -- ignored\n"
00361 " female-mutation-rate -- ignored\n"
00362 " linkage-disequilibrium -- must be zero, may be used in the future to\n"
00363 " read haplotype frequencies\n\n"
00364 "The actual input read:\n%s\n", (const char *) buffer);
00365
00366 StringArray markerOrder;
00367 int unknown = 0;
00368
00369 ReadLineHelper(input, buffer, markerOrder);
00370
00371 if (markerOrder.Length() > numloci)
00372 error("The third line of the data file lists marker order\n\n"
00373 "Although %d loci are defined [in the first line],\n"
00374 "this line includes %d values:\n%s\n",
00375 numloci, markerOrder.Length(), (const char *) buffer);
00376
00377 IntArray locus;
00378 bool need_blank_line = false;
00379
00380 while (!ifeof(input) && numloci--)
00381 {
00382 if (ReadLineHelper(input, buffer, tokens) == 0)
00383 error("Linkage data file ends unexpectedly");
00384
00385 if (tokens.Length() < 2)
00386 error("Incomplete locus information in data file\n"
00387 "Information for each locus should include 2 or more fiels\n"
00388 "The expected fields are:\n"
00389 " field_type -- indicator of locus type (trait, marker,...)\n"
00390 " alleles -- number of alleles\n"
00391 " name -- locus name, preceded by hash (#) sign\n\n"
00392 "The actual input read:\n%s\n", (const char *) buffer);
00393
00394 int locus_type = (int) tokens[0];
00395 int alleles = (int) tokens[1];
00396
00397 String locus_name("LOCUS");
00398 locus_name += ++unknown;
00399
00400 if (tokens.Length() > 2 && tokens[2][0] == '#')
00401 {
00402 if (tokens[2][1] != 0)
00403 locus_name = tokens[2].SubStr(1);
00404 else if (tokens.Length() > 3)
00405 locus_name = tokens[3];
00406 }
00407
00408 if ((locus_type == 4 && alleles == 0) ||
00409 (locus_type == 4 && alleles == 1))
00410 {
00411 columnHash.Push(GetCovariateID(locus_name));
00412 columns.Push(pcCovariate);
00413 columnCount++;
00414 continue;
00415 }
00416
00417 if (locus_type == 0 && alleles == 0)
00418 {
00419 columnHash.Push(GetTraitID(locus_name));
00420 columns.Push(pcTrait);
00421 columnCount++;
00422 continue;
00423 }
00424
00425 if (ReadLineHelper(input, buffer, tokens) != alleles)
00426 error("Expecting %d allele frequencies, but input has %d columns:\n"
00427 "%s\n", alleles, tokens.Length(), (const char *) buffer);
00428
00429 Vector frequencies(alleles + 1);
00430
00431 frequencies[0] = 0.0;
00432 for (int i = 1; i <= alleles; i++)
00433 frequencies[i] = (double) tokens[i - 1];
00434
00435 double sum = frequencies.Sum();
00436
00437 if (sum <= 0.0)
00438 error("Allele frequencies at %s sum to %f, which doesn't make sense\n",
00439 (const char *) locus_name, sum);
00440
00441 if (fabs(sum - 1.0) > 1.2e-5)
00442 {
00443 printf("Allele frequencies at %s sum to %f, adjusted to 1.0\n",
00444 (const char *) locus_name, sum);
00445 need_blank_line = true;
00446 }
00447
00448 if (sum != 1.0)
00449 frequencies *= 1.0 / sum;
00450
00451 switch (locus_type)
00452 {
00453 case 1 :
00454 {
00455
00456 columnHash.Push(GetAffectionID(locus_name));
00457 columns.Push(pcAffection);
00458 columnCount++;
00459
00460
00461 if (ReadLineHelper(input, buffer, tokens) == 0)
00462 error("Linkage data file ends unexpectedly\n");
00463
00464
00465 int classes = tokens[0];
00466 if (classes > 1)
00467 {
00468 columnHash.Push(0);
00469 columns.Push(pcSkip);
00470 columnCount++;
00471 }
00472
00473
00474 if (chromosomeX) classes *= 2;
00475
00476 while (classes--)
00477 if (ReadLineHelper(input, buffer, tokens) == 0)
00478 error("Linkage data file ends unexpectedly\n");
00479
00480
00481 locus.Push(-1);
00482 }
00483 break;
00484 case 3 :
00485 {
00486 columnHash.Push(GetMarkerID(locus_name));
00487 columns.Push(pcMarker);
00488 columnCount++;
00489
00490
00491 MarkerInfo * info = GetMarkerInfo(locus_name);
00492
00493 info->freq = frequencies;
00494
00495
00496 info->alleleLabels.Clear();
00497 for (int i = 0; i < frequencies.Length(); i++)
00498 info->alleleLabels.Push(label = i);
00499 info->IndexAlleles();
00500
00501
00502 locus.Push(GetMarkerID(locus_name));
00503 }
00504 break;
00505 case 0 :
00506 {
00507
00508 if (ReadLineHelper(input, buffer, tokens) == 0)
00509 error("Linkage data file ends unexpectedly\n");
00510
00511
00512
00513 for (int vars = tokens[0], i = 0; i < vars; i++)
00514 {
00515 if (ReadLineHelper(input, buffer, tokens) == 0)
00516 error("Linkage data file ends unexpectedly\n");
00517
00518 String trait_name(locus_name);
00519
00520 if (i)
00521 {
00522 trait_name += ".";
00523 trait_name += i + 1;
00524 }
00525
00526 columnHash.Push(GetTraitID(trait_name));
00527 columns.Push(pcTrait);
00528 columnCount++;
00529 }
00530
00531
00532 if (ReadLineHelper(input, buffer, tokens) == 0)
00533 error("Linkage data file ends unexpectedly\n");
00534
00535
00536 if (ReadLineHelper(input, buffer, tokens) == 0)
00537 error("Linkage data file ends unexpectedly\n");
00538
00539
00540 locus.Push(-1);
00541 }
00542 break;
00543 case 2 :
00544 error("The data file includes binary factors\n"
00545 "Regretably, loci of this type are not supported\n\n");
00546 break;
00547 default :
00548 error("Unsupported locus type [%d] in data file", locus_type);
00549 break;
00550 }
00551 }
00552
00553 if (need_blank_line) printf("\n");
00554
00555 columns.Push(pcEnd);
00556 columnHash.Push(0);
00557
00558 ReadLineHelper(input, buffer, tokens);
00559 int sexDifference = tokens.Length() ? tokens[0].AsInteger() : -1;
00560 if (tokens.Length() != 2 ||
00561 (sexDifference != 0 && sexDifference != 2) ||
00562 tokens[1].AsInteger() != 0)
00563 error("Error retrieving recombination information\n\n"
00564 "Expecting two (2) numeric values, which correspond to:\n"
00565 " sex-difference -- must be zero (no difference) or two (sex specific recombination)\n"
00566 " map-function -- must be zero, that is, no interference\n"
00567 "The actual input read:\n%s\n", (const char *) buffer);
00568
00569 Vector distances[2];
00570 bool distance_in_centimorgans = false;
00571
00572 for (int r = 0; r <= sexDifference; r += 2)
00573 {
00574 ReadLineHelper(input, buffer, tokens);
00575 if (tokens.Length() != markerOrder.Length() - 1)
00576 error("Error retrieving recombination information\n\n"
00577 "Expecting %d recombination fractions (current map includes %d loci)\n"
00578 "Instead the following line was input:\n%s\n",
00579 markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer);
00580
00581 distances[r >> 1].Dimension(tokens.Length());
00582 for (int i = 0; i < tokens.Length(); i++)
00583 distances[r >> 1][i] = (double) tokens[i];
00584
00585 if (distances[r >> 1].Min() < 0.0)
00586 error("Linkage datafile specifies negative recombination fractions");
00587
00588 bool centimorgans = distances[r >> 1].Max() > 0.5;
00589
00590 if (centimorgans && !distance_in_centimorgans)
00591 printf(" Some recombination fractions in datafile are greater than 0.5,\n"
00592 " so recombination fractions will be interpreted as cM distances\n\n");
00593
00594 distance_in_centimorgans |= centimorgans;
00595 }
00596
00597 double position = 0.0, positionMale = 0.0;
00598
00599 for (int i = 0, moving = false; i < markerOrder.Length(); i++)
00600 {
00601 int m = markerOrder[i].AsInteger() - 1;
00602
00603 if (m < 0 || m >= locus.Length())
00604 error("The marker order in the linkage datafile is invalid\n");
00605
00606 m = locus[m];
00607
00608 if (m != -1)
00609 {
00610 MarkerInfo * info = GetMarkerInfo(m);
00611 info->chromosome = chromosomeX ? 9999 : 0;
00612
00613 if (sexDifference == 2)
00614 info->position = (position + positionMale) * 0.5,
00615 info->positionFemale = position,
00616 info->positionMale = positionMale;
00617 else
00618 info->position = info->positionMale = info->positionFemale = position;
00619
00620 moving = true;
00621 }
00622
00623 if (i < markerOrder.Length() - 1 && moving)
00624 position += distance_in_centimorgans ?
00625 0.01 * distances[0][i] : RecombinationToDistance(distances[0][i]);
00626
00627 if (sexDifference == 2 && i < markerOrder.Length() - 1 && moving)
00628 positionMale += distance_in_centimorgans ?
00629 0.01 * distances[1][i] : RecombinationToDistance(distances[1][i]);
00630 }
00631 }
00632
00633 int PedigreeDescription::ReadLineHelper(IFILE & input,
00634 String & buffer,
00635 StringArray & tokens)
00636 {
00637 do
00638 {
00639
00640 buffer.ReadLine(input);
00641 buffer.Trim();
00642
00643
00644 int pos = buffer.FastFind(">>");
00645 if (pos == -1) pos = buffer.FastFind("<<");
00646 if (pos == -1) pos = buffer.Length() + 1;
00647 if (buffer[0] == '#') pos = 0;
00648
00649
00650 tokens.Clear();
00651 tokens.AddTokens(buffer.Left(pos - 1), WHITESPACE);
00652
00653 }
00654 while (tokens.Length() == 0 && !ifeof(input));
00655
00656 return tokens.Length();
00657 }
00658
00659 void PedigreeDescription::LoadMendelDataFile(const char * iFilename)
00660 {
00661 IFILE f = ifopen(iFilename, "rb");
00662
00663 if (f == NULL)
00664 error(
00665 "The MENDEL format datafile %s cannot be opened\n\n"
00666 "Please check that the file exists and is not being used by another program\n"
00667 "To find out how to set input filenames, check the documentation\n",
00668 iFilename);
00669
00670 LoadMendelDataFile(f);
00671 ifclose(f);
00672 };
00673
00674 void PedigreeDescription::LoadMendelDataFile(IFILE & file)
00675 {
00676
00677 mendelFormat = true;
00678
00679
00680
00681
00682 columns.Clear();
00683 columnHash.Clear();
00684 columnCount = 0;
00685
00686 FortranFormat parser;
00687
00688
00689 String locusName;
00690 String locusType;
00691 String alleleLabel;
00692 String alleleFreq;
00693 String phenotype;
00694 String genotype;
00695 int phenoCount;
00696 int alleleCount;
00697
00698 while (!ifeof(file))
00699 {
00700
00701 parser.SetInputFile(file);
00702 parser.SetFormat("(2A8,I2,I3)");
00703
00704
00705
00706 parser.GetNextField(locusName);
00707 parser.GetNextField(locusType);
00708 alleleCount = parser.GetNextInteger();
00709 phenoCount = parser.GetNextInteger();
00710
00711 if (locusName.IsEmpty() && locusType.IsEmpty() && alleleCount == 0 &&
00712 phenoCount == 0 && ifeof(file))
00713 break;
00714
00715
00716 if (locusType.Compare("AUTOSOME") != 0 && locusType.Compare("X-LINKED"))
00717 error("Unrecognized locus type '%s' in Mendel data file\n\n"
00718 "Recognized locus types are \"AUTOSOME\" and \"X-LINKED\".",
00719 (const char *) locusType);
00720
00721 if (locusType.Compare("AUTOSOME") == 0 && chromosomeX)
00722 error("The data file indicates that locus %s is AUTOSOMAL, but\n"
00723 "X-LINKED loci were expected as input\n",
00724 (const char *) locusName);
00725
00726 if (locusType.Compare("X-LINKED") == 0 && !chromosomeX)
00727 error("The data file indicates that locus %s is X-LINKED, but\n"
00728 "AUTOSOMAL loci were expected as input\n",
00729 (const char *) locusName);
00730
00731 if (locusName.IsEmpty())
00732 error("Blank locus name encountered in data file\n");
00733
00734 if (phenoCount == 0)
00735 {
00736
00737 columns.Push(pcMarker);
00738 columnHash.Push(GetMarkerID(locusName));
00739 columnCount++;
00740
00741
00742 MarkerInfo * info = GetMarkerInfo(locusName);
00743
00744 info->alleleLabels.Clear();
00745 info->alleleLabels.Push("");
00746 info->freq.Clear();
00747
00748 parser.SetFormat("(2A8)");
00749
00750
00751
00752 for (int i = 0; i < alleleCount; i++)
00753 {
00754 parser.GetNextField(alleleLabel);
00755 parser.GetNextField(alleleFreq);
00756
00757 if (alleleLabel.IsEmpty())
00758 error("Locus %s is missing allele label for allele #%d\n",
00759 (const char *) locusName, i+1);
00760
00761 info->alleleLabels.Push(alleleLabel);
00762
00763 if (!alleleFreq.IsEmpty())
00764 {
00765 if (info->freq.Length() == 0)
00766 info->freq.Push(0.0);
00767
00768 info->freq.Push(alleleFreq.AsDouble());
00769 }
00770 }
00771 info->IndexAlleles();
00772
00773 if (info->alleleLabels.Length() != info->freq.Length() &&
00774 info->freq.Length() != 0)
00775 error("Locus %s is missing allele frequency information for %d alleles\n",
00776 (const char *) locusName,
00777 info->alleleLabels.Length() - info->freq.Length());
00778 }
00779 else
00780 {
00781
00782 parser.SetFormat("(2A8)");
00783
00784
00785 for (int i = 0; i < alleleCount; i++)
00786 {
00787 parser.GetNextField(alleleLabel);
00788 parser.GetNextField(alleleFreq);
00789 }
00790
00791
00792 for (int i = 0; i < alleleCount; i++)
00793 {
00794 parser.SetFormat("(A8,I3)");
00795 parser.GetNextField(phenotype);
00796 int genoCount = parser.GetNextInteger();
00797
00798 parser.SetFormat("(A17)");
00799 for (int j = 0; j < genoCount; j++)
00800 parser.GetNextField(genotype);
00801
00802 columns.Push(pcAffection);
00803 columnHash.Push(GetAffectionID(locusName + "->" + phenotype));
00804 columnCount++;
00805 }
00806 }
00807 }
00808
00809 columns.Push(pcEnd);
00810 columnHash.Push(0);
00811 }
00812
00813 int PedigreeDescription::CountColumns(int type)
00814 {
00815 int count = 0;
00816
00817 for (int i = 0; i < columns.Length(); i++)
00818 if (columns[i] == type)
00819 count++;
00820
00821 return count;
00822 }
00823
00824 const char * PedigreeDescription::ColumnSummary(String & string)
00825 {
00826 string.Clear();
00827 UpdateSummary(string, pcMarker, " markers [x2 cols]");
00828 UpdateSummary(string, pcTrait, " traits");
00829 UpdateSummary(string, pcAffection, " discrete traits");
00830 UpdateSummary(string, pcCovariate, " covariates");
00831 UpdateSummary(string, pcString, " strings");
00832 UpdateSummary(string, pcZygosity, " zygosity");
00833 UpdateSummary(string, pcSkip, " skipped");
00834 return string;
00835 }
00836
00837 void PedigreeDescription::UpdateSummary(String & string, int type, const char * label)
00838 {
00839 int count = CountColumns(type);
00840
00841 if (count)
00842 {
00843 if (string.Length())
00844 string += ", ";
00845 string += count;
00846 string += label;
00847 }
00848 }
00849
00850
00851 void PedigreeDescription::AddMarkerColumn(const char * markerName)
00852 {
00853 if (columns.Last() == pcEnd)
00854 {
00855 columns.Pop();
00856 columnHash.Pop();
00857 }
00858
00859 columnHash.Push(GetMarkerID(markerName));
00860 columns.Push(pcMarker);
00861 columnCount++;
00862 }
00863
00864 void PedigreeDescription::AddCovariateColumn(const char * covariateName)
00865 {
00866 if (columns.Last() == pcEnd)
00867 {
00868 columns.Pop();
00869 columnHash.Pop();
00870 }
00871
00872 columnHash.Push(GetCovariateID(covariateName));
00873 columns.Push(pcCovariate);
00874 columnCount++;
00875 }
00876
00877 void PedigreeDescription::AddTraitColumn(const char * traitName)
00878 {
00879 if (columns.Last() == pcEnd)
00880 {
00881 columns.Pop();
00882 columnHash.Pop();
00883 }
00884
00885 columnHash.Push(GetCovariateID(traitName));
00886 columns.Push(pcTrait);
00887 columnCount++;
00888 }
00889
00890 void PedigreeDescription::AddAffectionColumn(const char * affectionName)
00891 {
00892 if (columns.Last() == pcEnd)
00893 {
00894 columns.Pop();
00895 columnHash.Pop();
00896 }
00897
00898 columnHash.Push(GetAffectionID(affectionName));
00899 columns.Push(pcAffection);
00900 columnCount++;
00901 }
00902
00903 void PedigreeDescription::AddStringColumn(const char * stringName)
00904 {
00905 if (columns.Last() == pcEnd)
00906 {
00907 columns.Pop();
00908 columnHash.Pop();
00909 }
00910
00911 columnHash.Push(GetStringID(stringName));
00912 columns.Push(pcString);
00913 columnCount++;
00914 }
00915
00916 void PedigreeDescription::AddZygosityColumn()
00917 {
00918 if (columns.Last() == pcEnd)
00919 {
00920 columns.Pop();
00921 columnHash.Pop();
00922 }
00923
00924 columnHash.Push(0);
00925 columns.Push(pcZygosity);
00926 columnCount++;
00927 }
00928
00929 void PedigreeDescription::AddSkippedColumn()
00930 {
00931 if (columns.Last() == pcEnd)
00932 {
00933 columns.Pop();
00934 columnHash.Pop();
00935 }
00936
00937 columnHash.Push(0);
00938 columns.Push(pcSkip);
00939 columnCount++;
00940 }
00941