00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "PedigreeGlobals.h"
00019 #include "Sort.h"
00020 #include "Error.h"
00021
00022 #include <math.h>
00023 #include <string.h>
00024 #include <ctype.h>
00025
00026 int PedigreeGlobals::traitCount = 0;
00027 int PedigreeGlobals::affectionCount = 0;
00028 int PedigreeGlobals::covariateCount = 0;
00029 int PedigreeGlobals::markerCount = 0;
00030
00031
00032 bool PedigreeGlobals::chromosomeX = false;
00033 bool PedigreeGlobals::sexSpecificMap = false;
00034
00035 StringArray PedigreeGlobals::traitNames;
00036 StringArray PedigreeGlobals::markerNames;
00037 StringArray PedigreeGlobals::covariateNames;
00038 StringArray PedigreeGlobals::affectionNames;
00039 StringIntHash PedigreeGlobals::markerLookup;
00040 StringIntHash PedigreeGlobals::traitLookup;
00041 StringIntHash PedigreeGlobals::affectionLookup;
00042 StringIntHash PedigreeGlobals::covariateLookup;
00043
00044 int PedigreeGlobals::markerInfoCount = 0;
00045 int PedigreeGlobals::markerInfoSize = 0;
00046
00047 MarkerInfo ** PedigreeGlobals::markerInfo = NULL;
00048 MarkerInfo ** PedigreeGlobals::markerInfoByInteger = NULL;
00049 StringHash PedigreeGlobals::markerInfoByName;
00050
00051 int MarkerInfo::count = 0;
00052
00053 int MarkerInfo::ComparePosition(MarkerInfo ** left, MarkerInfo ** right)
00054 {
00055 if ((*left)->chromosome != (*right)->chromosome)
00056 return (*left)->chromosome - (*right)->chromosome;
00057
00058 double difference = (*left)->position - (*right)->position;
00059
00060 if (difference > 0.0)
00061 return 1;
00062 else if (difference == 0.0)
00063 return (*left)->serial - (*right)->serial;
00064 else
00065 return -1;
00066 }
00067
00068 String MarkerInfo::GetAlleleLabel(int allele)
00069 {
00070 if (alleleLabels.Length() > allele && alleleLabels[allele].Length())
00071 return alleleLabels[allele];
00072 else if (alleleLabels.Length() <= allele)
00073 alleleLabels.Dimension(allele + 1);
00074 return alleleLabels[allele] = allele;
00075 }
00076
00077 bool MarkerInfo::AdjustFrequencies()
00078 {
00079 if (freq.Length() <= 1)
00080 {
00081 freq.Clear();
00082 return false;
00083 }
00084
00085 if (freq.Min() < 0.0)
00086 error("Locus %s has negative allele frequencies\n", (const char *) name);
00087
00088 double sum = freq.Sum();
00089
00090 if (sum <= 0.0)
00091 error("Locus %s frequencies sum to %f, which doesn't make sense\n",
00092 (const char *) name, sum);
00093
00094 if (sum != 1.0)
00095 freq *= 1.0 / sum;
00096
00097 if (fabs(sum - 1.0) > 1.2e-5)
00098 {
00099 printf("Locus %s frequencies sum to %f, adjusted to 1.0\n",
00100 (const char *) name, sum);
00101 return true;
00102 }
00103
00104 return false;
00105 }
00106
00107 void MarkerInfo::IndexAlleles()
00108 {
00109 if (alleleLabels.Length() >= 255)
00110 error("Marker %s has more than 254 distinct alleles\n",
00111 (const char *) name);
00112
00113 alleleNumbers.Clear();
00114
00115 for (int i = 1; i < alleleLabels.Length(); i++)
00116 alleleNumbers.SetInteger(alleleLabels[i], i);
00117 }
00118
00119 int MarkerInfo::NewAllele(const String & label)
00120 {
00121 if (alleleLabels.Length() == 0)
00122 alleleLabels.Push("");
00123
00124 if (alleleLabels.Length() >= 255)
00125 error("Marker %s has more than 254 distinct alleles\n",
00126 (const char *) name);
00127
00128 alleleNumbers.SetInteger(label, alleleLabels.Length());
00129 alleleLabels.Push(label);
00130
00131 return alleleLabels.Length() - 1;
00132 }
00133
00134 int PedigreeGlobals::GetTraitID(const char * name)
00135 {
00136 int idx = traitLookup.Integer(name);
00137
00138 if (idx != -1) return idx;
00139
00140 traitNames.Add(name);
00141 traitLookup.SetInteger(name, traitCount);
00142 return traitCount++;
00143 }
00144
00145 int PedigreeGlobals::GetAffectionID(const char * name)
00146 {
00147 int idx = affectionLookup.Integer(name);
00148
00149 if (idx != -1) return idx;
00150
00151 affectionNames.Add(name);
00152 affectionLookup.SetInteger(name, affectionCount);
00153 return affectionCount++;
00154 }
00155
00156 int PedigreeGlobals::GetCovariateID(const char * name)
00157 {
00158 int idx = covariateLookup.Integer(name);
00159
00160 if (idx != -1) return idx;
00161
00162 covariateNames.Add(name);
00163 covariateLookup.SetInteger(name, covariateCount);
00164 return covariateCount++;
00165 }
00166
00167 int PedigreeGlobals::GetMarkerID(const char * name)
00168 {
00169 int idx = markerLookup.Integer(name);
00170
00171 if (idx != -1) return idx;
00172
00173 markerNames.Add(name);
00174 markerLookup.SetInteger(name, markerCount);
00175
00176
00177 if (markerCount == 0)
00178 {
00179 markerInfoByInteger = new MarkerInfo * [16];
00180
00181 for (int i = 0; i < 16; i++)
00182 markerInfoByInteger[i] = NULL;
00183 }
00184 else if ((markerCount & (markerCount - 1)) == 0 && markerCount > 15)
00185 {
00186 MarkerInfo ** newKey = new MarkerInfo * [markerCount * 2];
00187
00188 for (int i = 0; i < markerCount; i++)
00189 newKey[i] = markerInfoByInteger[i];
00190
00191 for (int i = markerCount; i < markerCount * 2; i++)
00192 newKey[i] = NULL;
00193
00194 delete [] markerInfoByInteger;
00195
00196 markerInfoByInteger = newKey;
00197 }
00198
00199 return markerCount++;
00200 }
00201
00202 MarkerInfo * PedigreeGlobals::GetMarkerInfo(String & name)
00203 {
00204 MarkerInfo * info = (MarkerInfo *) markerInfoByName.Object(name);
00205
00206 if (info != NULL) return info;
00207
00208 info = new MarkerInfo(name);
00209 markerInfoByName.Add(name, info);
00210
00211 if (markerInfoCount >= markerInfoSize)
00212 GrowMarkerInfo();
00213
00214 markerInfo[markerInfoCount++] = info;
00215
00216 int markerId = LookupMarker(name);
00217 if (markerId >= 0) markerInfoByInteger[markerId] = info;
00218
00219 return info;
00220 }
00221
00222 MarkerInfo * PedigreeGlobals::GetMarkerInfo(int markerId)
00223 {
00224 if (markerId >= markerCount)
00225 error("Attempted to retrieve MarkerInfo using out-of-bounds index\n");
00226
00227 if (markerInfoByInteger[markerId] != NULL)
00228 return markerInfoByInteger[markerId];
00229 else
00230 return GetMarkerInfo(markerNames[markerId]);
00231 }
00232
00233 void PedigreeGlobals::GrowMarkerInfo()
00234 {
00235 int newSize = markerInfoSize ? 2 * markerInfoSize : 32;
00236
00237 MarkerInfo ** newArray = new MarkerInfo * [newSize];
00238
00239 if (markerInfoSize)
00240 {
00241 memcpy(newArray, markerInfo, sizeof(MarkerInfo *) * markerInfoSize);
00242 delete [] markerInfo;
00243 }
00244
00245 markerInfo = newArray;
00246 markerInfoSize = newSize;
00247 }
00248
00249 void PedigreeGlobals::FlagMissingMarkers(IntArray & missingMarkers)
00250 {
00251 int skipped_markers = 0;
00252
00253 if (missingMarkers.Length())
00254 {
00255 StringArray names;
00256
00257 printf("These markers couldn't be placed and won't be analysed:");
00258
00259 for (int i = 0; i < missingMarkers.Length(); i++)
00260 names.Push(GetMarkerInfo(missingMarkers[i])->name);
00261 names.Sort();
00262
00263 for (int i = 0, line = 80, lines = 0; i < missingMarkers.Length(); i++)
00264 {
00265 if (line + names[i].Length() + 1 > 79)
00266 printf("\n "), line = 3, lines++;
00267
00268 if (lines < 5)
00269 {
00270 printf("%s ", (const char *) names[i]);
00271 line += names[i].Length() + 1;
00272 }
00273 else
00274 skipped_markers++;
00275 }
00276
00277 if (skipped_markers)
00278 printf("as well as %d other unlisted markers...", skipped_markers);
00279
00280 printf("\n\n");
00281 }
00282 }
00283
00284 void PedigreeGlobals::GetOrderedMarkers(IntArray & markers)
00285 {
00286 if (markers.Length() == 0)
00287 {
00288 markers.Dimension(markerCount);
00289 markers.SetSequence(0, 1);
00290 }
00291
00292 MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
00293
00294 int count = 0;
00295 IntArray missingMarkers;
00296
00297 for (int i = 0; i < markers.Length(); i++)
00298 {
00299 MarkerInfo * info = GetMarkerInfo(markers[i]);
00300
00301 if (info->chromosome != -1)
00302 subset[count++] = info;
00303 else
00304 missingMarkers.Push(i);
00305 }
00306
00307 FlagMissingMarkers(missingMarkers);
00308
00309 QuickSort(subset, count, sizeof(MarkerInfo *),
00310 COMPAREFUNC MarkerInfo::ComparePosition);
00311
00312 markers.Clear();
00313 for (int i = 0; i < count; i++)
00314 markers.Push(GetMarkerID(subset[i]->name));
00315 }
00316
00317 int PedigreeGlobals::SortMarkersInMapOrder(IntArray & markers, int chromosome)
00318 {
00319 if (markers.Length() == 0)
00320 {
00321 markers.Dimension(markerCount);
00322 markers.SetSequence(0, 1);
00323 }
00324
00325 MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
00326
00327 int count = 0;
00328 IntArray missingMarkers;
00329
00330 for (int i = 0; i < markers.Length(); i++)
00331 {
00332 MarkerInfo * info = GetMarkerInfo(markers[i]);
00333
00334 if (info->chromosome != -1)
00335 subset[count++] = info;
00336 else if (chromosome == -1)
00337 missingMarkers.Push(i);
00338 }
00339
00340 if (chromosome == -1)
00341 FlagMissingMarkers(missingMarkers);
00342
00343 QuickSort(subset, count, sizeof(MarkerInfo *),
00344 COMPAREFUNC MarkerInfo::ComparePosition);
00345
00346 markers.Clear();
00347
00348 int current_chromosome = -1, next_chromosome = 0;
00349
00350 for (int i = 0; i < count; i++)
00351 if (subset[i]->chromosome < chromosome)
00352 continue;
00353 else if (current_chromosome == -1 ||
00354 subset[i]->chromosome == current_chromosome)
00355 {
00356 markers.Push(GetMarkerID(subset[i]->name));
00357 current_chromosome = subset[i]->chromosome;
00358 }
00359 else if (!next_chromosome)
00360 {
00361 next_chromosome = subset[i]->chromosome;
00362 break;
00363 }
00364
00365 delete [] subset;
00366
00367 return next_chromosome;
00368 }
00369
00370 void PedigreeGlobals::VerifySexSpecificOrder()
00371 {
00372 if (markerCount <= 1)
00373 return;
00374
00375 MarkerInfo ** sortedMarkers = new MarkerInfo * [markerCount];
00376
00377 for (int i = 0; i < markerCount; i++)
00378 sortedMarkers[i] = GetMarkerInfo(i);
00379
00380 QuickSort(sortedMarkers, markerCount, sizeof(MarkerInfo *),
00381 COMPAREFUNC MarkerInfo::ComparePosition);
00382
00383 double prev_female = sortedMarkers[0]->positionFemale;
00384 double prev_male = sortedMarkers[0]->positionMale;
00385 double curr_female, curr_male;
00386
00387 int prev_chromosome = sortedMarkers[0]->chromosome;
00388 int curr_chromosome;
00389
00390 for (int i = 1; i < markerCount; i++)
00391 {
00392 curr_chromosome = sortedMarkers[i]->chromosome;
00393 curr_female = sortedMarkers[i]->positionFemale;
00394 curr_male = sortedMarkers[i]->positionMale;
00395
00396 if (curr_chromosome == prev_chromosome &&
00397 (curr_female < prev_female || curr_male < prev_male))
00398 error("Sex-specific and sex-averaged maps are inconsistent.\n\n"
00399 "In the sex-averaged map, marker %s (%.2f cM) follows marker %s (%.2f cM).\n"
00400 "In the %smale map, marker %s (%.2f cM) PRECEDES marker %s (%.2f cM).\n",
00401 (const char *) sortedMarkers[i]->name,
00402 sortedMarkers[i]->position * 100,
00403 (const char *) sortedMarkers[i-1]->name,
00404 sortedMarkers[i-1]->position * 100,
00405 curr_female < prev_female ? "fe" : "",
00406 (const char *) sortedMarkers[i]->name,
00407 (curr_female < prev_female ? curr_female : curr_male) * 100,
00408 (const char *) sortedMarkers[i-1]->name,
00409 (curr_female < prev_female ? prev_female : prev_male) * 100);
00410
00411 prev_chromosome = curr_chromosome;
00412 prev_female = curr_female;
00413 prev_male = curr_male;
00414 }
00415
00416 delete [] sortedMarkers;
00417 }
00418
00419 void PedigreeGlobals::LoadAlleleFrequencies(const char * filename, bool required)
00420 {
00421
00422
00423 if (filename[0] == 0)
00424 {
00425 if (required)
00426 error("No name provided for required allele freuquency file\n");
00427 else
00428 return;
00429 }
00430
00431
00432
00433 IFILE f = ifopen(filename, "rb");
00434
00435 if (f == NULL)
00436 {
00437 if (required)
00438 error("Failed to open required alallele frequency file '%s'",
00439 (const char *) filename);
00440 else
00441 return;
00442 }
00443
00444 LoadAlleleFrequencies(f);
00445 ifclose(f);
00446 }
00447
00448 void PedigreeGlobals::LoadAlleleFrequencies(IFILE & input)
00449 {
00450 int done = 0;
00451 String buffer;
00452 StringArray tokens;
00453 MarkerInfo *info = NULL;
00454
00455 bool need_blank_line = false;
00456 int allele_size, old_max, next_allele = 0;
00457
00458 while (!ifeof(input) && !done)
00459 {
00460 int i, j;
00461
00462 buffer.ReadLine(input);
00463
00464 tokens.Clear();
00465 tokens.AddTokens(buffer, WHITESPACE);
00466
00467 if (tokens.Length() < 1) continue;
00468
00469 switch (toupper(tokens[0][0]))
00470 {
00471 case 'M' :
00472 if (tokens.Length() == 1)
00473 error("Unnamed marker in allele frequency file");
00474 if (info != NULL)
00475 need_blank_line |= info->AdjustFrequencies();
00476 info = GetMarkerInfo(tokens[1]);
00477 info->freq.Clear();
00478 info->freq.Push(0.0);
00479 next_allele = 1;
00480 break;
00481 case 'F' :
00482 if (info != NULL)
00483 for (i = 1; i < tokens.Length(); i++)
00484 {
00485 buffer = next_allele++;
00486
00487 int allele = LoadAllele(info, buffer);
00488
00489 if (allele >= info->freq.Length())
00490 {
00491 old_max = info->freq.Length();
00492 info->freq.Dimension(allele + 1);
00493 for (j = old_max; j < allele; j++)
00494 info->freq[j] = 0.0;
00495 }
00496
00497 info->freq[allele] = tokens[i].AsDouble();
00498 }
00499 break;
00500 case 'A' :
00501 if (info == NULL) continue;
00502
00503 if (tokens.Length() != 3)
00504 error("Error reading named allele frequencies for locus %s\n"
00505 "Lines with named alleles should have the format\n"
00506 " A allele_label allele_frequency\n\n"
00507 "But the following line was read:\n%s\n",
00508 (const char *) info->name, (const char *) buffer);
00509
00510 allele_size = LoadAllele(info, tokens[1]);
00511 next_allele = atoi(tokens[1]) + 1;
00512
00513 if (allele_size < 1)
00514 error("Error reading named allele frequencies for locus %s\n"
00515 "An invalid allele label was encountered\n",
00516 (const char *) info->name);
00517
00518 if (allele_size >= info->freq.Length())
00519 {
00520 old_max = info->freq.Length();
00521 info->freq.Dimension(allele_size + 1);
00522 for (i = old_max; i < allele_size; i++)
00523 info->freq[i] = 0.0;
00524 }
00525
00526 info->freq[allele_size] = tokens[2];
00527 break;
00528 case 'E' :
00529 done = 1;
00530 break;
00531 default :
00532 error("Problem in allele frequency file.\n"
00533 "Lines in this file should be of two types:\n"
00534 " -- Marker name lines begin with an M\n"
00535 " -- Frequency lines begin with an F\n\n"
00536 "However the following line is different:\n%s\n",
00537 (const char *) buffer);
00538 }
00539 }
00540
00541 if (info != NULL)
00542 need_blank_line |= info->AdjustFrequencies();
00543
00544 if (need_blank_line) printf("\n");
00545 }
00546
00547 void PedigreeGlobals::LoadMarkerMap(const char * filename, bool filter)
00548 {
00549 IFILE f = ifopen(filename, "rb");
00550 if (f == NULL) return;
00551 LoadMarkerMap(f, filter);
00552 ifclose(f);
00553 }
00554
00555 void PedigreeGlobals::LoadMarkerMap(IFILE & input, bool filter)
00556 {
00557 String buffer;
00558 StringArray tokens;
00559 bool first_pass = true;
00560
00561 while (!ifeof(input))
00562 {
00563 buffer.ReadLine(input);
00564
00565 tokens.Clear();
00566 tokens.AddTokens(buffer, WHITESPACE);
00567
00568 if (tokens.Length() < 1) continue;
00569
00570 if (first_pass)
00571 {
00572 sexSpecificMap = (tokens.Length() == 5);
00573
00574
00575
00576
00577 first_pass = false;
00578 }
00579
00580 if (tokens.Length() != 3 && !sexSpecificMap)
00581 error("Error reading map file\n"
00582 "Each line in this file should include 3 fields:\n"
00583 "CHROMOSOME, MARKER_NAME, and POSITION\n"
00584 "However the following line has %d fields\n%s\n",
00585 tokens.Length(), (const char *) buffer);
00586
00587 if (tokens.Length() != 5 && sexSpecificMap)
00588 error("Error reading map file\n"
00589 "Each line in this file should include 5 fields:\n\n"
00590 "CHROMOSOME, MARKER_NAME, SEX_AVERAGED_POS, FEMALE_POS AND MALE_POS\n\n"
00591 "However the following line has %d fields\n%s\n",
00592 tokens.Length(), (const char *) buffer);
00593
00594 bool previous_state = String::caseSensitive;
00595 String::caseSensitive = false;
00596
00597 if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
00598 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
00599 (tokens[2] == "KOSAMBI" || tokens[2] == "POS" || tokens[2] == "POSITION" ||
00600 tokens[2] == "SEX_AVERAGED_POS" || tokens[2] == "CM" || tokens[2] == "HALDANE"))
00601 continue;
00602
00603 String::caseSensitive = previous_state;
00604
00605 if (filter)
00606 if (LookupMarker(tokens[1]) < 0)
00607 continue;
00608
00609 MarkerInfo * info = GetMarkerInfo(tokens[1]);
00610
00611 int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
00612
00613 info->chromosome = chr;
00614 info->position = (double) tokens[2] * 0.01;
00615
00616 if (sexSpecificMap)
00617 {
00618 char * flag;
00619
00620 double female = strtod(tokens[3], &flag);
00621 if (*flag)
00622 error("In the map file, the female cM position for marker\n"
00623 "%s is %s. This is not a valid number.",
00624 (const char *) tokens[1], (const char *) tokens[3]);
00625
00626 double male = strtod(tokens[4], &flag);
00627 if (*flag)
00628 error("In the map file, the male cM position for marker\n"
00629 "%s is %s. This is not a valid number.",
00630 (const char *) tokens[1], (const char *) tokens[4]);
00631
00632 info->positionFemale = (double) female * 0.01;
00633 info->positionMale = (double) male * 0.01;
00634 }
00635 else
00636 info->positionFemale = info->positionMale = info->position;
00637 }
00638
00639 if (sexSpecificMap) VerifySexSpecificOrder();
00640 }
00641
00642 void PedigreeGlobals::LoadBasepairMap(const char * filename)
00643 {
00644 IFILE f = ifopen(filename, "rb");
00645 if (f == NULL)
00646 error("The map file [%s] could not be opened\n\n"
00647 "Please check that the filename is correct and that the file is\n"
00648 "not being used by another program", filename);
00649 LoadBasepairMap(f);
00650 ifclose(f);
00651 }
00652
00653 void PedigreeGlobals::LoadBasepairMap(IFILE & input)
00654 {
00655 String buffer;
00656 StringArray tokens;
00657
00658 sexSpecificMap = false;
00659
00660 while (!ifeof(input))
00661 {
00662 buffer.ReadLine(input);
00663
00664 tokens.Clear();
00665 tokens.AddTokens(buffer, WHITESPACE);
00666
00667 if (tokens.Length() < 1) continue;
00668
00669 if (tokens.Length() != 3)
00670 error("Error reading map file\n"
00671 "Each line in this file should include 3 fields:\n"
00672 "CHROMOSOME, MARKER_NAME, and POSITION\n"
00673 "However the following line has %d fields\n%s\n",
00674 tokens.Length(), (const char *) buffer);
00675
00676 bool previous_state = String::caseSensitive;
00677 String::caseSensitive = false;
00678
00679 if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
00680 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
00681 (tokens[2] == "BASEPAIR" || tokens[2] == "POS" || tokens[2] == "POSITION"))
00682 continue;
00683
00684 String::caseSensitive = previous_state;
00685
00686 MarkerInfo * info = GetMarkerInfo(tokens[1]);
00687
00688 int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
00689
00690 info->chromosome = chr;
00691 info->position = (double) tokens[2];
00692 }
00693 }
00694
00695 int PedigreeGlobals::instanceCount = 0;
00696
00697 PedigreeGlobals::~PedigreeGlobals()
00698 {
00699 if (--instanceCount == 0 && markerInfoSize)
00700 {
00701 for (int i = 0; i < markerInfoCount; i++)
00702 delete markerInfo[i];
00703 delete [] markerInfo;
00704 delete [] markerInfoByInteger;
00705 }
00706 }
00707
00708 void PedigreeGlobals::WriteMapFile(const char * filename)
00709 {
00710 if (!MarkerPositionsAvailable())
00711 return;
00712
00713 FILE * output = fopen(filename, "wt");
00714
00715 if (output == NULL)
00716 error("Creating map file \"%s\"", filename);
00717
00718 WriteMapFile(output);
00719 fclose(output);
00720 }
00721
00722 void PedigreeGlobals::WriteMapFile(FILE * output)
00723 {
00724 if (!sexSpecificMap)
00725 fprintf(output, "CHR MARKER POS\n");
00726 else
00727 fprintf(output, "CHR MARKER POS POSF POSM\n");
00728
00729 for (int i = 0; i < markerInfoCount; i++)
00730 {
00731 if (markerInfo[i]->chromosome != -1)
00732 {
00733 if (!sexSpecificMap)
00734 fprintf(output, "%3d %-10s %g\n",
00735 markerInfo[i]->chromosome,
00736 (const char *) markerInfo[i]->name,
00737 markerInfo[i]->position * 100.0);
00738 else
00739 fprintf(output, "%3d %-10s %g %g %g\n",
00740 markerInfo[i]->chromosome,
00741 (const char *) markerInfo[i]->name,
00742 markerInfo[i]->position * 100.0,
00743 markerInfo[i]->positionFemale * 100.0,
00744 markerInfo[i]->positionMale * 100.0);
00745 }
00746 }
00747 }
00748
00749 void PedigreeGlobals::WriteFreqFile(const char * filename, bool old_format)
00750 {
00751 FILE * output = fopen(filename, "wt");
00752
00753 if (output == NULL)
00754 error("Creating allele frequency file \"%s\"", filename);
00755
00756 WriteFreqFile(output, old_format);
00757 fclose(output);
00758 }
00759
00760 void PedigreeGlobals::WriteFreqFile(FILE * output, bool old_format)
00761 {
00762 for (int i = 0; i < markerInfoCount; i++)
00763 {
00764 MarkerInfo * info = markerInfo[i];
00765
00766 if (info->freq.Length() == 0) continue;
00767
00768 fprintf(output, "M %s\n", (const char *) info->name);
00769
00770 if (old_format && info->alleleLabels.Length() == 0)
00771 for (int j = 1; j < info->freq.Length(); j++)
00772 fprintf(output, "%s%.5f%s",
00773 j % 7 == 1 ? "F " : "", info->freq[j],
00774 j == info->freq.Length() - 1 ? "\n" : j % 7 == 0 ? "\n" : " ");
00775 else
00776 for (int j = 1; j < info->freq.Length(); j++)
00777 if (info->freq[j] > 1e-7)
00778 fprintf(output, "A %5s %.5f\n",
00779 (const char *) info->GetAlleleLabel(j), info->freq[j]);
00780 }
00781 }
00782
00783 bool PedigreeGlobals::MarkerPositionsAvailable()
00784 {
00785 for (int i = 0; i < markerInfoCount; i++)
00786 if (markerInfo[i]->chromosome != -1)
00787 return true;
00788
00789 return false;
00790 }
00791
00792 bool PedigreeGlobals::AlleleFrequenciesAvailable()
00793 {
00794 for (int i = 0; i < markerInfoCount; i++)
00795 if (markerInfo[i]->freq.Length() > 1)
00796 return true;
00797
00798 return false;
00799 }
00800
00801 int PedigreeGlobals::LoadAllele(int marker, String & token)
00802 {
00803 return LoadAllele(GetMarkerInfo(marker), token);
00804 }
00805
00806 int PedigreeGlobals::LoadAllele(MarkerInfo * info, String & token)
00807 {
00808 int allele = info->GetAlleleNumber(token);
00809
00810 if (allele >= 0) return allele;
00811
00812 static unsigned char lookup[128];
00813 static bool init = false;
00814
00815 if (!init)
00816 {
00817 init = true;
00818
00819 for (int i = 0; i < 128; i++)
00820 lookup[i] = 0;
00821
00822 for (int i = '1'; i <= '9'; i++)
00823 lookup[i] = 1;
00824
00825 lookup[int('a')] = lookup[int('A')] = lookup[int('c')] = lookup[int('C')] = 2;
00826 lookup[int('g')] = lookup[int('G')] = lookup[int('t')] = lookup[int('T')] = 2;
00827 }
00828
00829 int first = token[0];
00830 bool goodstart = first > 0 && first < 128;
00831
00832 if (token.Length() == 1 && goodstart && lookup[int(token[0])])
00833 return info->NewAllele(token);
00834
00835 if (!goodstart || lookup[int(token[0])] != 1)
00836 return 0;
00837
00838 int integer = token.AsInteger();
00839 token = integer;
00840
00841 allele = info->GetAlleleNumber(token);
00842
00843 if (allele > 0)
00844 return allele;
00845
00846 if (integer <= 0) return 0;
00847
00848 if (integer > 1000000)
00849 {
00850 static bool warn_user = true;
00851
00852 if (warn_user)
00853 {
00854 printf("Some allele numbers for marker %s are > 1000000\n"
00855 "All allele numbers >1000000 will be treated as missing\n\n",
00856 (const char *) info->name);
00857 warn_user = false;
00858 }
00859
00860 return 0;
00861 }
00862
00863 return info->NewAllele(token);
00864 }
00865
00866 std::ostream &operator << (std::ostream &stream, MarkerInfo &m)
00867 {
00868 stream << "MarkerInfo for marker " << m.name << std::endl;
00869 stream << " located on chromsome " << m.chromosome << ":" << (int64_t)(100 * m.position) << std::endl;
00870 stream << " allele count = " << m.freq.Length() << std::endl;
00871 stream << " label count = " << m.alleleLabels.Length() << std::endl;
00872 if (m.freq.Length() == m.alleleLabels.Length())
00873 {
00874 for (int i=0; i<m.freq.Length(); i++)
00875 {
00876 stream << " " << m.alleleLabels[i] << ":" << m.freq[i];
00877 }
00878 stream << std::endl;
00879 }
00880 else
00881 {
00882 stream << " output truncated - counts appear corrupt." << std::endl;
00883 }
00884 return stream;
00885 }
00886