00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "PedigreeGlobals.h"
00019 #include "Sort.h"
00020 #include "Error.h"
00021
00022 #include <math.h>
00023 #include <string.h>
00024 #include <ctype.h>
00025
00026 int PedigreeGlobals::traitCount = 0;
00027 int PedigreeGlobals::affectionCount = 0;
00028 int PedigreeGlobals::covariateCount = 0;
00029 int PedigreeGlobals::markerCount = 0;
00030 int PedigreeGlobals::stringCount = 0;
00031
00032
00033 bool PedigreeGlobals::chromosomeX = false;
00034 bool PedigreeGlobals::sexSpecificMap = false;
00035
00036 StringArray PedigreeGlobals::traitNames;
00037 StringArray PedigreeGlobals::markerNames;
00038 StringArray PedigreeGlobals::covariateNames;
00039 StringArray PedigreeGlobals::affectionNames;
00040 StringArray PedigreeGlobals::stringNames;
00041 StringIntHash PedigreeGlobals::markerLookup;
00042 StringIntHash PedigreeGlobals::traitLookup;
00043 StringIntHash PedigreeGlobals::affectionLookup;
00044 StringIntHash PedigreeGlobals::covariateLookup;
00045 StringIntHash PedigreeGlobals::stringLookup;
00046
00047 int PedigreeGlobals::markerInfoCount = 0;
00048 int PedigreeGlobals::markerInfoSize = 0;
00049
00050 MarkerInfo ** PedigreeGlobals::markerInfo = NULL;
00051 MarkerInfo ** PedigreeGlobals::markerInfoByInteger = NULL;
00052 StringHash PedigreeGlobals::markerInfoByName;
00053
00054 int MarkerInfo::count = 0;
00055
00056 int MarkerInfo::ComparePosition(MarkerInfo ** left, MarkerInfo ** right)
00057 {
00058 if ((*left)->chromosome != (*right)->chromosome)
00059 return (*left)->chromosome - (*right)->chromosome;
00060
00061 double difference = (*left)->position - (*right)->position;
00062
00063 if (difference > 0.0)
00064 return 1;
00065 else if (difference == 0.0)
00066 return (*left)->serial - (*right)->serial;
00067 else
00068 return -1;
00069 }
00070
00071 String MarkerInfo::GetAlleleLabel(int allele)
00072 {
00073 if (alleleLabels.Length() > allele && alleleLabels[allele].Length())
00074 return alleleLabels[allele];
00075 else if (alleleLabels.Length() <= allele)
00076 alleleLabels.Dimension(allele + 1);
00077 return alleleLabels[allele] = allele;
00078 }
00079
00080 bool MarkerInfo::AdjustFrequencies()
00081 {
00082 if (freq.Length() <= 1)
00083 {
00084 freq.Clear();
00085 return false;
00086 }
00087
00088 if (freq.Min() < 0.0)
00089 error("Locus %s has negative allele frequencies\n", (const char *) name);
00090
00091 double sum = freq.Sum();
00092
00093 if (sum <= 0.0)
00094 error("Locus %s frequencies sum to %f, which doesn't make sense\n",
00095 (const char *) name, sum);
00096
00097 if (sum != 1.0)
00098 freq *= 1.0 / sum;
00099
00100 if (fabs(sum - 1.0) > 1.2e-5)
00101 {
00102 printf("Locus %s frequencies sum to %f, adjusted to 1.0\n",
00103 (const char *) name, sum);
00104 return true;
00105 }
00106
00107 return false;
00108 }
00109
00110 void MarkerInfo::IndexAlleles()
00111 {
00112 if (alleleLabels.Length() >= 255)
00113 error("Marker %s has more than 254 distinct alleles\n",
00114 (const char *) name);
00115
00116 alleleNumbers.Clear();
00117
00118 for (int i = 1; i < alleleLabels.Length(); i++)
00119 alleleNumbers.SetInteger(alleleLabels[i], i);
00120 }
00121
00122 int MarkerInfo::NewAllele(const String & label)
00123 {
00124 if (alleleLabels.Length() == 0)
00125 alleleLabels.Push("");
00126
00127 if (alleleLabels.Length() >= 255)
00128 error("Marker %s has more than 254 distinct alleles\n",
00129 (const char *) name);
00130
00131 alleleNumbers.SetInteger(label, alleleLabels.Length());
00132 alleleLabels.Push(label);
00133
00134 return alleleLabels.Length() - 1;
00135 }
00136
00137 int PedigreeGlobals::GetTraitID(const char * name)
00138 {
00139 int idx = traitLookup.Integer(name);
00140
00141 if (idx != -1) return idx;
00142
00143 traitNames.Add(name);
00144 traitLookup.SetInteger(name, traitCount);
00145 return traitCount++;
00146 }
00147
00148 int PedigreeGlobals::GetAffectionID(const char * name)
00149 {
00150 int idx = affectionLookup.Integer(name);
00151
00152 if (idx != -1) return idx;
00153
00154 affectionNames.Add(name);
00155 affectionLookup.SetInteger(name, affectionCount);
00156 return affectionCount++;
00157 }
00158
00159 int PedigreeGlobals::GetCovariateID(const char * name)
00160 {
00161 int idx = covariateLookup.Integer(name);
00162
00163 if (idx != -1) return idx;
00164
00165 covariateNames.Add(name);
00166 covariateLookup.SetInteger(name, covariateCount);
00167 return covariateCount++;
00168 }
00169
00170 int PedigreeGlobals::GetStringID(const char * name)
00171 {
00172 int idx = stringLookup.Integer(name);
00173
00174 if (idx != -1) return idx;
00175
00176 stringNames.Add(name);
00177 stringLookup.SetInteger(name, stringCount);
00178 return stringCount++;
00179 }
00180
00181 int PedigreeGlobals::GetMarkerID(const char * name)
00182 {
00183 int idx = markerLookup.Integer(name);
00184
00185 if (idx != -1) return idx;
00186
00187 markerNames.Add(name);
00188 markerLookup.SetInteger(name, markerCount);
00189
00190
00191 if (markerCount == 0)
00192 {
00193 markerInfoByInteger = new MarkerInfo * [16];
00194
00195 for (int i = 0; i < 16; i++)
00196 markerInfoByInteger[i] = NULL;
00197 }
00198 else if ((markerCount & (markerCount - 1)) == 0 && markerCount > 15)
00199 {
00200 MarkerInfo ** newKey = new MarkerInfo * [markerCount * 2];
00201
00202 for (int i = 0; i < markerCount; i++)
00203 newKey[i] = markerInfoByInteger[i];
00204
00205 for (int i = markerCount; i < markerCount * 2; i++)
00206 newKey[i] = NULL;
00207
00208 delete [] markerInfoByInteger;
00209
00210 markerInfoByInteger = newKey;
00211 }
00212
00213 return markerCount++;
00214 }
00215
00216 MarkerInfo * PedigreeGlobals::GetMarkerInfo(String & name)
00217 {
00218 MarkerInfo * info = (MarkerInfo *) markerInfoByName.Object(name);
00219
00220 if (info != NULL) return info;
00221
00222 info = new MarkerInfo(name);
00223 markerInfoByName.Add(name, info);
00224
00225 if (markerInfoCount >= markerInfoSize)
00226 GrowMarkerInfo();
00227
00228 markerInfo[markerInfoCount++] = info;
00229
00230 int markerId = LookupMarker(name);
00231 if (markerId >= 0) markerInfoByInteger[markerId] = info;
00232
00233 return info;
00234 }
00235
00236 MarkerInfo * PedigreeGlobals::GetMarkerInfo(int markerId)
00237 {
00238 if (markerId >= markerCount)
00239 error("Attempted to retrieve MarkerInfo using out-of-bounds index\n");
00240
00241 if (markerInfoByInteger[markerId] != NULL)
00242 return markerInfoByInteger[markerId];
00243 else
00244 return GetMarkerInfo(markerNames[markerId]);
00245 }
00246
00247 void PedigreeGlobals::GrowMarkerInfo()
00248 {
00249 int newSize = markerInfoSize ? 2 * markerInfoSize : 32;
00250
00251 MarkerInfo ** newArray = new MarkerInfo * [newSize];
00252
00253 if (markerInfoSize)
00254 {
00255 memcpy(newArray, markerInfo, sizeof(MarkerInfo *) * markerInfoSize);
00256 delete [] markerInfo;
00257 }
00258
00259 markerInfo = newArray;
00260 markerInfoSize = newSize;
00261 }
00262
00263 void PedigreeGlobals::FlagMissingMarkers(IntArray & missingMarkers)
00264 {
00265 int skipped_markers = 0;
00266
00267 if (missingMarkers.Length())
00268 {
00269 StringArray names;
00270
00271 printf("These markers couldn't be placed and won't be analysed:");
00272
00273 for (int i = 0; i < missingMarkers.Length(); i++)
00274 names.Push(GetMarkerInfo(missingMarkers[i])->name);
00275 names.Sort();
00276
00277 for (int i = 0, line = 80, lines = 0; i < missingMarkers.Length(); i++)
00278 {
00279 if (line + names[i].Length() + 1 > 79)
00280 printf("\n "), line = 3, lines++;
00281
00282 if (lines < 5)
00283 {
00284 printf("%s ", (const char *) names[i]);
00285 line += names[i].Length() + 1;
00286 }
00287 else
00288 skipped_markers++;
00289 }
00290
00291 if (skipped_markers)
00292 printf("as well as %d other unlisted markers...", skipped_markers);
00293
00294 printf("\n\n");
00295 }
00296 }
00297
00298 void PedigreeGlobals::GetOrderedMarkers(IntArray & markers)
00299 {
00300 if (markers.Length() == 0)
00301 {
00302 markers.Dimension(markerCount);
00303 markers.SetSequence(0, 1);
00304 }
00305
00306 MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
00307
00308 int count = 0;
00309 IntArray missingMarkers;
00310
00311 for (int i = 0; i < markers.Length(); i++)
00312 {
00313 MarkerInfo * info = GetMarkerInfo(markers[i]);
00314
00315 if (info->chromosome != -1)
00316 subset[count++] = info;
00317 else
00318 missingMarkers.Push(i);
00319 }
00320
00321 FlagMissingMarkers(missingMarkers);
00322
00323 QuickSort(subset, count, sizeof(MarkerInfo *),
00324 COMPAREFUNC MarkerInfo::ComparePosition);
00325
00326 markers.Clear();
00327 for (int i = 0; i < count; i++)
00328 markers.Push(GetMarkerID(subset[i]->name));
00329 }
00330
00331 int PedigreeGlobals::SortMarkersInMapOrder(IntArray & markers, int chromosome)
00332 {
00333 if (markers.Length() == 0)
00334 {
00335 markers.Dimension(markerCount);
00336 markers.SetSequence(0, 1);
00337 }
00338
00339 MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
00340
00341 int count = 0;
00342 IntArray missingMarkers;
00343
00344 for (int i = 0; i < markers.Length(); i++)
00345 {
00346 MarkerInfo * info = GetMarkerInfo(markers[i]);
00347
00348 if (info->chromosome != -1)
00349 subset[count++] = info;
00350 else if (chromosome == -1)
00351 missingMarkers.Push(i);
00352 }
00353
00354 if (chromosome == -1)
00355 FlagMissingMarkers(missingMarkers);
00356
00357 QuickSort(subset, count, sizeof(MarkerInfo *),
00358 COMPAREFUNC MarkerInfo::ComparePosition);
00359
00360 markers.Clear();
00361
00362 int current_chromosome = -1, next_chromosome = 0;
00363
00364 for (int i = 0; i < count; i++)
00365 if (subset[i]->chromosome < chromosome)
00366 continue;
00367 else if (current_chromosome == -1 ||
00368 subset[i]->chromosome == current_chromosome)
00369 {
00370 markers.Push(GetMarkerID(subset[i]->name));
00371 current_chromosome = subset[i]->chromosome;
00372 }
00373 else if (!next_chromosome)
00374 {
00375 next_chromosome = subset[i]->chromosome;
00376 break;
00377 }
00378
00379 delete [] subset;
00380
00381 return next_chromosome;
00382 }
00383
00384 void PedigreeGlobals::VerifySexSpecificOrder()
00385 {
00386 if (markerCount <= 1)
00387 return;
00388
00389 MarkerInfo ** sortedMarkers = new MarkerInfo * [markerCount];
00390
00391 for (int i = 0; i < markerCount; i++)
00392 sortedMarkers[i] = GetMarkerInfo(i);
00393
00394 QuickSort(sortedMarkers, markerCount, sizeof(MarkerInfo *),
00395 COMPAREFUNC MarkerInfo::ComparePosition);
00396
00397 double prev_female = sortedMarkers[0]->positionFemale;
00398 double prev_male = sortedMarkers[0]->positionMale;
00399 double curr_female, curr_male;
00400
00401 int prev_chromosome = sortedMarkers[0]->chromosome;
00402 int curr_chromosome;
00403
00404 for (int i = 1; i < markerCount; i++)
00405 {
00406 curr_chromosome = sortedMarkers[i]->chromosome;
00407 curr_female = sortedMarkers[i]->positionFemale;
00408 curr_male = sortedMarkers[i]->positionMale;
00409
00410 if (curr_chromosome == prev_chromosome &&
00411 (curr_female < prev_female || curr_male < prev_male))
00412 error("Sex-specific and sex-averaged maps are inconsistent.\n\n"
00413 "In the sex-averaged map, marker %s (%.2f cM) follows marker %s (%.2f cM).\n"
00414 "In the %smale map, marker %s (%.2f cM) PRECEDES marker %s (%.2f cM).\n",
00415 (const char *) sortedMarkers[i]->name,
00416 sortedMarkers[i]->position * 100,
00417 (const char *) sortedMarkers[i-1]->name,
00418 sortedMarkers[i-1]->position * 100,
00419 curr_female < prev_female ? "fe" : "",
00420 (const char *) sortedMarkers[i]->name,
00421 (curr_female < prev_female ? curr_female : curr_male) * 100,
00422 (const char *) sortedMarkers[i-1]->name,
00423 (curr_female < prev_female ? prev_female : prev_male) * 100);
00424
00425 prev_chromosome = curr_chromosome;
00426 prev_female = curr_female;
00427 prev_male = curr_male;
00428 }
00429
00430 delete [] sortedMarkers;
00431 }
00432
00433 void PedigreeGlobals::LoadAlleleFrequencies(const char * filename, bool required)
00434 {
00435
00436
00437 if (filename[0] == 0)
00438 {
00439 if (required)
00440 error("No name provided for required allele freuquency file\n");
00441 else
00442 return;
00443 }
00444
00445
00446
00447 IFILE f = ifopen(filename, "rb");
00448
00449 if (f == NULL)
00450 {
00451 if (required)
00452 error("Failed to open required allele frequency file '%s'",
00453 (const char *) filename);
00454 else
00455 return;
00456 }
00457
00458 LoadAlleleFrequencies(f);
00459 ifclose(f);
00460 }
00461
00462 void PedigreeGlobals::LoadAlleleFrequencies(IFILE & input)
00463 {
00464 int done = 0;
00465 String buffer;
00466 StringArray tokens;
00467 MarkerInfo *info = NULL;
00468
00469 bool need_blank_line = false;
00470 int allele_size, old_max, next_allele = 0;
00471
00472 while (!ifeof(input) && !done)
00473 {
00474 int i, j;
00475
00476 buffer.ReadLine(input);
00477
00478 tokens.Clear();
00479 tokens.AddTokens(buffer, WHITESPACE);
00480
00481 if (tokens.Length() < 1) continue;
00482
00483 switch (toupper(tokens[0][0]))
00484 {
00485 case 'M' :
00486 if (tokens.Length() == 1)
00487 error("Unnamed marker in allele frequency file");
00488 if (info != NULL)
00489 need_blank_line |= info->AdjustFrequencies();
00490 info = GetMarkerInfo(tokens[1]);
00491 info->freq.Clear();
00492 info->freq.Push(0.0);
00493 next_allele = 1;
00494 break;
00495 case 'F' :
00496 if (info != NULL)
00497 for (i = 1; i < tokens.Length(); i++)
00498 {
00499 buffer = next_allele++;
00500
00501 int allele = LoadAllele(info, buffer);
00502
00503 if (allele >= info->freq.Length())
00504 {
00505 old_max = info->freq.Length();
00506 info->freq.Dimension(allele + 1);
00507 for (j = old_max; j < allele; j++)
00508 info->freq[j] = 0.0;
00509 }
00510
00511 info->freq[allele] = tokens[i].AsDouble();
00512 }
00513 break;
00514 case 'A' :
00515 if (info == NULL) continue;
00516
00517 if (tokens.Length() != 3)
00518 error("Error reading named allele frequencies for locus %s\n"
00519 "Lines with named alleles should have the format\n"
00520 " A allele_label allele_frequency\n\n"
00521 "But the following line was read:\n%s\n",
00522 (const char *) info->name, (const char *) buffer);
00523
00524 allele_size = LoadAllele(info, tokens[1]);
00525 next_allele = atoi(tokens[1]) + 1;
00526
00527 if (allele_size < 1)
00528 error("Error reading named allele frequencies for locus %s\n"
00529 "An invalid allele label was encountered\n",
00530 (const char *) info->name);
00531
00532 if (allele_size >= info->freq.Length())
00533 {
00534 old_max = info->freq.Length();
00535 info->freq.Dimension(allele_size + 1);
00536 for (i = old_max; i < allele_size; i++)
00537 info->freq[i] = 0.0;
00538 }
00539
00540 info->freq[allele_size] = tokens[2];
00541 break;
00542 case 'E' :
00543 done = 1;
00544 break;
00545 default :
00546 error("Problem in allele frequency file.\n"
00547 "Lines in this file should be of two types:\n"
00548 " -- Marker name lines begin with an M\n"
00549 " -- Frequency lines begin with an F\n\n"
00550 "However the following line is different:\n%s\n",
00551 (const char *) buffer);
00552 }
00553 }
00554
00555 if (info != NULL)
00556 need_blank_line |= info->AdjustFrequencies();
00557
00558 if (need_blank_line) printf("\n");
00559 }
00560
00561 void PedigreeGlobals::LoadMarkerMap(const char * filename, bool filter)
00562 {
00563 IFILE f = ifopen(filename, "rb");
00564 if (f == NULL) return;
00565 LoadMarkerMap(f, filter);
00566 ifclose(f);
00567 }
00568
00569 void PedigreeGlobals::LoadMarkerMap(IFILE & input, bool filter)
00570 {
00571 String buffer;
00572 StringArray tokens;
00573 bool first_pass = true;
00574
00575 while (!ifeof(input))
00576 {
00577 buffer.ReadLine(input);
00578
00579 tokens.Clear();
00580 tokens.AddTokens(buffer, WHITESPACE);
00581
00582 if (tokens.Length() < 1) continue;
00583
00584 if (first_pass)
00585 {
00586 sexSpecificMap = (tokens.Length() == 5);
00587
00588
00589
00590
00591 first_pass = false;
00592 }
00593
00594 if (tokens.Length() != 3 && !sexSpecificMap)
00595 error("Error reading map file\n"
00596 "Each line in this file should include 3 fields:\n"
00597 "CHROMOSOME, MARKER_NAME, and POSITION\n"
00598 "However the following line has %d fields\n%s\n",
00599 tokens.Length(), (const char *) buffer);
00600
00601 if (tokens.Length() != 5 && sexSpecificMap)
00602 error("Error reading map file\n"
00603 "Each line in this file should include 5 fields:\n\n"
00604 "CHROMOSOME, MARKER_NAME, SEX_AVERAGED_POS, FEMALE_POS AND MALE_POS\n\n"
00605 "However the following line has %d fields\n%s\n",
00606 tokens.Length(), (const char *) buffer);
00607
00608 bool previous_state = String::caseSensitive;
00609 String::caseSensitive = false;
00610
00611 if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
00612 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
00613 (tokens[2] == "KOSAMBI" || tokens[2] == "POS" || tokens[2] == "POSITION" ||
00614 tokens[2] == "SEX_AVERAGED_POS" || tokens[2] == "CM" || tokens[2] == "HALDANE"))
00615 continue;
00616
00617 String::caseSensitive = previous_state;
00618
00619 if (filter)
00620 if (LookupMarker(tokens[1]) < 0)
00621 continue;
00622
00623 MarkerInfo * info = GetMarkerInfo(tokens[1]);
00624
00625 int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
00626
00627 info->chromosome = chr;
00628 info->position = (double) tokens[2] * 0.01;
00629
00630 if (sexSpecificMap)
00631 {
00632 char * flag;
00633
00634 double female = strtod(tokens[3], &flag);
00635 if (*flag)
00636 error("In the map file, the female cM position for marker\n"
00637 "%s is %s. This is not a valid number.",
00638 (const char *) tokens[1], (const char *) tokens[3]);
00639
00640 double male = strtod(tokens[4], &flag);
00641 if (*flag)
00642 error("In the map file, the male cM position for marker\n"
00643 "%s is %s. This is not a valid number.",
00644 (const char *) tokens[1], (const char *) tokens[4]);
00645
00646 info->positionFemale = (double) female * 0.01;
00647 info->positionMale = (double) male * 0.01;
00648 }
00649 else
00650 info->positionFemale = info->positionMale = info->position;
00651 }
00652
00653 if (sexSpecificMap) VerifySexSpecificOrder();
00654 }
00655
00656 void PedigreeGlobals::LoadBasepairMap(const char * filename)
00657 {
00658 IFILE f = ifopen(filename, "rb");
00659 if (f == NULL)
00660 error("The map file [%s] could not be opened\n\n"
00661 "Please check that the filename is correct and that the file is\n"
00662 "not being used by another program", filename);
00663 LoadBasepairMap(f);
00664 ifclose(f);
00665 }
00666
00667 void PedigreeGlobals::LoadBasepairMap(IFILE & input)
00668 {
00669 String buffer;
00670 StringArray tokens;
00671
00672 sexSpecificMap = false;
00673
00674 while (!ifeof(input))
00675 {
00676 buffer.ReadLine(input);
00677
00678 tokens.Clear();
00679 tokens.AddTokens(buffer, WHITESPACE);
00680
00681 if (tokens.Length() < 1) continue;
00682
00683 if (tokens.Length() != 3)
00684 error("Error reading map file\n"
00685 "Each line in this file should include 3 fields:\n"
00686 "CHROMOSOME, MARKER_NAME, and POSITION\n"
00687 "However the following line has %d fields\n%s\n",
00688 tokens.Length(), (const char *) buffer);
00689
00690 bool previous_state = String::caseSensitive;
00691 String::caseSensitive = false;
00692
00693 if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
00694 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
00695 (tokens[2] == "BASEPAIR" || tokens[2] == "POS" || tokens[2] == "POSITION"))
00696 continue;
00697
00698 String::caseSensitive = previous_state;
00699
00700 MarkerInfo * info = GetMarkerInfo(tokens[1]);
00701
00702 int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
00703
00704 info->chromosome = chr;
00705 info->position = (double) tokens[2];
00706 }
00707 }
00708
00709 int PedigreeGlobals::instanceCount = 0;
00710
00711 PedigreeGlobals::~PedigreeGlobals()
00712 {
00713 if (--instanceCount == 0 && markerInfoSize)
00714 {
00715 for (int i = 0; i < markerInfoCount; i++)
00716 delete markerInfo[i];
00717 delete [] markerInfo;
00718 delete [] markerInfoByInteger;
00719 }
00720 }
00721
00722 void PedigreeGlobals::WriteMapFile(const char * filename)
00723 {
00724 if (!MarkerPositionsAvailable())
00725 return;
00726
00727 FILE * output = fopen(filename, "wt");
00728
00729 if (output == NULL)
00730 error("Creating map file \"%s\"", filename);
00731
00732 WriteMapFile(output);
00733 fclose(output);
00734 }
00735
00736 void PedigreeGlobals::WriteMapFile(FILE * output)
00737 {
00738 if (!sexSpecificMap)
00739 fprintf(output, "CHR MARKER POS\n");
00740 else
00741 fprintf(output, "CHR MARKER POS POSF POSM\n");
00742
00743 for (int i = 0; i < markerInfoCount; i++)
00744 {
00745 if (markerInfo[i]->chromosome != -1)
00746 {
00747 if (!sexSpecificMap)
00748 fprintf(output, "%3d %-10s %g\n",
00749 markerInfo[i]->chromosome,
00750 (const char *) markerInfo[i]->name,
00751 markerInfo[i]->position * 100.0);
00752 else
00753 fprintf(output, "%3d %-10s %g %g %g\n",
00754 markerInfo[i]->chromosome,
00755 (const char *) markerInfo[i]->name,
00756 markerInfo[i]->position * 100.0,
00757 markerInfo[i]->positionFemale * 100.0,
00758 markerInfo[i]->positionMale * 100.0);
00759 }
00760 }
00761 }
00762
00763 void PedigreeGlobals::WriteFreqFile(const char * filename, bool old_format)
00764 {
00765 FILE * output = fopen(filename, "wt");
00766
00767 if (output == NULL)
00768 error("Creating allele frequency file \"%s\"", filename);
00769
00770 WriteFreqFile(output, old_format);
00771 fclose(output);
00772 }
00773
00774 void PedigreeGlobals::WriteFreqFile(FILE * output, bool old_format)
00775 {
00776 for (int i = 0; i < markerInfoCount; i++)
00777 {
00778 MarkerInfo * info = markerInfo[i];
00779
00780 if (info->freq.Length() == 0) continue;
00781
00782 fprintf(output, "M %s\n", (const char *) info->name);
00783
00784 if (old_format && info->alleleLabels.Length() == 0)
00785 for (int j = 1; j < info->freq.Length(); j++)
00786 fprintf(output, "%s%.5f%s",
00787 j % 7 == 1 ? "F " : "", info->freq[j],
00788 j == info->freq.Length() - 1 ? "\n" : j % 7 == 0 ? "\n" : " ");
00789 else
00790 for (int j = 1; j < info->freq.Length(); j++)
00791 if (info->freq[j] > 1e-7)
00792 fprintf(output, "A %5s %.5f\n",
00793 (const char *) info->GetAlleleLabel(j), info->freq[j]);
00794 }
00795 }
00796
00797 bool PedigreeGlobals::MarkerPositionsAvailable()
00798 {
00799 for (int i = 0; i < markerInfoCount; i++)
00800 if (markerInfo[i]->chromosome != -1)
00801 return true;
00802
00803 return false;
00804 }
00805
00806 bool PedigreeGlobals::AlleleFrequenciesAvailable()
00807 {
00808 for (int i = 0; i < markerInfoCount; i++)
00809 if (markerInfo[i]->freq.Length() > 1)
00810 return true;
00811
00812 return false;
00813 }
00814
00815 int PedigreeGlobals::LoadAllele(int marker, String & token)
00816 {
00817 return LoadAllele(GetMarkerInfo(marker), token);
00818 }
00819
00820 int PedigreeGlobals::LoadAllele(MarkerInfo * info, String & token)
00821 {
00822 int allele = info->GetAlleleNumber(token);
00823
00824 if (allele >= 0) return allele;
00825
00826 static unsigned char lookup[128];
00827 static bool init = false;
00828
00829 if (!init)
00830 {
00831 init = true;
00832
00833 for (int i = 0; i < 128; i++)
00834 lookup[i] = 0;
00835
00836 for (int i = '1'; i <= '9'; i++)
00837 lookup[i] = 1;
00838
00839 lookup[int('a')] = lookup[int('A')] = lookup[int('c')] = lookup[int('C')] = 2;
00840 lookup[int('g')] = lookup[int('G')] = lookup[int('t')] = lookup[int('T')] = 2;
00841 }
00842
00843 int first = token[0];
00844 bool goodstart = first > 0 && first < 128;
00845
00846 if (token.Length() == 1 && goodstart && lookup[int(token[0])])
00847 return info->NewAllele(token);
00848
00849 if (!goodstart || lookup[int(token[0])] != 1)
00850 return 0;
00851
00852 int integer = token.AsInteger();
00853 token = integer;
00854
00855 allele = info->GetAlleleNumber(token);
00856
00857 if (allele > 0)
00858 return allele;
00859
00860 if (integer <= 0) return 0;
00861
00862 if (integer > 1000000)
00863 {
00864 static bool warn_user = true;
00865
00866 if (warn_user)
00867 {
00868 printf("Some allele numbers for marker %s are > 1000000\n"
00869 "All allele numbers >1000000 will be treated as missing\n\n",
00870 (const char *) info->name);
00871 warn_user = false;
00872 }
00873
00874 return 0;
00875 }
00876
00877 return info->NewAllele(token);
00878 }
00879
00880 std::ostream &operator << (std::ostream &stream, MarkerInfo &m)
00881 {
00882 stream << "MarkerInfo for marker " << m.name << std::endl;
00883 stream << " located on chromsome " << m.chromosome << ":" << (int64_t)(100 * m.position) << std::endl;
00884 stream << " allele count = " << m.freq.Length() << std::endl;
00885 stream << " label count = " << m.alleleLabels.Length() << std::endl;
00886 if (m.freq.Length() == m.alleleLabels.Length())
00887 {
00888 for (int i=0; i<m.freq.Length(); i++)
00889 {
00890 stream << " " << m.alleleLabels[i] << ":" << m.freq[i];
00891 }
00892 stream << std::endl;
00893 }
00894 else
00895 {
00896 stream << " output truncated - counts appear corrupt." << std::endl;
00897 }
00898 return stream;
00899 }
00900