libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "PedigreeGlobals.h" 00019 #include "Sort.h" 00020 #include "Error.h" 00021 00022 #include <math.h> 00023 #include <string.h> 00024 #include <ctype.h> 00025 00026 int PedigreeGlobals::traitCount = 0; 00027 int PedigreeGlobals::affectionCount = 0; 00028 int PedigreeGlobals::covariateCount = 0; 00029 int PedigreeGlobals::markerCount = 0; 00030 int PedigreeGlobals::stringCount = 0; 00031 00032 // If this value isn't set, all X chromosome data will be rejected 00033 bool PedigreeGlobals::chromosomeX = false; 00034 bool PedigreeGlobals::sexSpecificMap = false; 00035 00036 StringArray PedigreeGlobals::traitNames; 00037 StringArray PedigreeGlobals::markerNames; 00038 StringArray PedigreeGlobals::covariateNames; 00039 StringArray PedigreeGlobals::affectionNames; 00040 StringArray PedigreeGlobals::stringNames; 00041 StringIntHash PedigreeGlobals::markerLookup; 00042 StringIntHash PedigreeGlobals::traitLookup; 00043 StringIntHash PedigreeGlobals::affectionLookup; 00044 StringIntHash PedigreeGlobals::covariateLookup; 00045 StringIntHash PedigreeGlobals::stringLookup; 00046 00047 int PedigreeGlobals::markerInfoCount = 0; 00048 int PedigreeGlobals::markerInfoSize = 0; 00049 00050 MarkerInfo ** PedigreeGlobals::markerInfo = NULL; 00051 MarkerInfo ** PedigreeGlobals::markerInfoByInteger = NULL; 00052 StringHash PedigreeGlobals::markerInfoByName; 00053 00054 int MarkerInfo::count = 0; 00055 00056 int MarkerInfo::ComparePosition(MarkerInfo ** left, MarkerInfo ** right) 00057 { 00058 if ((*left)->chromosome != (*right)->chromosome) 00059 return (*left)->chromosome - (*right)->chromosome; 00060 00061 double difference = (*left)->position - (*right)->position; 00062 00063 if (difference > 0.0) 00064 return 1; 00065 else if (difference == 0.0) 00066 return (*left)->serial - (*right)->serial; 00067 else 00068 return -1; 00069 } 00070 00071 String MarkerInfo::GetAlleleLabel(int allele) 00072 { 00073 if (alleleLabels.Length() > allele && alleleLabels[allele].Length()) 00074 return alleleLabels[allele]; 00075 else if (alleleLabels.Length() <= allele) 00076 alleleLabels.Dimension(allele + 1); 00077 return alleleLabels[allele] = allele; 00078 } 00079 00080 bool MarkerInfo::AdjustFrequencies() 00081 { 00082 if (freq.Length() <= 1) 00083 { 00084 freq.Clear(); 00085 return false; 00086 } 00087 00088 if (freq.Min() < 0.0) 00089 error("Locus %s has negative allele frequencies\n", (const char *) name); 00090 00091 double sum = freq.Sum(); 00092 00093 if (sum <= 0.0) 00094 error("Locus %s frequencies sum to %f, which doesn't make sense\n", 00095 (const char *) name, sum); 00096 00097 if (sum != 1.0) 00098 freq *= 1.0 / sum; 00099 00100 if (fabs(sum - 1.0) > 1.2e-5) 00101 { 00102 printf("Locus %s frequencies sum to %f, adjusted to 1.0\n", 00103 (const char *) name, sum); 00104 return true; 00105 } 00106 00107 return false; 00108 } 00109 00110 void MarkerInfo::IndexAlleles() 00111 { 00112 if (alleleLabels.Length() >= 255) 00113 error("Marker %s has more than 254 distinct alleles\n", 00114 (const char *) name); 00115 00116 alleleNumbers.Clear(); 00117 00118 for (int i = 1; i < alleleLabels.Length(); i++) 00119 alleleNumbers.SetInteger(alleleLabels[i], i); 00120 } 00121 00122 int MarkerInfo::NewAllele(const String & label) 00123 { 00124 if (alleleLabels.Length() == 0) 00125 alleleLabels.Push(""); 00126 00127 if (alleleLabels.Length() >= 255) 00128 error("Marker %s has more than 254 distinct alleles\n", 00129 (const char *) name); 00130 00131 alleleNumbers.SetInteger(label, alleleLabels.Length()); 00132 alleleLabels.Push(label); 00133 00134 return alleleLabels.Length() - 1; 00135 } 00136 00137 int PedigreeGlobals::GetTraitID(const char * name) 00138 { 00139 int idx = traitLookup.Integer(name); 00140 00141 if (idx != -1) return idx; 00142 00143 traitNames.Add(name); 00144 traitLookup.SetInteger(name, traitCount); 00145 return traitCount++; 00146 } 00147 00148 int PedigreeGlobals::GetAffectionID(const char * name) 00149 { 00150 int idx = affectionLookup.Integer(name); 00151 00152 if (idx != -1) return idx; 00153 00154 affectionNames.Add(name); 00155 affectionLookup.SetInteger(name, affectionCount); 00156 return affectionCount++; 00157 } 00158 00159 int PedigreeGlobals::GetCovariateID(const char * name) 00160 { 00161 int idx = covariateLookup.Integer(name); 00162 00163 if (idx != -1) return idx; 00164 00165 covariateNames.Add(name); 00166 covariateLookup.SetInteger(name, covariateCount); 00167 return covariateCount++; 00168 } 00169 00170 int PedigreeGlobals::GetStringID(const char * name) 00171 { 00172 int idx = stringLookup.Integer(name); 00173 00174 if (idx != -1) return idx; 00175 00176 stringNames.Add(name); 00177 stringLookup.SetInteger(name, stringCount); 00178 return stringCount++; 00179 } 00180 00181 int PedigreeGlobals::GetMarkerID(const char * name) 00182 { 00183 int idx = markerLookup.Integer(name); 00184 00185 if (idx != -1) return idx; 00186 00187 markerNames.Add(name); 00188 markerLookup.SetInteger(name, markerCount); 00189 00190 // Grow the marker info key ... 00191 if (markerCount == 0) 00192 { 00193 markerInfoByInteger = new MarkerInfo * [16]; 00194 00195 for (int i = 0; i < 16; i++) 00196 markerInfoByInteger[i] = NULL; 00197 } 00198 else if ((markerCount & (markerCount - 1)) == 0 && markerCount > 15) 00199 { 00200 MarkerInfo ** newKey = new MarkerInfo * [markerCount * 2]; 00201 00202 for (int i = 0; i < markerCount; i++) 00203 newKey[i] = markerInfoByInteger[i]; 00204 00205 for (int i = markerCount; i < markerCount * 2; i++) 00206 newKey[i] = NULL; 00207 00208 delete [] markerInfoByInteger; 00209 00210 markerInfoByInteger = newKey; 00211 } 00212 00213 return markerCount++; 00214 } 00215 00216 MarkerInfo * PedigreeGlobals::GetMarkerInfo(String & name) 00217 { 00218 MarkerInfo * info = (MarkerInfo *) markerInfoByName.Object(name); 00219 00220 if (info != NULL) return info; 00221 00222 info = new MarkerInfo(name); 00223 markerInfoByName.Add(name, info); 00224 00225 if (markerInfoCount >= markerInfoSize) 00226 GrowMarkerInfo(); 00227 00228 markerInfo[markerInfoCount++] = info; 00229 00230 int markerId = LookupMarker(name); 00231 if (markerId >= 0) markerInfoByInteger[markerId] = info; 00232 00233 return info; 00234 } 00235 00236 MarkerInfo * PedigreeGlobals::GetMarkerInfo(int markerId) 00237 { 00238 if (markerId >= markerCount) 00239 error("Attempted to retrieve MarkerInfo using out-of-bounds index\n"); 00240 00241 if (markerInfoByInteger[markerId] != NULL) 00242 return markerInfoByInteger[markerId]; 00243 else 00244 return GetMarkerInfo(markerNames[markerId]); 00245 } 00246 00247 void PedigreeGlobals::GrowMarkerInfo() 00248 { 00249 int newSize = markerInfoSize ? 2 * markerInfoSize : 32; 00250 00251 MarkerInfo ** newArray = new MarkerInfo * [newSize]; 00252 00253 if (markerInfoSize) 00254 { 00255 memcpy(newArray, markerInfo, sizeof(MarkerInfo *) * markerInfoSize); 00256 delete [] markerInfo; 00257 } 00258 00259 markerInfo = newArray; 00260 markerInfoSize = newSize; 00261 } 00262 00263 void PedigreeGlobals::FlagMissingMarkers(IntArray & missingMarkers) 00264 { 00265 int skipped_markers = 0; 00266 00267 if (missingMarkers.Length()) 00268 { 00269 StringArray names; 00270 00271 printf("These markers couldn't be placed and won't be analysed:"); 00272 00273 for (int i = 0; i < missingMarkers.Length(); i++) 00274 names.Push(GetMarkerInfo(missingMarkers[i])->name); 00275 names.Sort(); 00276 00277 for (int i = 0, line = 80, lines = 0; i < missingMarkers.Length(); i++) 00278 { 00279 if (line + names[i].Length() + 1 > 79) 00280 printf("\n "), line = 3, lines++; 00281 00282 if (lines < 5) 00283 { 00284 printf("%s ", (const char *) names[i]); 00285 line += names[i].Length() + 1; 00286 } 00287 else 00288 skipped_markers++; 00289 } 00290 00291 if (skipped_markers) 00292 printf("as well as %d other unlisted markers...", skipped_markers); 00293 00294 printf("\n\n"); 00295 } 00296 } 00297 00298 void PedigreeGlobals::GetOrderedMarkers(IntArray & markers) 00299 { 00300 if (markers.Length() == 0) 00301 { 00302 markers.Dimension(markerCount); 00303 markers.SetSequence(0, 1); 00304 } 00305 00306 MarkerInfo ** subset = new MarkerInfo * [markers.Length()]; 00307 00308 int count = 0; 00309 IntArray missingMarkers; 00310 00311 for (int i = 0; i < markers.Length(); i++) 00312 { 00313 MarkerInfo * info = GetMarkerInfo(markers[i]); 00314 00315 if (info->chromosome != -1) 00316 subset[count++] = info; 00317 else 00318 missingMarkers.Push(i); 00319 } 00320 00321 FlagMissingMarkers(missingMarkers); 00322 00323 QuickSort(subset, count, sizeof(MarkerInfo *), 00324 COMPAREFUNC MarkerInfo::ComparePosition); 00325 00326 markers.Clear(); 00327 for (int i = 0; i < count; i++) 00328 markers.Push(GetMarkerID(subset[i]->name)); 00329 } 00330 00331 int PedigreeGlobals::SortMarkersInMapOrder(IntArray & markers, int chromosome) 00332 { 00333 if (markers.Length() == 0) 00334 { 00335 markers.Dimension(markerCount); 00336 markers.SetSequence(0, 1); 00337 } 00338 00339 MarkerInfo ** subset = new MarkerInfo * [markers.Length()]; 00340 00341 int count = 0; 00342 IntArray missingMarkers; 00343 00344 for (int i = 0; i < markers.Length(); i++) 00345 { 00346 MarkerInfo * info = GetMarkerInfo(markers[i]); 00347 00348 if (info->chromosome != -1) 00349 subset[count++] = info; 00350 else if (chromosome == -1) 00351 missingMarkers.Push(i); 00352 } 00353 00354 if (chromosome == -1) 00355 FlagMissingMarkers(missingMarkers); 00356 00357 QuickSort(subset, count, sizeof(MarkerInfo *), 00358 COMPAREFUNC MarkerInfo::ComparePosition); 00359 00360 markers.Clear(); 00361 00362 int current_chromosome = -1, next_chromosome = 0; 00363 00364 for (int i = 0; i < count; i++) 00365 if (subset[i]->chromosome < chromosome) 00366 continue; 00367 else if (current_chromosome == -1 || 00368 subset[i]->chromosome == current_chromosome) 00369 { 00370 markers.Push(GetMarkerID(subset[i]->name)); 00371 current_chromosome = subset[i]->chromosome; 00372 } 00373 else if (!next_chromosome) 00374 { 00375 next_chromosome = subset[i]->chromosome; 00376 break; 00377 } 00378 00379 delete [] subset; 00380 00381 return next_chromosome; 00382 } 00383 00384 void PedigreeGlobals::VerifySexSpecificOrder() 00385 { 00386 if (markerCount <= 1) 00387 return; 00388 00389 MarkerInfo ** sortedMarkers = new MarkerInfo * [markerCount]; 00390 00391 for (int i = 0; i < markerCount; i++) 00392 sortedMarkers[i] = GetMarkerInfo(i); 00393 00394 QuickSort(sortedMarkers, markerCount, sizeof(MarkerInfo *), 00395 COMPAREFUNC MarkerInfo::ComparePosition); 00396 00397 double prev_female = sortedMarkers[0]->positionFemale; 00398 double prev_male = sortedMarkers[0]->positionMale; 00399 double curr_female, curr_male; 00400 00401 int prev_chromosome = sortedMarkers[0]->chromosome; 00402 int curr_chromosome; 00403 00404 for (int i = 1; i < markerCount; i++) 00405 { 00406 curr_chromosome = sortedMarkers[i]->chromosome; 00407 curr_female = sortedMarkers[i]->positionFemale; 00408 curr_male = sortedMarkers[i]->positionMale; 00409 00410 if (curr_chromosome == prev_chromosome && 00411 (curr_female < prev_female || curr_male < prev_male)) 00412 error("Sex-specific and sex-averaged maps are inconsistent.\n\n" 00413 "In the sex-averaged map, marker %s (%.2f cM) follows marker %s (%.2f cM).\n" 00414 "In the %smale map, marker %s (%.2f cM) PRECEDES marker %s (%.2f cM).\n", 00415 (const char *) sortedMarkers[i]->name, 00416 sortedMarkers[i]->position * 100, 00417 (const char *) sortedMarkers[i-1]->name, 00418 sortedMarkers[i-1]->position * 100, 00419 curr_female < prev_female ? "fe" : "", 00420 (const char *) sortedMarkers[i]->name, 00421 (curr_female < prev_female ? curr_female : curr_male) * 100, 00422 (const char *) sortedMarkers[i-1]->name, 00423 (curr_female < prev_female ? prev_female : prev_male) * 100); 00424 00425 prev_chromosome = curr_chromosome; 00426 prev_female = curr_female; 00427 prev_male = curr_male; 00428 } 00429 00430 delete [] sortedMarkers; 00431 } 00432 00433 void PedigreeGlobals::LoadAlleleFrequencies(const char * filename, bool required) 00434 { 00435 // This function is often called with an empty string, and not 00436 // all implementations of the C library like that ... 00437 if (filename[0] == 0) 00438 { 00439 if (required) 00440 error("No name provided for required allele freuquency file\n"); 00441 else 00442 return; 00443 } 00444 00445 // If we get here, the filename is not empty and things should 00446 // work as planned 00447 IFILE f = ifopen(filename, "rb"); 00448 00449 if (f == NULL) 00450 { 00451 if (required) 00452 error("Failed to open required allele frequency file '%s'", 00453 (const char *) filename); 00454 else 00455 return; 00456 } 00457 00458 LoadAlleleFrequencies(f); 00459 ifclose(f); 00460 } 00461 00462 void PedigreeGlobals::LoadAlleleFrequencies(IFILE & input) 00463 { 00464 int done = 0; 00465 String buffer; 00466 StringArray tokens; 00467 MarkerInfo *info = NULL; 00468 00469 bool need_blank_line = false; 00470 int allele_size, old_max, next_allele = 0; // Initialization avoids compiler warning 00471 00472 while (!ifeof(input) && !done) 00473 { 00474 int i, j; 00475 00476 buffer.ReadLine(input); 00477 00478 tokens.Clear(); 00479 tokens.AddTokens(buffer, WHITESPACE); 00480 00481 if (tokens.Length() < 1) continue; 00482 00483 switch (toupper(tokens[0][0])) 00484 { 00485 case 'M' : 00486 if (tokens.Length() == 1) 00487 error("Unnamed marker in allele frequency file"); 00488 if (info != NULL) 00489 need_blank_line |= info->AdjustFrequencies(); 00490 info = GetMarkerInfo(tokens[1]); 00491 info->freq.Clear(); 00492 info->freq.Push(0.0); 00493 next_allele = 1; 00494 break; 00495 case 'F' : 00496 if (info != NULL) 00497 for (i = 1; i < tokens.Length(); i++) 00498 { 00499 buffer = next_allele++; 00500 00501 int allele = LoadAllele(info, buffer); 00502 00503 if (allele >= info->freq.Length()) 00504 { 00505 old_max = info->freq.Length(); 00506 info->freq.Dimension(allele + 1); 00507 for (j = old_max; j < allele; j++) 00508 info->freq[j] = 0.0; 00509 } 00510 00511 info->freq[allele] = tokens[i].AsDouble(); 00512 } 00513 break; 00514 case 'A' : 00515 if (info == NULL) continue; 00516 00517 if (tokens.Length() != 3) 00518 error("Error reading named allele frequencies for locus %s\n" 00519 "Lines with named alleles should have the format\n" 00520 " A allele_label allele_frequency\n\n" 00521 "But the following line was read:\n%s\n", 00522 (const char *) info->name, (const char *) buffer); 00523 00524 allele_size = LoadAllele(info, tokens[1]); 00525 next_allele = atoi(tokens[1]) + 1; 00526 00527 if (allele_size < 1) 00528 error("Error reading named allele frequencies for locus %s\n" 00529 "An invalid allele label was encountered\n", 00530 (const char *) info->name); 00531 00532 if (allele_size >= info->freq.Length()) 00533 { 00534 old_max = info->freq.Length(); 00535 info->freq.Dimension(allele_size + 1); 00536 for (i = old_max; i < allele_size; i++) 00537 info->freq[i] = 0.0; 00538 } 00539 00540 info->freq[allele_size] = tokens[2]; 00541 break; 00542 case 'E' : 00543 done = 1; 00544 break; 00545 default : 00546 error("Problem in allele frequency file.\n" 00547 "Lines in this file should be of two types:\n" 00548 " -- Marker name lines begin with an M\n" 00549 " -- Frequency lines begin with an F\n\n" 00550 "However the following line is different:\n%s\n", 00551 (const char *) buffer); 00552 } 00553 } 00554 00555 if (info != NULL) 00556 need_blank_line |= info->AdjustFrequencies(); 00557 00558 if (need_blank_line) printf("\n"); 00559 } 00560 00561 void PedigreeGlobals::LoadMarkerMap(const char * filename, bool filter) 00562 { 00563 IFILE f = ifopen(filename, "rb"); 00564 if (f == NULL) return; 00565 LoadMarkerMap(f, filter); 00566 ifclose(f); 00567 } 00568 00569 void PedigreeGlobals::LoadMarkerMap(IFILE & input, bool filter) 00570 { 00571 String buffer; 00572 StringArray tokens; 00573 bool first_pass = true; 00574 00575 while (!ifeof(input)) 00576 { 00577 buffer.ReadLine(input); 00578 00579 tokens.Clear(); 00580 tokens.AddTokens(buffer, WHITESPACE); 00581 00582 if (tokens.Length() < 1) continue; 00583 00584 if (first_pass) 00585 { 00586 sexSpecificMap = (tokens.Length() == 5); 00587 00588 // if (sexSpecificMap) 00589 // printf("\n Found sex-specific map ...\n\n"); 00590 00591 first_pass = false; 00592 } 00593 00594 if (tokens.Length() != 3 && !sexSpecificMap) 00595 error("Error reading map file\n" 00596 "Each line in this file should include 3 fields:\n" 00597 "CHROMOSOME, MARKER_NAME, and POSITION\n" 00598 "However the following line has %d fields\n%s\n", 00599 tokens.Length(), (const char *) buffer); 00600 00601 if (tokens.Length() != 5 && sexSpecificMap) 00602 error("Error reading map file\n" 00603 "Each line in this file should include 5 fields:\n\n" 00604 "CHROMOSOME, MARKER_NAME, SEX_AVERAGED_POS, FEMALE_POS AND MALE_POS\n\n" 00605 "However the following line has %d fields\n%s\n", 00606 tokens.Length(), (const char *) buffer); 00607 00608 bool previous_state = String::caseSensitive; 00609 String::caseSensitive = false; 00610 00611 if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") && 00612 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") && 00613 (tokens[2] == "KOSAMBI" || tokens[2] == "POS" || tokens[2] == "POSITION" || 00614 tokens[2] == "SEX_AVERAGED_POS" || tokens[2] == "CM" || tokens[2] == "HALDANE")) 00615 continue; 00616 00617 String::caseSensitive = previous_state; 00618 00619 if (filter) 00620 if (LookupMarker(tokens[1]) < 0) 00621 continue; 00622 00623 MarkerInfo * info = GetMarkerInfo(tokens[1]); 00624 00625 int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0]; 00626 00627 info->chromosome = chr; 00628 info->position = (double) tokens[2] * 0.01; 00629 00630 if (sexSpecificMap) 00631 { 00632 char * flag; 00633 00634 double female = strtod(tokens[3], &flag); 00635 if (*flag) 00636 error("In the map file, the female cM position for marker\n" 00637 "%s is %s. This is not a valid number.", 00638 (const char *) tokens[1], (const char *) tokens[3]); 00639 00640 double male = strtod(tokens[4], &flag); 00641 if (*flag) 00642 error("In the map file, the male cM position for marker\n" 00643 "%s is %s. This is not a valid number.", 00644 (const char *) tokens[1], (const char *) tokens[4]); 00645 00646 info->positionFemale = (double) female * 0.01; 00647 info->positionMale = (double) male * 0.01; 00648 } 00649 else 00650 info->positionFemale = info->positionMale = info->position; 00651 } 00652 00653 if (sexSpecificMap) VerifySexSpecificOrder(); 00654 } 00655 00656 void PedigreeGlobals::LoadBasepairMap(const char * filename) 00657 { 00658 IFILE f = ifopen(filename, "rb"); 00659 if (f == NULL) 00660 error("The map file [%s] could not be opened\n\n" 00661 "Please check that the filename is correct and that the file is\n" 00662 "not being used by another program", filename); 00663 LoadBasepairMap(f); 00664 ifclose(f); 00665 } 00666 00667 void PedigreeGlobals::LoadBasepairMap(IFILE & input) 00668 { 00669 String buffer; 00670 StringArray tokens; 00671 00672 sexSpecificMap = false; 00673 00674 while (!ifeof(input)) 00675 { 00676 buffer.ReadLine(input); 00677 00678 tokens.Clear(); 00679 tokens.AddTokens(buffer, WHITESPACE); 00680 00681 if (tokens.Length() < 1) continue; 00682 00683 if (tokens.Length() != 3) 00684 error("Error reading map file\n" 00685 "Each line in this file should include 3 fields:\n" 00686 "CHROMOSOME, MARKER_NAME, and POSITION\n" 00687 "However the following line has %d fields\n%s\n", 00688 tokens.Length(), (const char *) buffer); 00689 00690 bool previous_state = String::caseSensitive; 00691 String::caseSensitive = false; 00692 00693 if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") && 00694 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") && 00695 (tokens[2] == "BASEPAIR" || tokens[2] == "POS" || tokens[2] == "POSITION")) 00696 continue; 00697 00698 String::caseSensitive = previous_state; 00699 00700 MarkerInfo * info = GetMarkerInfo(tokens[1]); 00701 00702 int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0]; 00703 00704 info->chromosome = chr; 00705 info->position = (double) tokens[2]; 00706 } 00707 } 00708 00709 int PedigreeGlobals::instanceCount = 0; 00710 00711 PedigreeGlobals::~PedigreeGlobals() 00712 { 00713 if (--instanceCount == 0 && markerInfoSize) 00714 { 00715 for (int i = 0; i < markerInfoCount; i++) 00716 delete markerInfo[i]; 00717 delete [] markerInfo; 00718 delete [] markerInfoByInteger; 00719 } 00720 } 00721 00722 void PedigreeGlobals::WriteMapFile(const char * filename) 00723 { 00724 if (!MarkerPositionsAvailable()) 00725 return; 00726 00727 FILE * output = fopen(filename, "wt"); 00728 00729 if (output == NULL) 00730 error("Creating map file \"%s\"", filename); 00731 00732 WriteMapFile(output); 00733 fclose(output); 00734 } 00735 00736 void PedigreeGlobals::WriteMapFile(FILE * output) 00737 { 00738 if (!sexSpecificMap) 00739 fprintf(output, "CHR MARKER POS\n"); 00740 else 00741 fprintf(output, "CHR MARKER POS POSF POSM\n"); 00742 00743 for (int i = 0; i < markerInfoCount; i++) 00744 { 00745 if (markerInfo[i]->chromosome != -1) 00746 { 00747 if (!sexSpecificMap) 00748 fprintf(output, "%3d %-10s %g\n", 00749 markerInfo[i]->chromosome, 00750 (const char *) markerInfo[i]->name, 00751 markerInfo[i]->position * 100.0); 00752 else 00753 fprintf(output, "%3d %-10s %g %g %g\n", 00754 markerInfo[i]->chromosome, 00755 (const char *) markerInfo[i]->name, 00756 markerInfo[i]->position * 100.0, 00757 markerInfo[i]->positionFemale * 100.0, 00758 markerInfo[i]->positionMale * 100.0); 00759 } 00760 } 00761 } 00762 00763 void PedigreeGlobals::WriteFreqFile(const char * filename, bool old_format) 00764 { 00765 FILE * output = fopen(filename, "wt"); 00766 00767 if (output == NULL) 00768 error("Creating allele frequency file \"%s\"", filename); 00769 00770 WriteFreqFile(output, old_format); 00771 fclose(output); 00772 } 00773 00774 void PedigreeGlobals::WriteFreqFile(FILE * output, bool old_format) 00775 { 00776 for (int i = 0; i < markerInfoCount; i++) 00777 { 00778 MarkerInfo * info = markerInfo[i]; 00779 00780 if (info->freq.Length() == 0) continue; 00781 00782 fprintf(output, "M %s\n", (const char *) info->name); 00783 00784 if (old_format && info->alleleLabels.Length() == 0) 00785 for (int j = 1; j < info->freq.Length(); j++) 00786 fprintf(output, "%s%.5f%s", 00787 j % 7 == 1 ? "F " : "", info->freq[j], 00788 j == info->freq.Length() - 1 ? "\n" : j % 7 == 0 ? "\n" : " "); 00789 else 00790 for (int j = 1; j < info->freq.Length(); j++) 00791 if (info->freq[j] > 1e-7) 00792 fprintf(output, "A %5s %.5f\n", 00793 (const char *) info->GetAlleleLabel(j), info->freq[j]); 00794 } 00795 } 00796 00797 bool PedigreeGlobals::MarkerPositionsAvailable() 00798 { 00799 for (int i = 0; i < markerInfoCount; i++) 00800 if (markerInfo[i]->chromosome != -1) 00801 return true; 00802 00803 return false; 00804 } 00805 00806 bool PedigreeGlobals::AlleleFrequenciesAvailable() 00807 { 00808 for (int i = 0; i < markerInfoCount; i++) 00809 if (markerInfo[i]->freq.Length() > 1) 00810 return true; 00811 00812 return false; 00813 } 00814 00815 int PedigreeGlobals::LoadAllele(int marker, String & token) 00816 { 00817 return LoadAllele(GetMarkerInfo(marker), token); 00818 } 00819 00820 int PedigreeGlobals::LoadAllele(MarkerInfo * info, String & token) 00821 { 00822 int allele = info->GetAlleleNumber(token); 00823 00824 if (allele >= 0) return allele; 00825 00826 static unsigned char lookup[128]; 00827 static bool init = false; 00828 00829 if (!init) 00830 { 00831 init = true; 00832 00833 for (int i = 0; i < 128; i++) 00834 lookup[i] = 0; 00835 00836 for (int i = '1'; i <= '9'; i++) 00837 lookup[i] = 1; 00838 00839 lookup[int('a')] = lookup[int('A')] = lookup[int('c')] = lookup[int('C')] = 2; 00840 lookup[int('g')] = lookup[int('G')] = lookup[int('t')] = lookup[int('T')] = 2; 00841 } 00842 00843 int first = token[0]; 00844 bool goodstart = first > 0 && first < 128; 00845 00846 if (token.Length() == 1 && goodstart && lookup[int(token[0])]) 00847 return info->NewAllele(token); 00848 00849 if (!goodstart || lookup[int(token[0])] != 1) 00850 return 0; 00851 00852 int integer = token.AsInteger(); 00853 token = integer; 00854 00855 allele = info->GetAlleleNumber(token); 00856 00857 if (allele > 0) 00858 return allele; 00859 00860 if (integer <= 0) return 0; 00861 00862 if (integer > 1000000) 00863 { 00864 static bool warn_user = true; 00865 00866 if (warn_user) 00867 { 00868 printf("Some allele numbers for marker %s are > 1000000\n" 00869 "All allele numbers >1000000 will be treated as missing\n\n", 00870 (const char *) info->name); 00871 warn_user = false; 00872 } 00873 00874 return 0; 00875 } 00876 00877 return info->NewAllele(token); 00878 } 00879 00880 std::ostream &operator << (std::ostream &stream, MarkerInfo &m) 00881 { 00882 stream << "MarkerInfo for marker " << m.name << std::endl; 00883 stream << " located on chromsome " << m.chromosome << ":" << (int64_t)(100 * m.position) << std::endl; 00884 stream << " allele count = " << m.freq.Length() << std::endl; 00885 stream << " label count = " << m.alleleLabels.Length() << std::endl; 00886 if (m.freq.Length() == m.alleleLabels.Length()) 00887 { 00888 for (int i=0; i<m.freq.Length(); i++) 00889 { 00890 stream << " " << m.alleleLabels[i] << ":" << m.freq[i]; 00891 } 00892 stream << std::endl; 00893 } 00894 else 00895 { 00896 stream << " output truncated - counts appear corrupt." << std::endl; 00897 } 00898 return stream; 00899 } 00900