libStatGen Software  1
PedigreeGlobals.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "PedigreeGlobals.h"
00019 #include "Sort.h"
00020 #include "Error.h"
00021 
00022 #include <math.h>
00023 #include <string.h>
00024 #include <ctype.h>
00025 
00026 int PedigreeGlobals::traitCount = 0;
00027 int PedigreeGlobals::affectionCount = 0;
00028 int PedigreeGlobals::covariateCount = 0;
00029 int PedigreeGlobals::markerCount = 0;
00030 int PedigreeGlobals::stringCount = 0;
00031 
00032 // If this value isn't set, all X chromosome data will be rejected
00033 bool PedigreeGlobals::chromosomeX = false;
00034 bool PedigreeGlobals::sexSpecificMap = false;
00035 
00036 StringArray   PedigreeGlobals::traitNames;
00037 StringArray   PedigreeGlobals::markerNames;
00038 StringArray   PedigreeGlobals::covariateNames;
00039 StringArray   PedigreeGlobals::affectionNames;
00040 StringArray   PedigreeGlobals::stringNames;
00041 StringIntHash PedigreeGlobals::markerLookup;
00042 StringIntHash PedigreeGlobals::traitLookup;
00043 StringIntHash PedigreeGlobals::affectionLookup;
00044 StringIntHash PedigreeGlobals::covariateLookup;
00045 StringIntHash PedigreeGlobals::stringLookup;
00046 
00047 int PedigreeGlobals::markerInfoCount = 0;
00048 int PedigreeGlobals::markerInfoSize = 0;
00049 
00050 MarkerInfo ** PedigreeGlobals::markerInfo = NULL;
00051 MarkerInfo ** PedigreeGlobals::markerInfoByInteger = NULL;
00052 StringHash    PedigreeGlobals::markerInfoByName;
00053 
00054 int MarkerInfo::count = 0;
00055 
00056 int MarkerInfo::ComparePosition(MarkerInfo ** left, MarkerInfo ** right)
00057 {
00058     if ((*left)->chromosome != (*right)->chromosome)
00059         return (*left)->chromosome - (*right)->chromosome;
00060 
00061     double difference = (*left)->position - (*right)->position;
00062 
00063     if (difference >  0.0)
00064         return 1;
00065     else if (difference == 0.0)
00066         return (*left)->serial - (*right)->serial;
00067     else
00068         return -1;
00069 }
00070 
00071 String MarkerInfo::GetAlleleLabel(int allele)
00072 {
00073     if (alleleLabels.Length() > allele && alleleLabels[allele].Length())
00074         return alleleLabels[allele];
00075     else if (alleleLabels.Length() <= allele)
00076         alleleLabels.Dimension(allele + 1);
00077     return alleleLabels[allele] = allele;
00078 }
00079 
00080 bool MarkerInfo::AdjustFrequencies()
00081 {
00082     if (freq.Length() <= 1)
00083     {
00084         freq.Clear();
00085         return false;
00086     }
00087 
00088     if (freq.Min() < 0.0)
00089         error("Locus %s has negative allele frequencies\n", (const char *) name);
00090 
00091     double sum = freq.Sum();
00092 
00093     if (sum <= 0.0)
00094         error("Locus %s frequencies sum to %f, which doesn't make sense\n",
00095               (const char *) name, sum);
00096 
00097     if (sum != 1.0)
00098         freq *= 1.0 / sum;
00099 
00100     if (fabs(sum - 1.0) > 1.2e-5)
00101     {
00102         printf("Locus %s frequencies sum to %f, adjusted to 1.0\n",
00103                (const char *) name, sum);
00104         return true;
00105     }
00106 
00107     return false;
00108 }
00109 
00110 void MarkerInfo::IndexAlleles()
00111 {
00112     if (alleleLabels.Length() >= 255)
00113         error("Marker %s has more than 254 distinct alleles\n",
00114               (const char *) name);
00115 
00116     alleleNumbers.Clear();
00117 
00118     for (int i = 1; i < alleleLabels.Length(); i++)
00119         alleleNumbers.SetInteger(alleleLabels[i], i);
00120 }
00121 
00122 int MarkerInfo::NewAllele(const String & label)
00123 {
00124     if (alleleLabels.Length() == 0)
00125         alleleLabels.Push("");
00126 
00127     if (alleleLabels.Length() >= 255)
00128         error("Marker %s has more than 254 distinct alleles\n",
00129               (const char *) name);
00130 
00131     alleleNumbers.SetInteger(label, alleleLabels.Length());
00132     alleleLabels.Push(label);
00133 
00134     return alleleLabels.Length() - 1;
00135 }
00136 
00137 int PedigreeGlobals::GetTraitID(const char * name)
00138 {
00139     int idx = traitLookup.Integer(name);
00140 
00141     if (idx != -1) return idx;
00142 
00143     traitNames.Add(name);
00144     traitLookup.SetInteger(name, traitCount);
00145     return traitCount++;
00146 }
00147 
00148 int PedigreeGlobals::GetAffectionID(const char * name)
00149 {
00150     int idx = affectionLookup.Integer(name);
00151 
00152     if (idx != -1) return idx;
00153 
00154     affectionNames.Add(name);
00155     affectionLookup.SetInteger(name, affectionCount);
00156     return affectionCount++;
00157 }
00158 
00159 int PedigreeGlobals::GetCovariateID(const char * name)
00160 {
00161     int idx = covariateLookup.Integer(name);
00162 
00163     if (idx != -1) return idx;
00164 
00165     covariateNames.Add(name);
00166     covariateLookup.SetInteger(name, covariateCount);
00167     return covariateCount++;
00168 }
00169 
00170 int PedigreeGlobals::GetStringID(const char * name)
00171 {
00172     int idx = stringLookup.Integer(name);
00173 
00174     if (idx != -1) return idx;
00175 
00176     stringNames.Add(name);
00177     stringLookup.SetInteger(name, stringCount);
00178     return stringCount++;
00179 }
00180 
00181 int PedigreeGlobals::GetMarkerID(const char * name)
00182 {
00183     int idx = markerLookup.Integer(name);
00184 
00185     if (idx != -1) return idx;
00186 
00187     markerNames.Add(name);
00188     markerLookup.SetInteger(name, markerCount);
00189 
00190     // Grow the marker info key ...
00191     if (markerCount == 0)
00192     {
00193         markerInfoByInteger = new MarkerInfo * [16];
00194 
00195         for (int i = 0; i < 16; i++)
00196             markerInfoByInteger[i] = NULL;
00197     }
00198     else if ((markerCount & (markerCount - 1)) == 0 && markerCount > 15)
00199     {
00200         MarkerInfo ** newKey = new MarkerInfo * [markerCount * 2];
00201 
00202         for (int i = 0; i < markerCount; i++)
00203             newKey[i] = markerInfoByInteger[i];
00204 
00205         for (int i = markerCount; i < markerCount * 2; i++)
00206             newKey[i] = NULL;
00207 
00208         delete [] markerInfoByInteger;
00209 
00210         markerInfoByInteger = newKey;
00211     }
00212 
00213     return markerCount++;
00214 }
00215 
00216 MarkerInfo * PedigreeGlobals::GetMarkerInfo(String & name)
00217 {
00218     MarkerInfo * info = (MarkerInfo *) markerInfoByName.Object(name);
00219 
00220     if (info != NULL) return info;
00221 
00222     info = new MarkerInfo(name);
00223     markerInfoByName.Add(name, info);
00224 
00225     if (markerInfoCount >= markerInfoSize)
00226         GrowMarkerInfo();
00227 
00228     markerInfo[markerInfoCount++] = info;
00229 
00230     int markerId = LookupMarker(name);
00231     if (markerId >= 0) markerInfoByInteger[markerId] = info;
00232 
00233     return info;
00234 }
00235 
00236 MarkerInfo * PedigreeGlobals::GetMarkerInfo(int markerId)
00237 {
00238     if (markerId >= markerCount)
00239         error("Attempted to retrieve MarkerInfo using out-of-bounds index\n");
00240 
00241     if (markerInfoByInteger[markerId] != NULL)
00242         return markerInfoByInteger[markerId];
00243     else
00244         return GetMarkerInfo(markerNames[markerId]);
00245 }
00246 
00247 void PedigreeGlobals::GrowMarkerInfo()
00248 {
00249     int newSize = markerInfoSize ? 2 * markerInfoSize : 32;
00250 
00251     MarkerInfo ** newArray = new MarkerInfo * [newSize];
00252 
00253     if (markerInfoSize)
00254     {
00255         memcpy(newArray, markerInfo, sizeof(MarkerInfo *) * markerInfoSize);
00256         delete [] markerInfo;
00257     }
00258 
00259     markerInfo = newArray;
00260     markerInfoSize = newSize;
00261 }
00262 
00263 void PedigreeGlobals::FlagMissingMarkers(IntArray & missingMarkers)
00264 {
00265     int skipped_markers = 0;
00266 
00267     if (missingMarkers.Length())
00268     {
00269         StringArray names;
00270 
00271         printf("These markers couldn't be placed and won't be analysed:");
00272 
00273         for (int i = 0; i < missingMarkers.Length(); i++)
00274             names.Push(GetMarkerInfo(missingMarkers[i])->name);
00275         names.Sort();
00276 
00277         for (int i = 0, line = 80, lines = 0; i < missingMarkers.Length(); i++)
00278         {
00279             if (line + names[i].Length() + 1 > 79)
00280                 printf("\n   "), line = 3, lines++;
00281 
00282             if (lines < 5)
00283             {
00284                 printf("%s ", (const char *) names[i]);
00285                 line += names[i].Length() + 1;
00286             }
00287             else
00288                 skipped_markers++;
00289         }
00290 
00291         if (skipped_markers)
00292             printf("as well as %d other unlisted markers...", skipped_markers);
00293 
00294         printf("\n\n");
00295     }
00296 }
00297 
00298 void PedigreeGlobals::GetOrderedMarkers(IntArray & markers)
00299 {
00300     if (markers.Length() == 0)
00301     {
00302         markers.Dimension(markerCount);
00303         markers.SetSequence(0, 1);
00304     }
00305 
00306     MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
00307 
00308     int count = 0;
00309     IntArray missingMarkers;
00310 
00311     for (int i = 0; i < markers.Length(); i++)
00312     {
00313         MarkerInfo * info = GetMarkerInfo(markers[i]);
00314 
00315         if (info->chromosome != -1)
00316             subset[count++] = info;
00317         else
00318             missingMarkers.Push(i);
00319     }
00320 
00321     FlagMissingMarkers(missingMarkers);
00322 
00323     QuickSort(subset, count, sizeof(MarkerInfo *),
00324               COMPAREFUNC MarkerInfo::ComparePosition);
00325 
00326     markers.Clear();
00327     for (int i = 0; i < count; i++)
00328         markers.Push(GetMarkerID(subset[i]->name));
00329 }
00330 
00331 int PedigreeGlobals::SortMarkersInMapOrder(IntArray & markers, int chromosome)
00332 {
00333     if (markers.Length() == 0)
00334     {
00335         markers.Dimension(markerCount);
00336         markers.SetSequence(0, 1);
00337     }
00338 
00339     MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
00340 
00341     int count = 0;
00342     IntArray missingMarkers;
00343 
00344     for (int i = 0; i < markers.Length(); i++)
00345     {
00346         MarkerInfo * info = GetMarkerInfo(markers[i]);
00347 
00348         if (info->chromosome != -1)
00349             subset[count++] = info;
00350         else if (chromosome == -1)
00351             missingMarkers.Push(i);
00352     }
00353 
00354     if (chromosome == -1)
00355         FlagMissingMarkers(missingMarkers);
00356 
00357     QuickSort(subset, count, sizeof(MarkerInfo *),
00358               COMPAREFUNC MarkerInfo::ComparePosition);
00359 
00360     markers.Clear();
00361 
00362     int  current_chromosome = -1, next_chromosome = 0;
00363 
00364     for (int i = 0; i < count; i++)
00365         if (subset[i]->chromosome < chromosome)
00366             continue;
00367         else if (current_chromosome == -1 ||
00368                  subset[i]->chromosome == current_chromosome)
00369         {
00370             markers.Push(GetMarkerID(subset[i]->name));
00371             current_chromosome = subset[i]->chromosome;
00372         }
00373         else if (!next_chromosome)
00374         {
00375             next_chromosome = subset[i]->chromosome;
00376             break;
00377         }
00378 
00379     delete [] subset;
00380 
00381     return next_chromosome;
00382 }
00383 
00384 void PedigreeGlobals::VerifySexSpecificOrder()
00385 {
00386     if (markerCount <= 1)
00387         return;
00388 
00389     MarkerInfo ** sortedMarkers = new MarkerInfo * [markerCount];
00390 
00391     for (int i = 0; i < markerCount; i++)
00392         sortedMarkers[i] = GetMarkerInfo(i);
00393 
00394     QuickSort(sortedMarkers, markerCount, sizeof(MarkerInfo *),
00395               COMPAREFUNC MarkerInfo::ComparePosition);
00396 
00397     double prev_female = sortedMarkers[0]->positionFemale;
00398     double prev_male = sortedMarkers[0]->positionMale;
00399     double curr_female, curr_male;
00400 
00401     int    prev_chromosome = sortedMarkers[0]->chromosome;
00402     int    curr_chromosome;
00403 
00404     for (int i = 1; i < markerCount; i++)
00405     {
00406         curr_chromosome = sortedMarkers[i]->chromosome;
00407         curr_female = sortedMarkers[i]->positionFemale;
00408         curr_male = sortedMarkers[i]->positionMale;
00409 
00410         if (curr_chromosome == prev_chromosome &&
00411                 (curr_female < prev_female || curr_male < prev_male))
00412             error("Sex-specific and sex-averaged maps are inconsistent.\n\n"
00413                   "In the sex-averaged map, marker %s (%.2f cM) follows marker %s (%.2f cM).\n"
00414                   "In the %smale map, marker %s (%.2f cM) PRECEDES marker %s (%.2f cM).\n",
00415                   (const char *) sortedMarkers[i]->name,
00416                   sortedMarkers[i]->position * 100,
00417                   (const char *) sortedMarkers[i-1]->name,
00418                   sortedMarkers[i-1]->position * 100,
00419                   curr_female < prev_female ? "fe" : "",
00420                   (const char *) sortedMarkers[i]->name,
00421                   (curr_female < prev_female ? curr_female : curr_male) * 100,
00422                   (const char *) sortedMarkers[i-1]->name,
00423                   (curr_female < prev_female ? prev_female : prev_male) * 100);
00424 
00425         prev_chromosome = curr_chromosome;
00426         prev_female = curr_female;
00427         prev_male = curr_male;
00428     }
00429 
00430     delete [] sortedMarkers;
00431 }
00432 
00433 void PedigreeGlobals::LoadAlleleFrequencies(const char * filename, bool required)
00434 {
00435     // This function is often called with an empty string, and not
00436     // all implementations of the C library like that ...
00437     if (filename[0] == 0)
00438     {
00439         if (required)
00440             error("No name provided for required allele freuquency file\n");
00441         else
00442             return;
00443     }
00444 
00445     // If we get here, the filename is not empty and things should
00446     // work as planned
00447     IFILE f = ifopen(filename, "rb");
00448 
00449     if (f == NULL)
00450     {
00451         if (required)
00452             error("Failed to open required allele frequency file '%s'",
00453                   (const char *) filename);
00454         else
00455             return;
00456     }
00457 
00458     LoadAlleleFrequencies(f);
00459     ifclose(f);
00460 }
00461 
00462 void PedigreeGlobals::LoadAlleleFrequencies(IFILE & input)
00463 {
00464     int         done = 0;
00465     String      buffer;
00466     StringArray tokens;
00467     MarkerInfo *info = NULL;
00468 
00469     bool need_blank_line = false;
00470     int  allele_size, old_max, next_allele = 0; // Initialization avoids compiler warning
00471 
00472     while (!ifeof(input) && !done)
00473     {
00474         int   i, j;
00475 
00476         buffer.ReadLine(input);
00477 
00478         tokens.Clear();
00479         tokens.AddTokens(buffer, WHITESPACE);
00480 
00481         if (tokens.Length() < 1) continue;
00482 
00483         switch (toupper(tokens[0][0]))
00484         {
00485             case 'M' :
00486                 if (tokens.Length() == 1)
00487                     error("Unnamed marker in allele frequency file");
00488                 if (info != NULL)
00489                     need_blank_line |= info->AdjustFrequencies();
00490                 info = GetMarkerInfo(tokens[1]);
00491                 info->freq.Clear();
00492                 info->freq.Push(0.0);
00493                 next_allele = 1;
00494                 break;
00495             case 'F' :
00496                 if (info != NULL)
00497                     for (i = 1; i < tokens.Length(); i++)
00498                     {
00499                         buffer = next_allele++;
00500 
00501                         int allele = LoadAllele(info, buffer);
00502 
00503                         if (allele >= info->freq.Length())
00504                         {
00505                             old_max = info->freq.Length();
00506                             info->freq.Dimension(allele + 1);
00507                             for (j = old_max; j < allele; j++)
00508                                 info->freq[j] = 0.0;
00509                         }
00510 
00511                         info->freq[allele] = tokens[i].AsDouble();
00512                     }
00513                 break;
00514             case 'A' :
00515                 if (info == NULL) continue;
00516 
00517                 if (tokens.Length() != 3)
00518                     error("Error reading named allele frequencies for locus %s\n"
00519                           "Lines with named alleles should have the format\n"
00520                           "   A allele_label allele_frequency\n\n"
00521                           "But the following line was read:\n%s\n",
00522                           (const char *) info->name, (const char *) buffer);
00523 
00524                 allele_size = LoadAllele(info, tokens[1]);
00525                 next_allele = atoi(tokens[1]) + 1;
00526 
00527                 if (allele_size < 1)
00528                     error("Error reading named allele frequencies for locus %s\n"
00529                           "An invalid allele label was encountered\n",
00530                           (const char *) info->name);
00531 
00532                 if (allele_size >= info->freq.Length())
00533                 {
00534                     old_max = info->freq.Length();
00535                     info->freq.Dimension(allele_size + 1);
00536                     for (i = old_max; i < allele_size; i++)
00537                         info->freq[i] = 0.0;
00538                 }
00539 
00540                 info->freq[allele_size] = tokens[2];
00541                 break;
00542             case 'E' :
00543                 done = 1;
00544                 break;
00545             default :
00546                 error("Problem in allele frequency file.\n"
00547                       "Lines in this file should be of two types:\n"
00548                       "  -- Marker name lines begin with an M\n"
00549                       "  -- Frequency lines begin with an F\n\n"
00550                       "However the following line is different:\n%s\n",
00551                       (const char *) buffer);
00552         }
00553     }
00554 
00555     if (info != NULL)
00556         need_blank_line |= info->AdjustFrequencies();
00557 
00558     if (need_blank_line) printf("\n");
00559 }
00560 
00561 void PedigreeGlobals::LoadMarkerMap(const char * filename, bool filter)
00562 {
00563     IFILE f = ifopen(filename, "rb");
00564     if (f == NULL) return;
00565     LoadMarkerMap(f, filter);
00566     ifclose(f);
00567 }
00568 
00569 void PedigreeGlobals::LoadMarkerMap(IFILE & input, bool filter)
00570 {
00571     String      buffer;
00572     StringArray tokens;
00573     bool first_pass = true;
00574 
00575     while (!ifeof(input))
00576     {
00577         buffer.ReadLine(input);
00578 
00579         tokens.Clear();
00580         tokens.AddTokens(buffer, WHITESPACE);
00581 
00582         if (tokens.Length() < 1) continue;
00583 
00584         if (first_pass)
00585         {
00586             sexSpecificMap = (tokens.Length() == 5);
00587 
00588             // if (sexSpecificMap)
00589             //   printf("\n  Found sex-specific map ...\n\n");
00590 
00591             first_pass = false;
00592         }
00593 
00594         if (tokens.Length() != 3 && !sexSpecificMap)
00595             error("Error reading map file\n"
00596                   "Each line in this file should include 3 fields:\n"
00597                   "CHROMOSOME, MARKER_NAME, and POSITION\n"
00598                   "However the following line has %d fields\n%s\n",
00599                   tokens.Length(), (const char *) buffer);
00600 
00601         if (tokens.Length() != 5 && sexSpecificMap)
00602             error("Error reading map file\n"
00603                   "Each line in this file should include 5 fields:\n\n"
00604                   "CHROMOSOME, MARKER_NAME, SEX_AVERAGED_POS, FEMALE_POS AND MALE_POS\n\n"
00605                   "However the following line has %d fields\n%s\n",
00606                   tokens.Length(), (const char *) buffer);
00607 
00608         bool previous_state = String::caseSensitive;
00609         String::caseSensitive = false;
00610 
00611         if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
00612                 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
00613                 (tokens[2] == "KOSAMBI" || tokens[2] == "POS" || tokens[2] == "POSITION" ||
00614                  tokens[2] == "SEX_AVERAGED_POS" || tokens[2] == "CM" || tokens[2] == "HALDANE"))
00615             continue;
00616 
00617         String::caseSensitive = previous_state;
00618 
00619         if (filter)
00620             if (LookupMarker(tokens[1]) < 0)
00621                 continue;
00622 
00623         MarkerInfo * info = GetMarkerInfo(tokens[1]);
00624 
00625         int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
00626 
00627         info->chromosome = chr;
00628         info->position = (double) tokens[2] * 0.01;
00629 
00630         if (sexSpecificMap)
00631         {
00632             char * flag;
00633 
00634             double female = strtod(tokens[3], &flag);
00635             if (*flag)
00636                 error("In the map file, the female cM position for marker\n"
00637                       "%s is %s. This is not a valid number.",
00638                       (const char *) tokens[1], (const char *) tokens[3]);
00639 
00640             double male = strtod(tokens[4], &flag);
00641             if (*flag)
00642                 error("In the map file, the male cM position for marker\n"
00643                       "%s is %s. This is not a valid number.",
00644                       (const char *) tokens[1], (const char *) tokens[4]);
00645 
00646             info->positionFemale = (double) female * 0.01;
00647             info->positionMale = (double) male * 0.01;
00648         }
00649         else
00650             info->positionFemale = info->positionMale = info->position;
00651     }
00652 
00653     if (sexSpecificMap) VerifySexSpecificOrder();
00654 }
00655 
00656 void PedigreeGlobals::LoadBasepairMap(const char * filename)
00657 {
00658     IFILE f = ifopen(filename, "rb");
00659     if (f == NULL)
00660         error("The map file [%s] could not be opened\n\n"
00661               "Please check that the filename is correct and that the file is\n"
00662               "not being used by another program", filename);
00663     LoadBasepairMap(f);
00664     ifclose(f);
00665 }
00666 
00667 void PedigreeGlobals::LoadBasepairMap(IFILE & input)
00668 {
00669     String      buffer;
00670     StringArray tokens;
00671 
00672     sexSpecificMap = false;
00673 
00674     while (!ifeof(input))
00675     {
00676         buffer.ReadLine(input);
00677 
00678         tokens.Clear();
00679         tokens.AddTokens(buffer, WHITESPACE);
00680 
00681         if (tokens.Length() < 1) continue;
00682 
00683         if (tokens.Length() != 3)
00684             error("Error reading map file\n"
00685                   "Each line in this file should include 3 fields:\n"
00686                   "CHROMOSOME, MARKER_NAME, and POSITION\n"
00687                   "However the following line has %d fields\n%s\n",
00688                   tokens.Length(), (const char *) buffer);
00689 
00690         bool previous_state = String::caseSensitive;
00691         String::caseSensitive = false;
00692 
00693         if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
00694                 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
00695                 (tokens[2] == "BASEPAIR" || tokens[2] == "POS" || tokens[2] == "POSITION"))
00696             continue;
00697 
00698         String::caseSensitive = previous_state;
00699 
00700         MarkerInfo * info = GetMarkerInfo(tokens[1]);
00701 
00702         int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
00703 
00704         info->chromosome = chr;
00705         info->position = (double) tokens[2];
00706     }
00707 }
00708 
00709 int PedigreeGlobals::instanceCount = 0;
00710 
00711 PedigreeGlobals::~PedigreeGlobals()
00712 {
00713     if (--instanceCount == 0 && markerInfoSize)
00714     {
00715         for (int i = 0; i < markerInfoCount; i++)
00716             delete markerInfo[i];
00717         delete [] markerInfo;
00718         delete [] markerInfoByInteger;
00719     }
00720 }
00721 
00722 void PedigreeGlobals::WriteMapFile(const char * filename)
00723 {
00724     if (!MarkerPositionsAvailable())
00725         return;
00726 
00727     FILE * output = fopen(filename, "wt");
00728 
00729     if (output == NULL)
00730         error("Creating map file \"%s\"", filename);
00731 
00732     WriteMapFile(output);
00733     fclose(output);
00734 }
00735 
00736 void PedigreeGlobals::WriteMapFile(FILE * output)
00737 {
00738     if (!sexSpecificMap)
00739         fprintf(output, "CHR  MARKER    POS\n");
00740     else
00741         fprintf(output, "CHR  MARKER    POS   POSF   POSM\n");
00742 
00743     for (int i = 0; i < markerInfoCount; i++)
00744     {
00745         if (markerInfo[i]->chromosome != -1)
00746         {
00747             if (!sexSpecificMap)
00748                 fprintf(output, "%3d  %-10s  %g\n",
00749                         markerInfo[i]->chromosome,
00750                         (const char *) markerInfo[i]->name,
00751                         markerInfo[i]->position * 100.0);
00752             else
00753                 fprintf(output, "%3d %-10s %g  %g  %g\n",
00754                         markerInfo[i]->chromosome,
00755                         (const char *) markerInfo[i]->name,
00756                         markerInfo[i]->position * 100.0,
00757                         markerInfo[i]->positionFemale * 100.0,
00758                         markerInfo[i]->positionMale * 100.0);
00759         }
00760     }
00761 }
00762 
00763 void PedigreeGlobals::WriteFreqFile(const char * filename, bool old_format)
00764 {
00765     FILE * output = fopen(filename, "wt");
00766 
00767     if (output == NULL)
00768         error("Creating allele frequency file \"%s\"", filename);
00769 
00770     WriteFreqFile(output, old_format);
00771     fclose(output);
00772 }
00773 
00774 void PedigreeGlobals::WriteFreqFile(FILE * output, bool old_format)
00775 {
00776     for (int i = 0; i < markerInfoCount; i++)
00777     {
00778         MarkerInfo * info = markerInfo[i];
00779 
00780         if (info->freq.Length() == 0) continue;
00781 
00782         fprintf(output, "M %s\n", (const char *) info->name);
00783 
00784         if (old_format && info->alleleLabels.Length() == 0)
00785             for (int j = 1; j < info->freq.Length(); j++)
00786                 fprintf(output, "%s%.5f%s",
00787                         j % 7 == 1 ? "F " : "", info->freq[j],
00788                         j == info->freq.Length() - 1 ? "\n" : j % 7 == 0 ? "\n" : " ");
00789         else
00790             for (int j = 1; j < info->freq.Length(); j++)
00791                 if (info->freq[j] > 1e-7)
00792                     fprintf(output, "A %5s %.5f\n",
00793                             (const char *) info->GetAlleleLabel(j), info->freq[j]);
00794     }
00795 }
00796 
00797 bool PedigreeGlobals::MarkerPositionsAvailable()
00798 {
00799     for (int i = 0; i < markerInfoCount; i++)
00800         if (markerInfo[i]->chromosome != -1)
00801             return true;
00802 
00803     return false;
00804 }
00805 
00806 bool PedigreeGlobals::AlleleFrequenciesAvailable()
00807 {
00808     for (int i = 0; i < markerInfoCount; i++)
00809         if (markerInfo[i]->freq.Length() > 1)
00810             return true;
00811 
00812     return false;
00813 }
00814 
00815 int PedigreeGlobals::LoadAllele(int marker, String & token)
00816 {
00817     return LoadAllele(GetMarkerInfo(marker), token);
00818 }
00819 
00820 int PedigreeGlobals::LoadAllele(MarkerInfo * info, String & token)
00821 {
00822     int allele = info->GetAlleleNumber(token);
00823 
00824     if (allele >= 0) return allele;
00825 
00826     static unsigned char lookup[128];
00827     static bool init = false;
00828 
00829     if (!init)
00830     {
00831         init = true;
00832 
00833         for (int i = 0; i < 128; i++)
00834             lookup[i] = 0;
00835 
00836         for (int i = '1'; i <= '9'; i++)
00837             lookup[i] = 1;
00838 
00839         lookup[int('a')] = lookup[int('A')] = lookup[int('c')] = lookup[int('C')] = 2;
00840         lookup[int('g')] = lookup[int('G')] = lookup[int('t')] = lookup[int('T')] = 2;
00841     }
00842 
00843     int  first = token[0];
00844     bool goodstart = first > 0 && first < 128;
00845 
00846     if (token.Length() == 1 && goodstart && lookup[int(token[0])])
00847         return info->NewAllele(token);
00848 
00849     if (!goodstart || lookup[int(token[0])] != 1)
00850         return 0;
00851 
00852     int integer = token.AsInteger();
00853     token = integer;
00854 
00855     allele = info->GetAlleleNumber(token);
00856 
00857     if (allele > 0)
00858         return allele;
00859 
00860     if (integer <= 0) return 0;
00861 
00862     if (integer > 1000000)
00863     {
00864         static bool warn_user = true;
00865 
00866         if (warn_user)
00867         {
00868             printf("Some allele numbers for marker %s are > 1000000\n"
00869                    "All allele numbers >1000000 will be treated as missing\n\n",
00870                    (const char *) info->name);
00871             warn_user = false;
00872         }
00873 
00874         return 0;
00875     }
00876 
00877     return info->NewAllele(token);
00878 }
00879 
00880 std::ostream &operator << (std::ostream &stream, MarkerInfo &m)
00881 {
00882     stream << "MarkerInfo for marker " << m.name << std::endl;
00883     stream << " located on chromsome " << m.chromosome << ":" << (int64_t)(100 * m.position) << std::endl;
00884     stream << " allele count = " << m.freq.Length() << std::endl;
00885     stream << " label count = " << m.alleleLabels.Length() << std::endl;
00886     if (m.freq.Length() == m.alleleLabels.Length())
00887     {
00888         for (int i=0; i<m.freq.Length(); i++)
00889         {
00890             stream << " " << m.alleleLabels[i] << ":" << m.freq[i];
00891         }
00892         stream << std::endl;
00893     }
00894     else
00895     {
00896         stream << " output truncated - counts appear corrupt." << std::endl;
00897     }
00898     return stream;
00899 }
00900 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends