PedigreeGlobals.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "PedigreeGlobals.h"
00019 #include "Sort.h"
00020 #include "Error.h"
00021 
00022 #include <math.h>
00023 #include <string.h>
00024 #include <ctype.h>
00025 
00026 int PedigreeGlobals::traitCount = 0;
00027 int PedigreeGlobals::affectionCount = 0;
00028 int PedigreeGlobals::covariateCount = 0;
00029 int PedigreeGlobals::markerCount = 0;
00030 
00031 // If this value isn't set, all X chromosome data will be rejected
00032 bool PedigreeGlobals::chromosomeX = false;
00033 bool PedigreeGlobals::sexSpecificMap = false;
00034 
00035 StringArray   PedigreeGlobals::traitNames;
00036 StringArray   PedigreeGlobals::markerNames;
00037 StringArray   PedigreeGlobals::covariateNames;
00038 StringArray   PedigreeGlobals::affectionNames;
00039 StringIntHash PedigreeGlobals::markerLookup;
00040 StringIntHash PedigreeGlobals::traitLookup;
00041 StringIntHash PedigreeGlobals::affectionLookup;
00042 StringIntHash PedigreeGlobals::covariateLookup;
00043 
00044 int PedigreeGlobals::markerInfoCount = 0;
00045 int PedigreeGlobals::markerInfoSize = 0;
00046 
00047 MarkerInfo ** PedigreeGlobals::markerInfo = NULL;
00048 MarkerInfo ** PedigreeGlobals::markerInfoByInteger = NULL;
00049 StringHash    PedigreeGlobals::markerInfoByName;
00050 
00051 int MarkerInfo::count = 0;
00052 
00053 int MarkerInfo::ComparePosition(MarkerInfo ** left, MarkerInfo ** right)
00054 {
00055     if ((*left)->chromosome != (*right)->chromosome)
00056         return (*left)->chromosome - (*right)->chromosome;
00057 
00058     double difference = (*left)->position - (*right)->position;
00059 
00060     if (difference >  0.0)
00061         return 1;
00062     else if (difference == 0.0)
00063         return (*left)->serial - (*right)->serial;
00064     else
00065         return -1;
00066 }
00067 
00068 String MarkerInfo::GetAlleleLabel(int allele)
00069 {
00070     if (alleleLabels.Length() > allele && alleleLabels[allele].Length())
00071         return alleleLabels[allele];
00072     else if (alleleLabels.Length() <= allele)
00073         alleleLabels.Dimension(allele + 1);
00074     return alleleLabels[allele] = allele;
00075 }
00076 
00077 bool MarkerInfo::AdjustFrequencies()
00078 {
00079     if (freq.Length() <= 1)
00080     {
00081         freq.Clear();
00082         return false;
00083     }
00084 
00085     if (freq.Min() < 0.0)
00086         error("Locus %s has negative allele frequencies\n", (const char *) name);
00087 
00088     double sum = freq.Sum();
00089 
00090     if (sum <= 0.0)
00091         error("Locus %s frequencies sum to %f, which doesn't make sense\n",
00092               (const char *) name, sum);
00093 
00094     if (sum != 1.0)
00095         freq *= 1.0 / sum;
00096 
00097     if (fabs(sum - 1.0) > 1.2e-5)
00098     {
00099         printf("Locus %s frequencies sum to %f, adjusted to 1.0\n",
00100                (const char *) name, sum);
00101         return true;
00102     }
00103 
00104     return false;
00105 }
00106 
00107 void MarkerInfo::IndexAlleles()
00108 {
00109     if (alleleLabels.Length() >= 255)
00110         error("Marker %s has more than 254 distinct alleles\n",
00111               (const char *) name);
00112 
00113     alleleNumbers.Clear();
00114 
00115     for (int i = 1; i < alleleLabels.Length(); i++)
00116         alleleNumbers.SetInteger(alleleLabels[i], i);
00117 }
00118 
00119 int MarkerInfo::NewAllele(const String & label)
00120 {
00121     if (alleleLabels.Length() == 0)
00122         alleleLabels.Push("");
00123 
00124     if (alleleLabels.Length() >= 255)
00125         error("Marker %s has more than 254 distinct alleles\n",
00126               (const char *) name);
00127 
00128     alleleNumbers.SetInteger(label, alleleLabels.Length());
00129     alleleLabels.Push(label);
00130 
00131     return alleleLabels.Length() - 1;
00132 }
00133 
00134 int PedigreeGlobals::GetTraitID(const char * name)
00135 {
00136     int idx = traitLookup.Integer(name);
00137 
00138     if (idx != -1) return idx;
00139 
00140     traitNames.Add(name);
00141     traitLookup.SetInteger(name, traitCount);
00142     return traitCount++;
00143 }
00144 
00145 int PedigreeGlobals::GetAffectionID(const char * name)
00146 {
00147     int idx = affectionLookup.Integer(name);
00148 
00149     if (idx != -1) return idx;
00150 
00151     affectionNames.Add(name);
00152     affectionLookup.SetInteger(name, affectionCount);
00153     return affectionCount++;
00154 }
00155 
00156 int PedigreeGlobals::GetCovariateID(const char * name)
00157 {
00158     int idx = covariateLookup.Integer(name);
00159 
00160     if (idx != -1) return idx;
00161 
00162     covariateNames.Add(name);
00163     covariateLookup.SetInteger(name, covariateCount);
00164     return covariateCount++;
00165 }
00166 
00167 int PedigreeGlobals::GetMarkerID(const char * name)
00168 {
00169     int idx = markerLookup.Integer(name);
00170 
00171     if (idx != -1) return idx;
00172 
00173     markerNames.Add(name);
00174     markerLookup.SetInteger(name, markerCount);
00175 
00176     // Grow the marker info key ...
00177     if (markerCount == 0)
00178     {
00179         markerInfoByInteger = new MarkerInfo * [16];
00180 
00181         for (int i = 0; i < 16; i++)
00182             markerInfoByInteger[i] = NULL;
00183     }
00184     else if ((markerCount & (markerCount - 1)) == 0 && markerCount > 15)
00185     {
00186         MarkerInfo ** newKey = new MarkerInfo * [markerCount * 2];
00187 
00188         for (int i = 0; i < markerCount; i++)
00189             newKey[i] = markerInfoByInteger[i];
00190 
00191         for (int i = markerCount; i < markerCount * 2; i++)
00192             newKey[i] = NULL;
00193 
00194         delete [] markerInfoByInteger;
00195 
00196         markerInfoByInteger = newKey;
00197     }
00198 
00199     return markerCount++;
00200 }
00201 
00202 MarkerInfo * PedigreeGlobals::GetMarkerInfo(String & name)
00203 {
00204     MarkerInfo * info = (MarkerInfo *) markerInfoByName.Object(name);
00205 
00206     if (info != NULL) return info;
00207 
00208     info = new MarkerInfo(name);
00209     markerInfoByName.Add(name, info);
00210 
00211     if (markerInfoCount >= markerInfoSize)
00212         GrowMarkerInfo();
00213 
00214     markerInfo[markerInfoCount++] = info;
00215 
00216     int markerId = LookupMarker(name);
00217     if (markerId >= 0) markerInfoByInteger[markerId] = info;
00218 
00219     return info;
00220 }
00221 
00222 MarkerInfo * PedigreeGlobals::GetMarkerInfo(int markerId)
00223 {
00224     if (markerId >= markerCount)
00225         error("Attempted to retrieve MarkerInfo using out-of-bounds index\n");
00226 
00227     if (markerInfoByInteger[markerId] != NULL)
00228         return markerInfoByInteger[markerId];
00229     else
00230         return GetMarkerInfo(markerNames[markerId]);
00231 }
00232 
00233 void PedigreeGlobals::GrowMarkerInfo()
00234 {
00235     int newSize = markerInfoSize ? 2 * markerInfoSize : 32;
00236 
00237     MarkerInfo ** newArray = new MarkerInfo * [newSize];
00238 
00239     if (markerInfoSize)
00240     {
00241         memcpy(newArray, markerInfo, sizeof(MarkerInfo *) * markerInfoSize);
00242         delete [] markerInfo;
00243     }
00244 
00245     markerInfo = newArray;
00246     markerInfoSize = newSize;
00247 }
00248 
00249 void PedigreeGlobals::FlagMissingMarkers(IntArray & missingMarkers)
00250 {
00251     int skipped_markers = 0;
00252 
00253     if (missingMarkers.Length())
00254     {
00255         StringArray names;
00256 
00257         printf("These markers couldn't be placed and won't be analysed:");
00258 
00259         for (int i = 0; i < missingMarkers.Length(); i++)
00260             names.Push(GetMarkerInfo(missingMarkers[i])->name);
00261         names.Sort();
00262 
00263         for (int i = 0, line = 80, lines = 0; i < missingMarkers.Length(); i++)
00264         {
00265             if (line + names[i].Length() + 1 > 79)
00266                 printf("\n   "), line = 3, lines++;
00267 
00268             if (lines < 5)
00269             {
00270                 printf("%s ", (const char *) names[i]);
00271                 line += names[i].Length() + 1;
00272             }
00273             else
00274                 skipped_markers++;
00275         }
00276 
00277         if (skipped_markers)
00278             printf("as well as %d other unlisted markers...", skipped_markers);
00279 
00280         printf("\n\n");
00281     }
00282 }
00283 
00284 void PedigreeGlobals::GetOrderedMarkers(IntArray & markers)
00285 {
00286     if (markers.Length() == 0)
00287     {
00288         markers.Dimension(markerCount);
00289         markers.SetSequence(0, 1);
00290     }
00291 
00292     MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
00293 
00294     int count = 0;
00295     IntArray missingMarkers;
00296 
00297     for (int i = 0; i < markers.Length(); i++)
00298     {
00299         MarkerInfo * info = GetMarkerInfo(markers[i]);
00300 
00301         if (info->chromosome != -1)
00302             subset[count++] = info;
00303         else
00304             missingMarkers.Push(i);
00305     }
00306 
00307     FlagMissingMarkers(missingMarkers);
00308 
00309     QuickSort(subset, count, sizeof(MarkerInfo *),
00310               COMPAREFUNC MarkerInfo::ComparePosition);
00311 
00312     markers.Clear();
00313     for (int i = 0; i < count; i++)
00314         markers.Push(GetMarkerID(subset[i]->name));
00315 }
00316 
00317 int PedigreeGlobals::SortMarkersInMapOrder(IntArray & markers, int chromosome)
00318 {
00319     if (markers.Length() == 0)
00320     {
00321         markers.Dimension(markerCount);
00322         markers.SetSequence(0, 1);
00323     }
00324 
00325     MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
00326 
00327     int count = 0;
00328     IntArray missingMarkers;
00329 
00330     for (int i = 0; i < markers.Length(); i++)
00331     {
00332         MarkerInfo * info = GetMarkerInfo(markers[i]);
00333 
00334         if (info->chromosome != -1)
00335             subset[count++] = info;
00336         else if (chromosome == -1)
00337             missingMarkers.Push(i);
00338     }
00339 
00340     if (chromosome == -1)
00341         FlagMissingMarkers(missingMarkers);
00342 
00343     QuickSort(subset, count, sizeof(MarkerInfo *),
00344               COMPAREFUNC MarkerInfo::ComparePosition);
00345 
00346     markers.Clear();
00347 
00348     int  current_chromosome = -1, next_chromosome = 0;
00349 
00350     for (int i = 0; i < count; i++)
00351         if (subset[i]->chromosome < chromosome)
00352             continue;
00353         else if (current_chromosome == -1 ||
00354                  subset[i]->chromosome == current_chromosome)
00355         {
00356             markers.Push(GetMarkerID(subset[i]->name));
00357             current_chromosome = subset[i]->chromosome;
00358         }
00359         else if (!next_chromosome)
00360         {
00361             next_chromosome = subset[i]->chromosome;
00362             break;
00363         }
00364 
00365     delete [] subset;
00366 
00367     return next_chromosome;
00368 }
00369 
00370 void PedigreeGlobals::VerifySexSpecificOrder()
00371 {
00372     if (markerCount <= 1)
00373         return;
00374 
00375     MarkerInfo ** sortedMarkers = new MarkerInfo * [markerCount];
00376 
00377     for (int i = 0; i < markerCount; i++)
00378         sortedMarkers[i] = GetMarkerInfo(i);
00379 
00380     QuickSort(sortedMarkers, markerCount, sizeof(MarkerInfo *),
00381               COMPAREFUNC MarkerInfo::ComparePosition);
00382 
00383     double prev_female = sortedMarkers[0]->positionFemale;
00384     double prev_male = sortedMarkers[0]->positionMale;
00385     double curr_female, curr_male;
00386 
00387     int    prev_chromosome = sortedMarkers[0]->chromosome;
00388     int    curr_chromosome;
00389 
00390     for (int i = 1; i < markerCount; i++)
00391     {
00392         curr_chromosome = sortedMarkers[i]->chromosome;
00393         curr_female = sortedMarkers[i]->positionFemale;
00394         curr_male = sortedMarkers[i]->positionMale;
00395 
00396         if (curr_chromosome == prev_chromosome &&
00397                 (curr_female < prev_female || curr_male < prev_male))
00398             error("Sex-specific and sex-averaged maps are inconsistent.\n\n"
00399                   "In the sex-averaged map, marker %s (%.2f cM) follows marker %s (%.2f cM).\n"
00400                   "In the %smale map, marker %s (%.2f cM) PRECEDES marker %s (%.2f cM).\n",
00401                   (const char *) sortedMarkers[i]->name,
00402                   sortedMarkers[i]->position * 100,
00403                   (const char *) sortedMarkers[i-1]->name,
00404                   sortedMarkers[i-1]->position * 100,
00405                   curr_female < prev_female ? "fe" : "",
00406                   (const char *) sortedMarkers[i]->name,
00407                   (curr_female < prev_female ? curr_female : curr_male) * 100,
00408                   (const char *) sortedMarkers[i-1]->name,
00409                   (curr_female < prev_female ? prev_female : prev_male) * 100);
00410 
00411         prev_chromosome = curr_chromosome;
00412         prev_female = curr_female;
00413         prev_male = curr_male;
00414     }
00415 
00416     delete [] sortedMarkers;
00417 }
00418 
00419 void PedigreeGlobals::LoadAlleleFrequencies(const char * filename, bool required)
00420 {
00421     // This function is often called with an empty string, and not
00422     // all implementations of the C library like that ...
00423     if (filename[0] == 0)
00424     {
00425         if (required)
00426             error("No name provided for required allele freuquency file\n");
00427         else
00428             return;
00429     }
00430 
00431     // If we get here, the filename is not empty and things should
00432     // work as planned
00433     IFILE f = ifopen(filename, "rb");
00434 
00435     if (f == NULL)
00436     {
00437         if (required)
00438             error("Failed to open required alallele frequency file '%s'",
00439                   (const char *) filename);
00440         else
00441             return;
00442     }
00443 
00444     LoadAlleleFrequencies(f);
00445     ifclose(f);
00446 }
00447 
00448 void PedigreeGlobals::LoadAlleleFrequencies(IFILE & input)
00449 {
00450     int         done = 0;
00451     String      buffer;
00452     StringArray tokens;
00453     MarkerInfo *info = NULL;
00454 
00455     bool need_blank_line = false;
00456     int  allele_size, old_max, next_allele = 0; // Initialization avoids compiler warning
00457 
00458     while (!ifeof(input) && !done)
00459     {
00460         int   i, j;
00461 
00462         buffer.ReadLine(input);
00463 
00464         tokens.Clear();
00465         tokens.AddTokens(buffer, WHITESPACE);
00466 
00467         if (tokens.Length() < 1) continue;
00468 
00469         switch (toupper(tokens[0][0]))
00470         {
00471             case 'M' :
00472                 if (tokens.Length() == 1)
00473                     error("Unnamed marker in allele frequency file");
00474                 if (info != NULL)
00475                     need_blank_line |= info->AdjustFrequencies();
00476                 info = GetMarkerInfo(tokens[1]);
00477                 info->freq.Clear();
00478                 info->freq.Push(0.0);
00479                 next_allele = 1;
00480                 break;
00481             case 'F' :
00482                 if (info != NULL)
00483                     for (i = 1; i < tokens.Length(); i++)
00484                     {
00485                         buffer = next_allele++;
00486 
00487                         int allele = LoadAllele(info, buffer);
00488 
00489                         if (allele >= info->freq.Length())
00490                         {
00491                             old_max = info->freq.Length();
00492                             info->freq.Dimension(allele + 1);
00493                             for (j = old_max; j < allele; j++)
00494                                 info->freq[j] = 0.0;
00495                         }
00496 
00497                         info->freq[allele] = tokens[i].AsDouble();
00498                     }
00499                 break;
00500             case 'A' :
00501                 if (info == NULL) continue;
00502 
00503                 if (tokens.Length() != 3)
00504                     error("Error reading named allele frequencies for locus %s\n"
00505                           "Lines with named alleles should have the format\n"
00506                           "   A allele_label allele_frequency\n\n"
00507                           "But the following line was read:\n%s\n",
00508                           (const char *) info->name, (const char *) buffer);
00509 
00510                 allele_size = LoadAllele(info, tokens[1]);
00511                 next_allele = atoi(tokens[1]) + 1;
00512 
00513                 if (allele_size < 1)
00514                     error("Error reading named allele frequencies for locus %s\n"
00515                           "An invalid allele label was encountered\n",
00516                           (const char *) info->name);
00517 
00518                 if (allele_size >= info->freq.Length())
00519                 {
00520                     old_max = info->freq.Length();
00521                     info->freq.Dimension(allele_size + 1);
00522                     for (i = old_max; i < allele_size; i++)
00523                         info->freq[i] = 0.0;
00524                 }
00525 
00526                 info->freq[allele_size] = tokens[2];
00527                 break;
00528             case 'E' :
00529                 done = 1;
00530                 break;
00531             default :
00532                 error("Problem in allele frequency file.\n"
00533                       "Lines in this file should be of two types:\n"
00534                       "  -- Marker name lines begin with an M\n"
00535                       "  -- Frequency lines begin with an F\n\n"
00536                       "However the following line is different:\n%s\n",
00537                       (const char *) buffer);
00538         }
00539     }
00540 
00541     if (info != NULL)
00542         need_blank_line |= info->AdjustFrequencies();
00543 
00544     if (need_blank_line) printf("\n");
00545 }
00546 
00547 void PedigreeGlobals::LoadMarkerMap(const char * filename, bool filter)
00548 {
00549     IFILE f = ifopen(filename, "rb");
00550     if (f == NULL) return;
00551     LoadMarkerMap(f, filter);
00552     ifclose(f);
00553 }
00554 
00555 void PedigreeGlobals::LoadMarkerMap(IFILE & input, bool filter)
00556 {
00557     String      buffer;
00558     StringArray tokens;
00559     bool first_pass = true;
00560 
00561     while (!ifeof(input))
00562     {
00563         buffer.ReadLine(input);
00564 
00565         tokens.Clear();
00566         tokens.AddTokens(buffer, WHITESPACE);
00567 
00568         if (tokens.Length() < 1) continue;
00569 
00570         if (first_pass)
00571         {
00572             sexSpecificMap = (tokens.Length() == 5);
00573 
00574             // if (sexSpecificMap)
00575             //   printf("\n  Found sex-specific map ...\n\n");
00576 
00577             first_pass = false;
00578         }
00579 
00580         if (tokens.Length() != 3 && !sexSpecificMap)
00581             error("Error reading map file\n"
00582                   "Each line in this file should include 3 fields:\n"
00583                   "CHROMOSOME, MARKER_NAME, and POSITION\n"
00584                   "However the following line has %d fields\n%s\n",
00585                   tokens.Length(), (const char *) buffer);
00586 
00587         if (tokens.Length() != 5 && sexSpecificMap)
00588             error("Error reading map file\n"
00589                   "Each line in this file should include 5 fields:\n\n"
00590                   "CHROMOSOME, MARKER_NAME, SEX_AVERAGED_POS, FEMALE_POS AND MALE_POS\n\n"
00591                   "However the following line has %d fields\n%s\n",
00592                   tokens.Length(), (const char *) buffer);
00593 
00594         bool previous_state = String::caseSensitive;
00595         String::caseSensitive = false;
00596 
00597         if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
00598                 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
00599                 (tokens[2] == "KOSAMBI" || tokens[2] == "POS" || tokens[2] == "POSITION" ||
00600                  tokens[2] == "SEX_AVERAGED_POS" || tokens[2] == "CM" || tokens[2] == "HALDANE"))
00601             continue;
00602 
00603         String::caseSensitive = previous_state;
00604 
00605         if (filter)
00606             if (LookupMarker(tokens[1]) < 0)
00607                 continue;
00608 
00609         MarkerInfo * info = GetMarkerInfo(tokens[1]);
00610 
00611         int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
00612 
00613         info->chromosome = chr;
00614         info->position = (double) tokens[2] * 0.01;
00615 
00616         if (sexSpecificMap)
00617         {
00618             char * flag;
00619 
00620             double female = strtod(tokens[3], &flag);
00621             if (*flag)
00622                 error("In the map file, the female cM position for marker\n"
00623                       "%s is %s. This is not a valid number.",
00624                       (const char *) tokens[1], (const char *) tokens[3]);
00625 
00626             double male = strtod(tokens[4], &flag);
00627             if (*flag)
00628                 error("In the map file, the male cM position for marker\n"
00629                       "%s is %s. This is not a valid number.",
00630                       (const char *) tokens[1], (const char *) tokens[4]);
00631 
00632             info->positionFemale = (double) female * 0.01;
00633             info->positionMale = (double) male * 0.01;
00634         }
00635         else
00636             info->positionFemale = info->positionMale = info->position;
00637     }
00638 
00639     if (sexSpecificMap) VerifySexSpecificOrder();
00640 }
00641 
00642 void PedigreeGlobals::LoadBasepairMap(const char * filename)
00643 {
00644     IFILE f = ifopen(filename, "rb");
00645     if (f == NULL)
00646         error("The map file [%s] could not be opened\n\n"
00647               "Please check that the filename is correct and that the file is\n"
00648               "not being used by another program", filename);
00649     LoadBasepairMap(f);
00650     ifclose(f);
00651 }
00652 
00653 void PedigreeGlobals::LoadBasepairMap(IFILE & input)
00654 {
00655     String      buffer;
00656     StringArray tokens;
00657 
00658     sexSpecificMap = false;
00659 
00660     while (!ifeof(input))
00661     {
00662         buffer.ReadLine(input);
00663 
00664         tokens.Clear();
00665         tokens.AddTokens(buffer, WHITESPACE);
00666 
00667         if (tokens.Length() < 1) continue;
00668 
00669         if (tokens.Length() != 3)
00670             error("Error reading map file\n"
00671                   "Each line in this file should include 3 fields:\n"
00672                   "CHROMOSOME, MARKER_NAME, and POSITION\n"
00673                   "However the following line has %d fields\n%s\n",
00674                   tokens.Length(), (const char *) buffer);
00675 
00676         bool previous_state = String::caseSensitive;
00677         String::caseSensitive = false;
00678 
00679         if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
00680                 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
00681                 (tokens[2] == "BASEPAIR" || tokens[2] == "POS" || tokens[2] == "POSITION"))
00682             continue;
00683 
00684         String::caseSensitive = previous_state;
00685 
00686         MarkerInfo * info = GetMarkerInfo(tokens[1]);
00687 
00688         int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
00689 
00690         info->chromosome = chr;
00691         info->position = (double) tokens[2];
00692     }
00693 }
00694 
00695 int PedigreeGlobals::instanceCount = 0;
00696 
00697 PedigreeGlobals::~PedigreeGlobals()
00698 {
00699     if (--instanceCount == 0 && markerInfoSize)
00700     {
00701         for (int i = 0; i < markerInfoCount; i++)
00702             delete markerInfo[i];
00703         delete [] markerInfo;
00704         delete [] markerInfoByInteger;
00705     }
00706 }
00707 
00708 void PedigreeGlobals::WriteMapFile(const char * filename)
00709 {
00710     if (!MarkerPositionsAvailable())
00711         return;
00712 
00713     FILE * output = fopen(filename, "wt");
00714 
00715     if (output == NULL)
00716         error("Creating map file \"%s\"", filename);
00717 
00718     WriteMapFile(output);
00719     fclose(output);
00720 }
00721 
00722 void PedigreeGlobals::WriteMapFile(FILE * output)
00723 {
00724     if (!sexSpecificMap)
00725         fprintf(output, "CHR  MARKER    POS\n");
00726     else
00727         fprintf(output, "CHR  MARKER    POS   POSF   POSM\n");
00728 
00729     for (int i = 0; i < markerInfoCount; i++)
00730     {
00731         if (markerInfo[i]->chromosome != -1)
00732         {
00733             if (!sexSpecificMap)
00734                 fprintf(output, "%3d  %-10s  %g\n",
00735                         markerInfo[i]->chromosome,
00736                         (const char *) markerInfo[i]->name,
00737                         markerInfo[i]->position * 100.0);
00738             else
00739                 fprintf(output, "%3d %-10s %g  %g  %g\n",
00740                         markerInfo[i]->chromosome,
00741                         (const char *) markerInfo[i]->name,
00742                         markerInfo[i]->position * 100.0,
00743                         markerInfo[i]->positionFemale * 100.0,
00744                         markerInfo[i]->positionMale * 100.0);
00745         }
00746     }
00747 }
00748 
00749 void PedigreeGlobals::WriteFreqFile(const char * filename, bool old_format)
00750 {
00751     FILE * output = fopen(filename, "wt");
00752 
00753     if (output == NULL)
00754         error("Creating allele frequency file \"%s\"", filename);
00755 
00756     WriteFreqFile(output, old_format);
00757     fclose(output);
00758 }
00759 
00760 void PedigreeGlobals::WriteFreqFile(FILE * output, bool old_format)
00761 {
00762     for (int i = 0; i < markerInfoCount; i++)
00763     {
00764         MarkerInfo * info = markerInfo[i];
00765 
00766         if (info->freq.Length() == 0) continue;
00767 
00768         fprintf(output, "M %s\n", (const char *) info->name);
00769 
00770         if (old_format && info->alleleLabels.Length() == 0)
00771             for (int j = 1; j < info->freq.Length(); j++)
00772                 fprintf(output, "%s%.5f%s",
00773                         j % 7 == 1 ? "F " : "", info->freq[j],
00774                         j == info->freq.Length() - 1 ? "\n" : j % 7 == 0 ? "\n" : " ");
00775         else
00776             for (int j = 1; j < info->freq.Length(); j++)
00777                 if (info->freq[j] > 1e-7)
00778                     fprintf(output, "A %5s %.5f\n",
00779                             (const char *) info->GetAlleleLabel(j), info->freq[j]);
00780     }
00781 }
00782 
00783 bool PedigreeGlobals::MarkerPositionsAvailable()
00784 {
00785     for (int i = 0; i < markerInfoCount; i++)
00786         if (markerInfo[i]->chromosome != -1)
00787             return true;
00788 
00789     return false;
00790 }
00791 
00792 bool PedigreeGlobals::AlleleFrequenciesAvailable()
00793 {
00794     for (int i = 0; i < markerInfoCount; i++)
00795         if (markerInfo[i]->freq.Length() > 1)
00796             return true;
00797 
00798     return false;
00799 }
00800 
00801 int PedigreeGlobals::LoadAllele(int marker, String & token)
00802 {
00803     return LoadAllele(GetMarkerInfo(marker), token);
00804 }
00805 
00806 int PedigreeGlobals::LoadAllele(MarkerInfo * info, String & token)
00807 {
00808     int allele = info->GetAlleleNumber(token);
00809 
00810     if (allele >= 0) return allele;
00811 
00812     static unsigned char lookup[128];
00813     static bool init = false;
00814 
00815     if (!init)
00816     {
00817         init = true;
00818 
00819         for (int i = 0; i < 128; i++)
00820             lookup[i] = 0;
00821 
00822         for (int i = '1'; i <= '9'; i++)
00823             lookup[i] = 1;
00824 
00825         lookup[int('a')] = lookup[int('A')] = lookup[int('c')] = lookup[int('C')] = 2;
00826         lookup[int('g')] = lookup[int('G')] = lookup[int('t')] = lookup[int('T')] = 2;
00827     }
00828 
00829     int  first = token[0];
00830     bool goodstart = first > 0 && first < 128;
00831 
00832     if (token.Length() == 1 && goodstart && lookup[int(token[0])])
00833         return info->NewAllele(token);
00834 
00835     if (!goodstart || lookup[int(token[0])] != 1)
00836         return 0;
00837 
00838     int integer = token.AsInteger();
00839     token = integer;
00840 
00841     allele = info->GetAlleleNumber(token);
00842 
00843     if (allele > 0)
00844         return allele;
00845 
00846     if (integer <= 0) return 0;
00847 
00848     if (integer > 1000000)
00849     {
00850         static bool warn_user = true;
00851 
00852         if (warn_user)
00853         {
00854             printf("Some allele numbers for marker %s are > 1000000\n"
00855                    "All allele numbers >1000000 will be treated as missing\n\n",
00856                    (const char *) info->name);
00857             warn_user = false;
00858         }
00859 
00860         return 0;
00861     }
00862 
00863     return info->NewAllele(token);
00864 }
00865 
00866 std::ostream &operator << (std::ostream &stream, MarkerInfo &m)
00867 {
00868     stream << "MarkerInfo for marker " << m.name << std::endl;
00869     stream << " located on chromsome " << m.chromosome << ":" << (int64_t)(100 * m.position) << std::endl;
00870     stream << " allele count = " << m.freq.Length() << std::endl;
00871     stream << " label count = " << m.alleleLabels.Length() << std::endl;
00872     if (m.freq.Length() == m.alleleLabels.Length())
00873     {
00874         for (int i=0; i<m.freq.Length(); i++)
00875         {
00876             stream << " " << m.alleleLabels[i] << ":" << m.freq[i];
00877         }
00878         stream << std::endl;
00879     }
00880     else
00881     {
00882         stream << " output truncated - counts appear corrupt." << std::endl;
00883     }
00884     return stream;
00885 }
00886 
Generated on Wed Nov 17 15:38:29 2010 for StatGen Software by  doxygen 1.6.3