BaseAsciiMap.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef _BASE_ASCII_MAP_H
00019 #define _BASE_ASCII_MAP_H
00020 
00021 #include "StringBasics.h"
00022 
// Maps ASCII read characters to small integer base indices.
//
// A map object is in one of three modes (see SPACE_TYPE): base space,
// color space, or unknown.  In unknown mode the mode is detected from the
// first non-primer character passed to getBaseIndex().
class BaseAsciiMap
{
public:
    // Integer codes produced by the lookup tables.
    static const int baseAIndex = 0;
    static const int baseTIndex = 1;
    static const int baseCIndex = 2;
    static const int baseGIndex = 3;
    static const int baseNIndex = 4; // baseN --> bad read
    static const int baseXIndex = 5; // baseX --> unknown (bad data)

    //
    // Arrays for converting a base integer back to its character form.
    // Regular bases use the range 0..3; values 4 and 5 (baseNIndex and
    // baseXIndex) are reserved for bad reads and unknown data.
    // The table contents are defined outside this header.
    //
    static const char int2base[];
    static const char int2colorSpace[];
    static unsigned char base2complement[];

    enum SPACE_TYPE {UNKNOWN, BASE_SPACE, COLOR_SPACE};

public:
    BaseAsciiMap();
    ~BaseAsciiMap();

    // Select the lookup table explicitly from the passed in option and
    // restart the primer count.  Any value other than BASE_SPACE or
    // COLOR_SPACE puts the map back into the unknown (auto-detect) mode.
    void setBaseMapType(SPACE_TYPE spaceType)
    {
        resetPrimerCount();
        switch (spaceType)
        {
            case BASE_SPACE:
                myBase2IntMapPtr = base2int;
                break;
            case COLOR_SPACE:
                myBase2IntMapPtr = color2int;
                break;
            default:
                // Unknown map type, zero the pointer.
                myBase2IntMapPtr = NULL;
                break;
        }
    }

    // Returns the baseIndex value for the character passed in.
    //
    // While fewer than myNumPrimerBases characters have been counted, and
    // the map is in unknown or color-space mode, the character is counted
    // as a primer base and looked up in the base-space table.  In unknown
    // mode the first character after the primers selects the mode; if it is
    // valid in neither table, baseXIndex is returned and the mode stays
    // unknown so that a later character can select it.
    int getBaseIndex(const char& letter)
    {
        // Plain char may be signed, so convert through unsigned char to
        // keep bytes >= 0x80 from producing a negative table index.
        const unsigned char index = tableIndex(letter);

        if (myBase2IntMapPtr == NULL)
        {
            // Check to see if we have hit the number of primer bases.
            if (myPrimerCount < myNumPrimerBases)
            {
                // Still expecting primer bases, so look the letter up in
                // the base map.
                ++myPrimerCount;
                return base2int[index];
            }

            // All primers have been processed, so determine whether this
            // is base or color space.
            setBaseMapType(letter);

            // If it is still null, return invalid.  The type will be set
            // when the first letter that is either color or base arrives.
            if (myBase2IntMapPtr == NULL)
            {
                return baseXIndex;
            }
        }

        // When configured as color space, the leading primers are still
        // read through the base map.
        if ((myBase2IntMapPtr == color2int) &&
            (myPrimerCount < myNumPrimerBases))
        {
            ++myPrimerCount;
            return base2int[index];
        }

        return myBase2IntMapPtr[index];
    }

    // Reports which lookup table is currently selected.
    SPACE_TYPE getSpaceType() const
    {
        if (myBase2IntMapPtr == base2int)
        {
            return BASE_SPACE;
        }
        if (myBase2IntMapPtr == color2int)
        {
            return COLOR_SPACE;
        }
        return UNKNOWN;
    }

    // Sets how many leading characters are treated as primer bases.
    // NOTE(review): the value is stored in an unsigned int, so a negative
    // argument converts to a very large count.
    void setNumPrimerBases(int numPrimerBases)
    {
        myNumPrimerBases = numPrimerBases;
    }

    // Restarts the count of primer bases seen so far.
    void resetPrimerCount()
    {
        myPrimerCount = 0;
    }

    // Returns the map to the unknown (auto-detect) mode and restarts the
    // primer count.
    void resetBaseMapType()
    {
        myBase2IntMapPtr = NULL;
        resetPrimerCount();
    }

private:
    // Converts a character to a non-negative index into the lookup tables.
    static unsigned char tableIndex(char letter)
    {
        return static_cast<unsigned char>(letter);
    }

    // Set the base type based on the passed in letter.
    // Base space is checked first, then color space.  If the letter is
    // valid in neither, the type is left unknown.  Unlike the SPACE_TYPE
    // overload, this does not reset the primer count.
    void setBaseMapType(const char& letter)
    {
        const unsigned char index = tableIndex(letter);

        if (base2int[index] != baseXIndex)
        {
            // This is a valid base space index, so it is base space.
            myBase2IntMapPtr = base2int;
        }
        else if (color2int[index] != baseXIndex)
        {
            // This is a valid color space index, so it is color space.
            myBase2IntMapPtr = color2int;
        }
        else
        {
            // Unknown map type, zero the pointer.
            myBase2IntMapPtr = NULL;
        }
    }


    // The number of primer bases to expect for a color-space file.
    unsigned int myNumPrimerBases;

    // This is the number of primer bases that have been seen since
    // the map type was set/reset.
    unsigned int myPrimerCount;

    // Currently selected lookup table: base2int, color2int, or NULL while
    // the space type is unknown.
    unsigned char* myBase2IntMapPtr;

    // Character-to-index lookup tables, defined outside this header.
    // NOTE(review): each is sized 256+1; the purpose of the extra slot is
    // not visible here.
    static unsigned char baseColor2int[256+1];   // presumably base and color space combined -- TODO confirm
    static unsigned char base2int[256+1];        // base space read (ATCG)
    static unsigned char color2int[256+1];       // color space read
};
00177 
00178 #endif
Generated on Wed Nov 17 15:38:28 2010 for StatGen Software by  doxygen 1.6.3