BaseAsciiMap.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef _BASE_ASCII_MAP_H
00019 #define _BASE_ASCII_MAP_H
00020 
00021 #include "StringBasics.h"
00022 
/// Translates sequence characters (ASCII) into small integer base indices.
///
/// An instance tracks which lookup table is currently active (base space,
/// color space, or not yet determined) and how many primer bases it has
/// consumed since the map type was last set/reset.  The static lookup tables
/// are only declared here; their contents are defined in another translation
/// unit.
class BaseAsciiMap
{
public:
    /// Integer codes returned by the lookup tables.
    static const int baseAIndex = 0;
    static const int baseTIndex = 1;
    static const int baseCIndex = 2;
    static const int baseGIndex = 3;
    static const int baseNIndex = 4; // baseN --> bad read
    static const int baseXIndex = 5; // baseX --> unknown (bad data)

    //
    // Tables for converting between a base character (ASCII) and a base
    // integer.  The regular bases use the range 0..3; the values 4
    // (baseNIndex) and 5 (baseXIndex) mark a bad read and unknown data.
    //
    // NOTE(review): the integer -> character tables are presumably indexed
    // by the base*Index values above; confirm against their definitions.
    static const char int2base[];
    static const char int2colorSpace[];
    static unsigned char base2complement[];

    /// The kind of read a map instance is configured for.
    enum SPACE_TYPE {UNKNOWN, BASE_SPACE, COLOR_SPACE};

    // Character -> base index tables, indexed by the character's unsigned
    // value (0..255).
    // NOTE(review): baseColor2int presumably accepts both base-space and
    // color-space characters; confirm against its definition.
    static unsigned char baseColor2int[256+1];
    static unsigned char base2int[256+1];        // base space read (ATCG)
    static unsigned char color2int[256+1];       // color space read

public:
    BaseAsciiMap();
    ~BaseAsciiMap();

    /// Select the lookup table explicitly and restart primer counting.
    /// \param spaceType BASE_SPACE selects base2int, COLOR_SPACE selects
    ///        color2int; any other value leaves the type undetermined so it
    ///        is detected from the next looked-up character.
    inline void setBaseMapType(SPACE_TYPE spaceType)
    {
        resetPrimerCount();
        switch (spaceType)
        {
            case BASE_SPACE:
                myBase2IntMapPtr = base2int;
                break;
            case COLOR_SPACE:
                myBase2IntMapPtr = color2int;
                break;
            default:
                // Unknown map type, zero the pointer.
                myBase2IntMapPtr = NULL;
                break;
        }
    }

    /// Return the base index for the character passed in.
    ///
    /// While fewer than the configured number of primer bases have been
    /// seen (and the map is undetermined or color space), the character is
    /// looked up in base2int and counted as a primer base.  If the map type
    /// is still undetermined after the primers, it is detected from
    /// \a letter; baseXIndex is returned when the character is valid in
    /// neither base space nor color space.
    /// \param letter character to translate.
    /// \return the table entry for \a letter, or baseXIndex as above.
    inline int getBaseIndex(const char& letter)
    {
        // Index through unsigned char: plain char may be signed, in which
        // case a byte >= 0x80 would become a negative, out-of-bounds index.
        const unsigned char index = static_cast<unsigned char>(letter);

        if (myBase2IntMapPtr == NULL)
        {
            // Check to see if we have hit the number of primer bases.
            if (myPrimerCount < myNumPrimerBases)
            {
                // Still expecting primer bases, so look the letter up
                // in the base map.
                ++myPrimerCount;
                return base2int[index];
            }

            // All primers have been processed, so determine whether this
            // is base or color space from the letter itself.
            setBaseMapType(letter);

            // If it is still null, return invalid.  The type will be set
            // when the first letter that is either color or base arrives.
            if (myBase2IntMapPtr == NULL)
            {
                return baseXIndex;
            }
        }

        // When configured as color space, the leading primer bases are
        // still looked up in the base map.
        if ((myBase2IntMapPtr == color2int) && (myPrimerCount < myNumPrimerBases))
        {
            ++myPrimerCount;
            return base2int[index];
        }

        return myBase2IntMapPtr[index];
    }

    /// Report which lookup table is currently active.
    /// \return BASE_SPACE, COLOR_SPACE, or UNKNOWN when neither table is
    ///         selected.
    inline SPACE_TYPE getSpaceType()
    {
        if (myBase2IntMapPtr == base2int)
        {
            return BASE_SPACE;
        }
        if (myBase2IntMapPtr == color2int)
        {
            return COLOR_SPACE;
        }
        return UNKNOWN;
    }

    /// Set how many primer bases to expect before the read data.
    void setNumPrimerBases(int numPrimerBases)
    {
        myNumPrimerBases = numPrimerBases;
    }

    /// Restart primer counting without changing the active table.
    void resetPrimerCount()
    {
        myPrimerCount = 0;
    }

    /// Forget the active table and restart primer counting.
    void resetBaseMapType()
    {
        myBase2IntMapPtr = NULL;
        resetPrimerCount();
    }

private:
    /// Set the base type based on the passed in letter.
    /// Base space is tried first, then color space.  If the letter is
    /// valid in neither, the type stays undetermined (pointer is NULL).
    inline void setBaseMapType(const char& letter)
    {
        // See getBaseIndex: never index the tables with a signed char.
        const unsigned char index = static_cast<unsigned char>(letter);

        if (base2int[index] != baseXIndex)
        {
            // This is a valid base space index, so it is base space.
            myBase2IntMapPtr = base2int;
        }
        else if (color2int[index] != baseXIndex)
        {
            // This is a valid color space index, so it is color space.
            myBase2IntMapPtr = color2int;
        }
        else
        {
            // Unknown map type, zero the pointer.
            myBase2IntMapPtr = NULL;
        }
    }


    // The number of primer bases to expect for a color-space file.
    unsigned int myNumPrimerBases;

    // This is the number of primer bases that have been seen since
    // the map type was set/reset.
    unsigned int myPrimerCount;

    // Active lookup table: base2int, color2int, or NULL when undetermined.
    unsigned char* myBase2IntMapPtr;
};
00178 
00179 #endif
Generated on Tue Aug 23 18:19:04 2011 for libStatGen Software by  doxygen 1.6.3