libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef _BASE_ASCII_MAP_H 00019 #define _BASE_ASCII_MAP_H 00020 00021 #include "StringBasics.h" 00022 00023 /// Map between characters and the associated base type. 00024 class BaseAsciiMap 00025 { 00026 public: 00027 /// Value associated with 'N' in the ascii to base map (bad read). 00028 static const int baseNIndex = 004; 00029 /// Value associated with any non-base character in the ascii to base 00030 /// map (unknown, bad data). 00031 static const int baseXIndex = 005; 00032 00033 // Two arrays for converting back and forth between base pair character 00034 // value (ASCII) to a base integer in the range 0..3. Note there is actually 00035 // a value 4 and 5, for 'N' (indelible) and 'M' (unknown to me). 00036 // 00037 /// Convert from int representation to the base. 00038 static const char int2base[]; 00039 /// Convert from int representation to colorspace representation. 00040 static const char int2colorSpace[]; 00041 static unsigned char base2complement[]; 00042 00043 /// The type of space (color or base) to use in the mapping. 00044 enum SPACE_TYPE { 00045 /// Base decision on the first raw seq character/type has yet 00046 /// to be determined. 00047 UNKNOWN, 00048 BASE_SPACE, ///< Bases only (A,C,G,T,N). 00049 COLOR_SPACE ///< Color space only (0,1,2,3,.). 00050 }; 00051 00052 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00053 /// both base and color space. 00054 /// 'A'/'a'/'0' -> 0; 'C'/'c'/'1' -> 1; 'G'/'g'/'2' -> 2; 'T'/'t'/'3' -> 3; 00055 /// 'N'/'n'/'4' -> 4; anything else -> 5. 00056 static unsigned char baseColor2int[256+1]; // base space read (ATCG) 00057 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00058 /// just base space (ACTGNactgn). 00059 /// 'A'/'a' -> 0; 'C'/'c' -> 1; 'G'/'g' -> 2; 'T'/'t' -> 3; 00060 /// 'N'/'n' -> 4; anything else -> 5. 00061 static unsigned char base2int[256+1]; // base space read (ATCG) 00062 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00063 /// just color space (0123). 00064 /// '0' -> 0; '1' -> 1; '2' -> 2; '3' -> 3; '4' -> 4; anything else -> 5. 00065 static unsigned char color2int[256+1]; // base space read (ATCG) 00066 00067 public: 00068 BaseAsciiMap(); 00069 ~BaseAsciiMap(); 00070 00071 /// Set the base type based on the passed in option. 00072 inline void setBaseMapType(SPACE_TYPE spaceType) 00073 { 00074 resetPrimerCount(); 00075 //First check to see if it is in base space. 00076 switch (spaceType) 00077 { 00078 case BASE_SPACE: 00079 // base space. 00080 myBase2IntMapPtr = base2int; 00081 break; 00082 case COLOR_SPACE: 00083 // color space. 00084 myBase2IntMapPtr = color2int; 00085 break; 00086 default: 00087 // Unknown map type, zero the pointer. 00088 myBase2IntMapPtr = NULL; 00089 break; 00090 } 00091 }; 00092 00093 /// Returns the baseIndex value for the character passed in. 00094 inline int getBaseIndex(const char& letter) 00095 { 00096 if (myBase2IntMapPtr == NULL) 00097 { 00098 // Check to see if we have hit the number of primer bases. 00099 if (myPrimerCount < myNumPrimerBases) 00100 { 00101 // Still expecting primer bases, so lookup 00102 // the letter in the base map. 00103 ++myPrimerCount; 00104 return(base2int[(int)letter]); 00105 } 00106 00107 // Have already processed all the primers, so determine 00108 // whether this is base or color space. 00109 00110 // Need to determime the base type. 00111 setBaseMapType(letter); 00112 00113 // If it is still null, return invalid. Will be set when the first 00114 // letter is either color or base. 00115 if (myBase2IntMapPtr == NULL) 00116 { 00117 return(baseXIndex); 00118 } 00119 } 00120 00121 // Also check if configured as color space that the primers are correct. 00122 if ((myBase2IntMapPtr == color2int) && (myPrimerCount < myNumPrimerBases)) 00123 { 00124 // Still expecting primer bases, so lookup 00125 // the letter in the base map. 00126 ++myPrimerCount; 00127 return(base2int[(int)letter]); 00128 } 00129 00130 return myBase2IntMapPtr[(int)letter]; 00131 } 00132 00133 /// Return the space type that is currently set. 00134 inline SPACE_TYPE getSpaceType() 00135 { 00136 if (myBase2IntMapPtr == base2int) 00137 { 00138 return(BASE_SPACE); 00139 } 00140 else if (myBase2IntMapPtr == color2int) 00141 { 00142 return(COLOR_SPACE); 00143 } 00144 else 00145 { 00146 return(UNKNOWN); 00147 } 00148 } 00149 00150 /// Set the number of primer bases expected before the actual 00151 /// base/color space type occurs for the rest of the entries. 00152 void setNumPrimerBases(int numPrimerBases) 00153 { 00154 myNumPrimerBases = numPrimerBases; 00155 } 00156 00157 /// Reset the number of primers to 0. 00158 void resetPrimerCount() 00159 { 00160 myPrimerCount = 0; 00161 }; 00162 00163 /// Reset the base mapping type to UNKNOWN. 00164 void resetBaseMapType() 00165 { 00166 myBase2IntMapPtr = NULL; 00167 resetPrimerCount(); 00168 }; 00169 00170 private: 00171 // Set the base type based on the passed in letter. 00172 // If the letter is in neither the color space or the base space, both 00173 // will be allowed. 00174 inline void setBaseMapType(const char& letter) 00175 { 00176 //First check to see if it is in base space. 00177 if (base2int[(int)letter] != baseXIndex) 00178 { 00179 // This is a valid base space index, so it is base space. 00180 myBase2IntMapPtr = base2int; 00181 } 00182 else if (color2int[(int)letter] != baseXIndex) 00183 { 00184 // This is a valid color space index, so it is base space. 00185 myBase2IntMapPtr = color2int; 00186 } 00187 else 00188 { 00189 // Unknown map type, zero the pointer. 00190 myBase2IntMapPtr = NULL; 00191 } 00192 }; 00193 00194 00195 // The number of primer bases to expect for a color-space file. 00196 unsigned int myNumPrimerBases; 00197 00198 // This is the number of primer bases that have been seen since 00199 // the map type was set/reset. 00200 unsigned int myPrimerCount; 00201 00202 unsigned char* myBase2IntMapPtr; 00203 }; 00204 00205 #endif