00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef _BASE_ASCII_MAP_H 00019 #define _BASE_ASCII_MAP_H 00020 00021 #include "StringBasics.h" 00022 00023 /// Map between characters and the associated base type. 00024 class BaseAsciiMap 00025 { 00026 public: 00027 /// Value associated with 'A' in the ascii to base map. 00028 static const int baseAIndex = 000; 00029 /// Value associated with 'T' in the ascii to base map. 00030 static const int baseTIndex = 001; 00031 /// Value associated with 'C' in the ascii to base map. 00032 static const int baseCIndex = 002; 00033 /// Value associated with 'G' in the ascii to base map. 00034 static const int baseGIndex = 003; 00035 /// Value associated with 'N' in the ascii to base map (bad read). 00036 static const int baseNIndex = 004; 00037 /// Value associated with any non-base character in the ascii to base 00038 /// map (unknown, bad data). 00039 static const int baseXIndex = 005; 00040 00041 // Two arrays for converting back and forth between base pair character 00042 // value (ASCII) to a base integer in the range 0..3. Note there is actually 00043 // a value 4 and 5, for 'N' (indelible) and 'M' (unknown to me). 00044 // 00045 /// Convert from int representation to the base. 00046 static const char int2base[]; 00047 /// Convert from int representation to colorspace representation. 00048 static const char int2colorSpace[]; 00049 static unsigned char base2complement[]; 00050 00051 /// The type of space (color or base) to use in the mapping. 00052 enum SPACE_TYPE { 00053 /// Base decision on the first raw seq character/type has yet 00054 /// to be determined. 00055 UNKNOWN, 00056 BASE_SPACE, ///< Bases only (A,C,G,T,N). 00057 COLOR_SPACE ///< Color space only (0,1,2,3,.). 00058 }; 00059 00060 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00061 /// both base and color space. 00062 /// 'A'/'a'/'0' -> 0; 'C'/'c'/'1' -> 1; 'G'/'g'/'2' -> 2; 'T'/'t'/'3' -> 3; 00063 /// 'N'/'n'/'4' -> 4; anything else -> 5. 00064 static unsigned char baseColor2int[256+1]; // base space read (ATCG) 00065 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00066 /// just base space (ACTGNactgn). 00067 /// 'A'/'a' -> 0; 'C'/'c' -> 1; 'G'/'g' -> 2; 'T'/'t' -> 3; 00068 /// 'N'/'n' -> 4; anything else -> 5. 00069 static unsigned char base2int[256+1]; // base space read (ATCG) 00070 /// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00071 /// just color space (0123). 00072 /// '0' -> 0; '1' -> 1; '2' -> 2; '3' -> 3; '4' -> 4; anything else -> 5. 00073 static unsigned char color2int[256+1]; // base space read (ATCG) 00074 00075 public: 00076 BaseAsciiMap(); 00077 ~BaseAsciiMap(); 00078 00079 /// Set the base type based on the passed in option. 00080 inline void setBaseMapType(SPACE_TYPE spaceType) 00081 { 00082 resetPrimerCount(); 00083 //First check to see if it is in base space. 00084 switch (spaceType) 00085 { 00086 case BASE_SPACE: 00087 // base space. 00088 myBase2IntMapPtr = base2int; 00089 break; 00090 case COLOR_SPACE: 00091 // color space. 00092 myBase2IntMapPtr = color2int; 00093 break; 00094 default: 00095 // Unknown map type, zero the pointer. 00096 myBase2IntMapPtr = NULL; 00097 break; 00098 } 00099 }; 00100 00101 /// Returns the baseIndex value for the character passed in. 00102 inline int getBaseIndex(const char& letter) 00103 { 00104 if (myBase2IntMapPtr == NULL) 00105 { 00106 // Check to see if we have hit the number of primer bases. 00107 if (myPrimerCount < myNumPrimerBases) 00108 { 00109 // Still expecting primer bases, so lookup 00110 // the letter in the base map. 00111 ++myPrimerCount; 00112 return(base2int[(int)letter]); 00113 } 00114 00115 // Have already processed all the primers, so determine 00116 // whether this is base or color space. 00117 00118 // Need to determime the base type. 00119 setBaseMapType(letter); 00120 00121 // If it is still null, return invalid. Will be set when the first 00122 // letter is either color or base. 00123 if (myBase2IntMapPtr == NULL) 00124 { 00125 return(baseXIndex); 00126 } 00127 } 00128 00129 // Also check if configured as color space that the primers are correct. 00130 if ((myBase2IntMapPtr == color2int) && (myPrimerCount < myNumPrimerBases)) 00131 { 00132 // Still expecting primer bases, so lookup 00133 // the letter in the base map. 00134 ++myPrimerCount; 00135 return(base2int[(int)letter]); 00136 } 00137 00138 return myBase2IntMapPtr[(int)letter]; 00139 } 00140 00141 /// Return the space type that is currently set. 00142 inline SPACE_TYPE getSpaceType() 00143 { 00144 if (myBase2IntMapPtr == base2int) 00145 { 00146 return(BASE_SPACE); 00147 } 00148 else if (myBase2IntMapPtr == color2int) 00149 { 00150 return(COLOR_SPACE); 00151 } 00152 else 00153 { 00154 return(UNKNOWN); 00155 } 00156 } 00157 00158 /// Set the number of primer bases expected before the actual 00159 /// base/color space type occurs for the rest of the entries. 00160 void setNumPrimerBases(int numPrimerBases) 00161 { 00162 myNumPrimerBases = numPrimerBases; 00163 } 00164 00165 /// Reset the number of primers to 0. 00166 void resetPrimerCount() 00167 { 00168 myPrimerCount = 0; 00169 }; 00170 00171 /// Reset the base mapping type to UNKNOWN. 00172 void resetBaseMapType() 00173 { 00174 myBase2IntMapPtr = NULL; 00175 resetPrimerCount(); 00176 }; 00177 00178 private: 00179 // Set the base type based on the passed in letter. 00180 // If the letter is in neither the color space or the base space, both 00181 // will be allowed. 00182 inline void setBaseMapType(const char& letter) 00183 { 00184 //First check to see if it is in base space. 00185 if (base2int[(int)letter] != baseXIndex) 00186 { 00187 // This is a valid base space index, so it is base space. 00188 myBase2IntMapPtr = base2int; 00189 } 00190 else if (color2int[(int)letter] != baseXIndex) 00191 { 00192 // This is a valid color space index, so it is base space. 00193 myBase2IntMapPtr = color2int; 00194 } 00195 else 00196 { 00197 // Unknown map type, zero the pointer. 00198 myBase2IntMapPtr = NULL; 00199 } 00200 }; 00201 00202 00203 // The number of primer bases to expect for a color-space file. 00204 unsigned int myNumPrimerBases; 00205 00206 // This is the number of primer bases that have been seen since 00207 // the map type was set/reset. 00208 unsigned int myPrimerCount; 00209 00210 unsigned char* myBase2IntMapPtr; 00211 }; 00212 00213 #endif