libStatGen Software  1
BaseAsciiMap.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "BaseAsciiMap.h"
00019 
00020 //
00021 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
00022 // both base and color space.
00023 //  class 0 -> 'A' (Adenine - 0x41 and 0x61)
00024 //  class 1 -> 'C' (Cytosine - 0x43 and 0x63)
00025 //  class 2 -> 'G' (Guanine - 0x47 and 0x67)
00026 //  class 3 -> 'T' (Thymine - 0x54 and 0x74)
00027 //  class 4 -> 'N' (Unknown - read error or incomplete data - 0x4E and 0x6E)
00028 //  class 5 -> not a valid DNA base pair character
00029 //
00030 // Note: The +1 array size is for the terminating NUL character
00031 //
00032 // NB: This table also maps 0, 1, 2, and 3 to the corresponding integers,
00033 // and '.' to class 4.  This allows ABI SOLiD reads to be converted
00034 // to integers via ReadIndexer::Word2Integer.
00035 //
00036 unsigned char BaseAsciiMap::baseColor2int[256+1] =
00037     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F
00038     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F
00039     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005"  // 0x20-0x2F
00040     "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F
00041     "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x40-0x4F
00042     "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F
00043     "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x60-0x6F
00044     "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F
00045 // not used, but included for completeness:
00046     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
00047     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
00048     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
00049     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
00050     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
00051     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
00052     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
00053     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
00054     ;
00055 
00056 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
00057 // just base space (ACTGNactgn).
00058 unsigned char BaseAsciiMap::base2int[256+1] =
00059     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F
00060     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F
00061     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x20-0x2F
00062     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F
00063     "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x40-0x4F
00064     "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F
00065     "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x60-0x6F
00066     "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F
00067 // not used, but included for completeness:
00068     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
00069     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
00070     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
00071     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
00072     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
00073     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
00074     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
00075     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
00076     ;
00077 
00078 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
00079 // just color space (0123).
00080 unsigned char BaseAsciiMap::color2int[256+1] =
00081     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F
00082     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F
00083     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005"  // 0x20-0x2F
00084     "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F
00085     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x40-0x4F
00086     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F
00087     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x60-0x6F
00088     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F
00089 // not used, but included for completeness:
00090     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
00091     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
00092     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
00093     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
00094     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
00095     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
00096     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
00097     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
00098     ;
00099 
00100 
00101 //
00102 // This is obviously for base space use only:
00103 //
00104 const char BaseAsciiMap::int2base[] = "ACGTNMXXXXXXXXXX";
00105 //
00106 // convert int to color space value
00107 //
00108 const char BaseAsciiMap::int2colorSpace[] = "0123NXXXXXXXXXXX";
00109 
00110 /// This table maps 5' base space to the 3' complement base space
00111 /// values, as well as 5' color space values to the corresponding
00112 /// 3' complement color space values.
00113 ///
00114 /// In both cases, invalids are mapped to 'N', which isn't accurate
00115 /// for ABI SOLiD, but internally it shouldn't matter (on output it
00116 /// will).
00117 unsigned char BaseAsciiMap::base2complement[256+1 /* for NUL char */] =
00118     "NNNNNNNNNNNNNNNN"  // 0x00-0x0F
00119     "NNNNNNNNNNNNNNNN"  // 0x10-0x1F
00120     "NNNNNNNNNNNNNNNN"  // 0x20-0x2F
00121     "0123NNNNNNNNNNNN"  // 0x30-0x3F
00122     "NTNGNNNCNNNNNNNN"  // 0x40-0x4F
00123     "NNNNANNNNNNNNNNN"  // 0x50-0x5F
00124     "NTNGNNNCNNNNNNNN"  // 0x60-0x6F
00125     "NNNNANNNNNNNNNNN"  // 0x70-0x7F
00126 // not used, but included for completeness:
00127     "NNNNNNNNNNNNNNNN"  // 0x80-0x8F
00128     "NNNNNNNNNNNNNNNN"  // 0x90-0x9F
00129     "NNNNNNNNNNNNNNNN"  // 0xA0-0xAF
00130     "NNNNNNNNNNNNNNNN"  // 0xB0-0xBF
00131     "NNNNNNNNNNNNNNNN"  // 0xC0-0xCF
00132     "NNNNNNNNNNNNNNNN"  // 0xD0-0xDF
00133     "NNNNNNNNNNNNNNNN"  // 0xE0-0xEF
00134     "NNNNNNNNNNNNNNNN"  // 0xF0-0xFF
00135     ;
00136 
00137 BaseAsciiMap::BaseAsciiMap()
00138         : myNumPrimerBases(1)
00139 {
00140     myBase2IntMapPtr = NULL;
00141 }
00142 
00143 BaseAsciiMap::~BaseAsciiMap()
00144 {
00145 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends