BaseAsciiMap.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "BaseAsciiMap.h"
00019 
00020 // baseColor2int: lookup table indexed by a character's byte value (0x00-0xFF).
00021 // Maps ASCII values to a 2 (or 3) bit class code for the base pair value:
00022 //  class 0 -> 'A'/'a' (Adenine  - 0x41 and 0x61)
00023 //  class 1 -> 'C'/'c' (Cytosine - 0x43 and 0x63)
00024 //  class 2 -> 'G'/'g' (Guanine  - 0x47 and 0x67)
00025 //  class 3 -> 'T'/'t' (Thymine  - 0x54 and 0x74)
00026 //  class 4 -> 'N'/'n' (Unknown - read error or incomplete data - 0x4E and 0x6E)
00027 //  class 5 -> any other byte: not a valid DNA base pair character
00028 //
00029 // Note: The +1 in the array size holds the NUL that terminates the string literal.
00030 //
00031 // NB: This table also maps the digits '0', '1', '2', and '3' (0x30-0x33) to the
00032 // corresponding integers 0-3, and '.' (0x2E) to class 4.  This allows ABI SOLiD
00033 // (color space) reads to be converted to integers via ReadIndexer::Word2Integer.
00034 //
00035 unsigned char BaseAsciiMap::baseColor2int[256+1] =
00036     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F: all invalid
00037     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F: all invalid
00038     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005"  // 0x20-0x2F: '.' -> 4
00039     "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F: '0'-'3' -> 0-3
00040     "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x40-0x4F: 'A' -> 0, 'C' -> 1, 'G' -> 2, 'N' -> 4
00041     "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F: 'T' -> 3
00042     "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x60-0x6F: 'a' -> 0, 'c' -> 1, 'g' -> 2, 'n' -> 4
00043     "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F: 't' -> 3
00044 // Bytes 0x80-0xFF (outside 7-bit ASCII): not used, but included for completeness:
00045     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
00046     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
00047     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
00048     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
00049     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
00050     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
00051     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
00052     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
00053     ;  // 16 rows x 16 entries = 256 table bytes, plus the literal's implicit NUL
00054 
00055 // base2int: base-space-only table. Only A, C, G, T, N (either case) map to classes 0-4; every other byte, including '0'-'3' and '.', maps to class 5.
00056 unsigned char BaseAsciiMap::base2int[256+1] =
00057     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F: all invalid
00058     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F: all invalid
00059     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x20-0x2F: all invalid ('.' is rejected here)
00060     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F: all invalid (digits are rejected here)
00061     "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x40-0x4F: 'A' -> 0, 'C' -> 1, 'G' -> 2, 'N' -> 4
00062     "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F: 'T' -> 3
00063     "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005"  // 0x60-0x6F: 'a' -> 0, 'c' -> 1, 'g' -> 2, 'n' -> 4
00064     "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F: 't' -> 3
00065 // Bytes 0x80-0xFF (outside 7-bit ASCII): not used, but included for completeness:
00066     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
00067     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
00068     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
00069     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
00070     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
00071     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
00072     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
00073     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
00074     ;  // 256 table bytes; the +1 in the array size holds the literal's implicit NUL
00075 
00076 // color2int: color-space-only table. Only '0'-'3' (-> 0-3) and '.' (-> 4) are accepted; every other byte, including the letters ACGTN, maps to class 5.
00077 unsigned char BaseAsciiMap::color2int[256+1] =
00078     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x00-0x0F: all invalid
00079     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x10-0x1F: all invalid
00080     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005"  // 0x20-0x2F: '.' -> 4
00081     "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x30-0x3F: '0'-'3' -> 0-3
00082     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x40-0x4F: all invalid (upper-case letters are rejected here)
00083     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x50-0x5F: all invalid
00084     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x60-0x6F: all invalid (lower-case letters are rejected here)
00085     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x70-0x7F: all invalid
00086 // Bytes 0x80-0xFF (outside 7-bit ASCII): not used, but included for completeness:
00087     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x80-0x8F
00088     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0x90-0x9F
00089     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xA0-0xAF
00090     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xB0-0xBF
00091     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xC0-0xCF
00092     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xD0-0xDF
00093     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xE0-0xEF
00094     "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005"  // 0xF0-0xFF
00095     ;  // 256 table bytes; the +1 in the array size holds the literal's implicit NUL
00096 
00097 
00098 // int2base: reverse lookup from an integer index to a base character.
00099 // This is for base space use only.  Indices 0-4 give 'A', 'C', 'G', 'T', 'N'; index 5 gives 'M'; indices 6-15 give 'X'.
00100 // NOTE(review): index 5 yields 'M' rather than 'X' or 'N' - presumably intentional; confirm against callers.
00101 const char BaseAsciiMap::int2base[] = "ACGTNMXXXXXXXXXX";  // 16 characters plus the literal's NUL
00102 // int2colorSpace: reverse lookup from an integer index to a color space character.
00103 // Indices 0-3 give '0'-'3'; index 4 gives 'N' (not '.'); indices 5-15 give 'X'.
00104 //
00105 const char BaseAsciiMap::int2colorSpace[] = "0123NXXXXXXXXXXX";  // 16 characters plus the literal's NUL
00106 
00107 // base2complement: lookup table indexed by a character's byte value (0x00-0xFF).
00108 // This table maps 5' base space to the 3' complement base space
00109 // values (A<->T and C<->G; either case in, upper case out), as well as 5' color
00110 // space values to the 3' complement color space values ('0'-'3' map to themselves).
00111 //
00112 // In both cases, invalids (every other byte, including '.') are mapped to 'N',
00113 // which isn't accurate for ABI SOLiD, but internally it shouldn't matter (on
00114 // output it will).
00115 //
00116 unsigned char BaseAsciiMap::base2complement[256+1 /* for NUL char */] =
00117     "NNNNNNNNNNNNNNNN"  // 0x00-0x0F: all 'N'
00118     "NNNNNNNNNNNNNNNN"  // 0x10-0x1F: all 'N'
00119     "NNNNNNNNNNNNNNNN"  // 0x20-0x2F: all 'N' (including '.')
00120     "0123NNNNNNNNNNNN"  // 0x30-0x3F: '0'-'3' unchanged
00121     "NTNGNNNCNNNNNNNN"  // 0x40-0x4F: 'A' -> 'T', 'C' -> 'G', 'G' -> 'C', 'N' -> 'N'
00122     "NNNNANNNNNNNNNNN"  // 0x50-0x5F: 'T' -> 'A'
00123     "NTNGNNNCNNNNNNNN"  // 0x60-0x6F: 'a' -> 'T', 'c' -> 'G', 'g' -> 'C', 'n' -> 'N'
00124     "NNNNANNNNNNNNNNN"  // 0x70-0x7F: 't' -> 'A'
00125 // Bytes 0x80-0xFF (outside 7-bit ASCII): not used, but included for completeness:
00126     "NNNNNNNNNNNNNNNN"  // 0x80-0x8F
00127     "NNNNNNNNNNNNNNNN"  // 0x90-0x9F
00128     "NNNNNNNNNNNNNNNN"  // 0xA0-0xAF
00129     "NNNNNNNNNNNNNNNN"  // 0xB0-0xBF
00130     "NNNNNNNNNNNNNNNN"  // 0xC0-0xCF
00131     "NNNNNNNNNNNNNNNN"  // 0xD0-0xDF
00132     "NNNNNNNNNNNNNNNN"  // 0xE0-0xEF
00133     "NNNNNNNNNNNNNNNN"  // 0xF0-0xFF
00134     ;  // 16 rows x 16 entries = 256 table bytes, plus the literal's implicit NUL
00135 
00136 BaseAsciiMap::BaseAsciiMap()  // Default constructor: sets the two members below.
00137         : myNumPrimerBases(1)  // primer-base count starts at 1
00138 {
00139     myBase2IntMapPtr = NULL;  // starts out NULL; presumably pointed at one of the *2int tables later - confirm in BaseAsciiMap.h
00140 }  // NOTE(review): any other members declared in the header are not initialized here - verify
00141 
00142 BaseAsciiMap::~BaseAsciiMap()  // Destructor: empty body, nothing is released here.
00143 {
00144 }
Generated on Wed Nov 17 15:38:28 2010 for StatGen Software by  doxygen 1.6.3