libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "BaseAsciiMap.h" 00019 00020 // 00021 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00022 // both base and color space. 00023 // class 0 -> 'A' (Adenine - 0x41 and 0x61) 00024 // class 1 -> 'C' (Cytosine - 0x43 and 0x63) 00025 // class 2 -> 'G' (Guanine - 0x47 and 0x67) 00026 // class 3 -> 'T' (Thymine - 0x54 and 0x74) 00027 // class 4 -> 'N' (Unknown - read error or incomplete data - 0x4E and 0x6E) 00028 // class 5 -> not a valid DNA base pair character 00029 // 00030 // Note: The +1 array size is for the terminating NUL character 00031 // 00032 // NB: This table also maps 0, 1, 2, and 3 to the corresponding integers, 00033 // and '.' to class 4. This allows ABI SOLiD reads to be converted 00034 // to integers via ReadIndexer::Word2Integer. 00035 // 00036 unsigned char BaseAsciiMap::baseColor2int[256+1] = 00037 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F 00038 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F 00039 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005" // 0x20-0x2F 00040 "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F 00041 "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x40-0x4F 00042 "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F 00043 "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x60-0x6F 00044 "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F 00045 // not used, but included for completeness: 00046 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F 00047 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F 00048 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF 00049 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF 00050 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF 00051 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF 00052 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF 00053 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF 00054 ; 00055 00056 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00057 // just base space (ACTGNactgn). 00058 unsigned char BaseAsciiMap::base2int[256+1] = 00059 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F 00060 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F 00061 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x20-0x2F 00062 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F 00063 "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x40-0x4F 00064 "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F 00065 "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x60-0x6F 00066 "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F 00067 // not used, but included for completeness: 00068 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F 00069 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F 00070 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF 00071 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF 00072 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF 00073 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF 00074 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF 00075 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF 00076 ; 00077 00078 // Map ASCII values to a 2 (or 3) bit encoding for the base pair value for 00079 // just color space (0123). 00080 unsigned char BaseAsciiMap::color2int[256+1] = 00081 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F 00082 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F 00083 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005" // 0x20-0x2F 00084 "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F 00085 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x40-0x4F 00086 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F 00087 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x60-0x6F 00088 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F 00089 // not used, but included for completeness: 00090 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F 00091 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F 00092 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF 00093 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF 00094 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF 00095 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF 00096 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF 00097 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF 00098 ; 00099 00100 00101 // 00102 // This is obviously for base space use only: 00103 // 00104 const char BaseAsciiMap::int2base[] = "ACGTNMXXXXXXXXXX"; 00105 // 00106 // convert int to color space value 00107 // 00108 const char BaseAsciiMap::int2colorSpace[] = "0123NXXXXXXXXXXX"; 00109 00110 /// This table maps 5' base space to the 3' complement base space 00111 /// values, as well as 5' color space values to the corresponding 00112 /// 3' complement color space values. 00113 /// 00114 /// In both cases, invalids are mapped to 'N', which isn't accurate 00115 /// for ABI SOLiD, but internally it shouldn't matter (on output it 00116 /// will). 00117 unsigned char BaseAsciiMap::base2complement[256+1 /* for NUL char */] = 00118 "NNNNNNNNNNNNNNNN" // 0x00-0x0F 00119 "NNNNNNNNNNNNNNNN" // 0x10-0x1F 00120 "NNNNNNNNNNNNNNNN" // 0x20-0x2F 00121 "0123NNNNNNNNNNNN" // 0x30-0x3F 00122 "NTNGNNNCNNNNNNNN" // 0x40-0x4F 00123 "NNNNANNNNNNNNNNN" // 0x50-0x5F 00124 "NTNGNNNCNNNNNNNN" // 0x60-0x6F 00125 "NNNNANNNNNNNNNNN" // 0x70-0x7F 00126 // not used, but included for completeness: 00127 "NNNNNNNNNNNNNNNN" // 0x80-0x8F 00128 "NNNNNNNNNNNNNNNN" // 0x90-0x9F 00129 "NNNNNNNNNNNNNNNN" // 0xA0-0xAF 00130 "NNNNNNNNNNNNNNNN" // 0xB0-0xBF 00131 "NNNNNNNNNNNNNNNN" // 0xC0-0xCF 00132 "NNNNNNNNNNNNNNNN" // 0xD0-0xDF 00133 "NNNNNNNNNNNNNNNN" // 0xE0-0xEF 00134 "NNNNNNNNNNNNNNNN" // 0xF0-0xFF 00135 ; 00136 00137 BaseAsciiMap::BaseAsciiMap() 00138 : myNumPrimerBases(1) 00139 { 00140 myBase2IntMapPtr = NULL; 00141 } 00142 00143 BaseAsciiMap::~BaseAsciiMap() 00144 { 00145 }