libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __GLF_RECORD_H__ 00019 #define __GLF_RECORD_H__ 00020 00021 #include <map> 00022 #include <stdint.h> 00023 00024 #include "InputFile.h" 00025 #include "CharBuffer.h" 00026 00027 /// This class allows a user to easily get/set the fields in a GLF record. 00028 class GlfRecord 00029 { 00030 public: 00031 /// Constructor 00032 GlfRecord(); 00033 00034 /// Destructor 00035 ~GlfRecord(); 00036 00037 // // Copy Constructor 00038 // GlfRecord(const GlfRecord& record); 00039 00040 // // Overload operator = to copy the passed in record into this record. 00041 // GlfRecord & operator = (const GlfRecord& record); 00042 00043 // // Overload operator = to copy the passed in record into this record. 00044 // bool copy(const GlfRecord& record); 00045 00046 /// Clear this record back to the default setting. 00047 void reset(); 00048 00049 /// Read the record from the specified file (file MUST be in 00050 /// the correct position for reading a record). 00051 /// \param filePtr file to read from that is in the correct position. 00052 /// \return true if the record was successfully read from the file (even 00053 /// if it is an endMarker), false if it was not successfully read. 00054 bool read(IFILE filePtr); 00055 00056 /// Write the record to the specified file. 00057 /// \param filePtr file to write to that is in the correct position. 00058 /// \return true if the record was successfully written to the 00059 /// file, false if not. 00060 bool write(IFILE filePtr) const; 00061 00062 /// Print the reference section in a readable format. 00063 void print() const; 00064 00065 /// @name Generic Accessors for Record Types 1 & 2 00066 //@{ 00067 /// Set the record type and reference base. 00068 /// \param rtypeRef record type & reference base. Formatted as: 00069 /// record_type<<4|numeric_ref_base. 00070 /// \return true if the record type and reference base were successfully 00071 /// set, false if not. 00072 bool setRtypeRef(uint8_t rtypeRef); 00073 00074 /// Set the record type. 00075 /// \param recType record type: 1 - simple likelihood record, 00076 /// 2 - indel likelihood record, 0 - end maker 00077 /// \return true if the record type was successfully set, false if not. 00078 bool setRecordType(uint8_t recType); 00079 00080 /// Set the reference base from an integer value. 00081 /// \param refBase integer representation of the reference base. 00082 /// \anchor BaseCharacterIntMap 00083 /// <table> 00084 /// <tr><th>Int Value</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td><td>10</td><td>11</td><td>12</td><td>13</td><td>14</td><td>15</td></tr> 00085 /// <tr><th>Character Base</th><td>X</td><td>A</td><td>C</td><td>M</td><td>G</td><td>R</td><td>S</td><td>V</td><td>T</td><td>W</td><td>Y</td><td>H</td><td>K</td><td>D</td><td>B</td><td>N</td></tr> 00086 /// </table> 00087 /// \return true if the reference base was successfully set, false if not. 00088 bool setRefBaseInt(uint8_t refBase); 00089 00090 // TODO bool setRefBaseChar(char refBase); 00091 00092 /// Set the offset from the precedent record. 00093 /// 0-based coordinate of the record minus the coordinate of the 00094 /// precedent record. For the first record in a reference sequence, 00095 /// the previous coordinate is 0. 00096 /// For insertions between x & x+1, the coordinate is x. 00097 /// For deletions between x & y, the coordinate is x. 00098 /// \param offset offset from the precedent record. 00099 /// \return true if successfully set, false if not. 00100 bool setOffset(uint32_t offset); 00101 00102 /// Set the minimum likelihood and the read depth. 00103 /// \param minDepth minimum likelihood and read depth. Formatted as: 00104 /// min_lk<<24|read_dpeth. (min_lk capped at 255) 00105 /// \return true if successfully set, false if not. 00106 bool setMinDepth(uint32_t minDepth); 00107 00108 /// Set the minimum likelihood. 00109 /// \param minLk minimum likelihood (capped at 255). 00110 /// \return true if successfully set, false if not. 00111 bool setMinLk(uint8_t minLk); 00112 00113 /// Set the the read depth. 00114 /// \param readDepth read depth. 00115 /// \return true if successfully set, false if not. 00116 bool setReadDepth(uint32_t readDepth); 00117 00118 /// Set the RMS of mapping qualities of reads covering the site. 00119 /// \param rmsMapQ RMS of mapping qualities 00120 /// \return true if successfully set, false if not. 00121 bool setRmsMapQ(uint8_t rmsMapQ); 00122 00123 /// Return the record type. 00124 /// \return record type for this record: 0 - endMarker, 00125 /// 1 - simple likelihood, 2 - indel likelihood 00126 inline int getRecordType() const 00127 { 00128 return(myRecTypeRefBase >> REC_TYPE_SHIFT); 00129 } 00130 00131 /// Return the reference base as an integer. 00132 /// \return integer representation of the reference base. 00133 /// See: \ref BaseCharacterIntMap 00134 inline int getRefBase() const 00135 { 00136 return(myRecTypeRefBase & REF_BASE_MASK); 00137 } 00138 00139 /// Return the reference base as a character. 00140 /// \return character representation of the reference base. 00141 char getRefBaseChar() const; 00142 00143 /// Return the offset from the precedent record. 00144 /// \return offset from the precedent record. 00145 uint32_t getOffset() const; 00146 00147 /// Return the minimum likelihood and read depth. Formatted as: 00148 /// min_lk<<24|read_dpeth. (min_lk capped at 255) 00149 /// \return minimum likelihood and read depth 00150 uint32_t getMinDepth() const; 00151 00152 /// Return the minimum likelihood 00153 /// \return minimum likelihood 00154 uint8_t getMinLk() const; 00155 00156 /// Return the read depth. 00157 /// \return read depth 00158 uint32_t getReadDepth() const; 00159 00160 /// Return the RMS of mapping qualities of reads covering the site. 00161 /// \return RMS of maping qualities. 00162 uint8_t getRmsMapQ() const; 00163 00164 //@} 00165 00166 /// @name Record Type 1 Accessors 00167 /// Record Type 1: Simple Likelihood Record 00168 //@{ 00169 //bool setType1(all fields for type 1); 00170 00171 /// Set the likelihood for the specified genotype. 00172 /// Throws an exception if index is out of range. 00173 /// \param index index for the genotype for which the likelihood is 00174 /// being set. 00175 /// \anchor GenotypeIndexTable 00176 /// <table> 00177 /// <tr><th>Index</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td></tr> 00178 /// <tr><th>Genotype</th><td>AA</td><td>AC</td><td>AG</td><td>AT</td><td>CC</td><td>CG</td><td>CT</td><td>GG</td><td>GT</td><td>TT</td></tr> 00179 /// </table> 00180 /// \param value likelihood for the genotype at the specified index. 00181 /// \return true if successfully set, false if not. 00182 bool setLk(int index, uint8_t value); 00183 00184 //bool getType1(all fields for type 1); 00185 00186 /// Get the likelihood for the specified genotype index. 00187 /// Throws an exception if index is out of range. 00188 /// \param index index of the genotype for which the likelihood should 00189 /// be returned. See: \ref GenotypeIndexTable 00190 /// \return likelihood of the specified index. 00191 uint8_t getLk(int index); 00192 //@} 00193 00194 /// @name Record Type 2 Accessors 00195 /// Record Type2: Indel Likelihood Record 00196 //@{ 00197 // bool setType2(all fields for type 2); 00198 00199 /// Set the likelihood of the first homozygous indel allele. 00200 /// \param lk likelihood of the 1st homozygous indel allele (capped at 255) 00201 /// \return true if successfully set, false if not. 00202 bool setLkHom1(uint8_t lk); 00203 00204 /// Set the likelihood of the 2nd homozygous indel allele. 00205 /// \param lk likelihood of the 2nd homozygous indel allele (capped at 255) 00206 /// \return true if successfully set, false if not. 00207 bool setLkHom2(uint8_t lk); 00208 00209 /// Set the likelihood of a heterozygote. 00210 /// \param lk likelihood of a heterozygote (capped at 255) 00211 /// \return true if successfully set, false if not. 00212 bool setLkHet(uint8_t lk); 00213 00214 /// Set the sequence of the first indel allele if the 00215 /// first indel is an insertion. 00216 /// \param indelSeq sequence of the first indel allele (insertion). 00217 /// \return true if successfully set, false if not. 00218 bool setInsertionIndel1(const std::string& indelSeq); 00219 00220 /// Set the sequence of the first indel allele if the 00221 /// first indel is an deletion. 00222 /// \param indelSeq sequence of the first indel allele (deletion). 00223 /// \return true if successfully set, false if not. 00224 bool setDeletionIndel1(const std::string& indelSeq); 00225 00226 /// Set the sequence of the 2nd indel allele if the 00227 /// 2nd indel is an insertion. 00228 /// \param indelSeq sequence of the 2nd indel allele (insertion). 00229 /// \return true if successfully set, false if not. 00230 bool setInsertionIndel2(const std::string& indelSeq); 00231 00232 /// Set the sequence of the 2nd indel allele if the 00233 /// 2nd indel is an deletion. 00234 /// \param indelSeq sequence of the 2nd indel allele (deletion). 00235 /// \return true if successfully set, false if not. 00236 bool setDeletionIndel2(const std::string& indelSeq); 00237 00238 // bool setType2(all fields for type 2); 00239 00240 /// Return the likelihood of the 1st homozygous indel allele. 00241 /// \return likelihood of the 1st homozygous indel allele. 00242 uint8_t getLkHom1(); 00243 00244 /// Return the likelihood of the 2nd homozygous indel allele. 00245 /// \return likelihood of the 2nd homozygous indel allele. 00246 uint8_t getLkHom2(); 00247 00248 /// Return the likelihood of a heterozygote. 00249 /// \return likelihood of a hetereozygote. 00250 uint8_t getLkHet(); 00251 00252 /// Get the sequence and length (+:ins, -:del) of the 1st indel allele. 00253 /// \param indelSeq string to set with the sequence of the 1st indel allele 00254 /// \return length of the 1st indel allele 00255 /// (positive=insertion; negative=deletion; 0=no-indel) 00256 int16_t getIndel1(std::string& indelSeq); 00257 00258 /// Get the sequence and length (+:ins, -:del) of the 2nd indel allele. 00259 /// \param indelSeq string to set with the sequence of the 2nd indel allele 00260 /// \return length of the 2nd indel allele 00261 /// (positive=insertion; negative=deletion; 0=no-indel) 00262 int16_t getIndel2(std::string& indelSeq); 00263 //@} 00264 00265 private: 00266 // Read a record of record type 1. 00267 void readType1(IFILE filePtr); 00268 00269 // Read a record of record type 2. 00270 void readType2(IFILE filePtr); 00271 00272 00273 // Write the rtyperef field. 00274 void writeRtypeRef(IFILE filePtr) const; 00275 00276 00277 // Write a record of record type 1. 00278 void writeType1(IFILE filePtr) const; 00279 00280 // Write a record of record type 2. 00281 void writeType2(IFILE filePtr) const; 00282 00283 // Contains record_type and ref_base. 00284 uint8_t myRecTypeRefBase; 00285 00286 static const uint8_t REC_TYPE_SHIFT = 4; 00287 static const uint8_t REF_BASE_MASK = 0xF; 00288 static const uint8_t REC_TYPE_MASK = 0xF0; 00289 00290 static const uint32_t MIN_LK_SHIFT = 24; 00291 static const uint32_t READ_DEPTH_MASK = 0xFFFFFF; 00292 static const uint32_t MIN_LK_MASK = 0xFF000000; 00293 00294 static const char REF_BASE_MAX = 15; 00295 static std::string REF_BASE_CHAR; 00296 00297 static const int NUM_REC1_LIKELIHOOD = 10; 00298 00299 struct 00300 { 00301 uint32_t offset; 00302 uint32_t min_depth; 00303 uint8_t rmsMapQ; 00304 uint8_t lk[GlfRecord::NUM_REC1_LIKELIHOOD]; 00305 } myRec1Base; 00306 00307 static const int REC1_BASE_SIZE = 19; 00308 00309 struct 00310 { 00311 uint32_t offset; 00312 uint32_t min_depth; 00313 uint8_t rmsMapQ; 00314 uint8_t lkHom1; 00315 uint8_t lkHom2; 00316 uint8_t lkHet; 00317 int16_t indelLen1; 00318 int16_t indelLen2; 00319 } myRec2Base; 00320 00321 // TODO rest of rec 2. 00322 CharBuffer myIndelSeq1; 00323 CharBuffer myIndelSeq2; 00324 00325 static const int REC2_BASE_SIZE = 16; 00326 00327 }; 00328 00329 #endif