libStatGen Software  1
GlfRecord.h
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __GLF_RECORD_H__
00019 #define __GLF_RECORD_H__
00020 
00021 #include <map>
00022 #include <stdint.h>
00023 
00024 #include "InputFile.h" 
00025 #include "CharBuffer.h"
00026 
00027 /// This class allows a user to easily get/set the fields in a GLF record (type 0 - end marker, type 1 - simple likelihood, type 2 - indel likelihood).
00028 class GlfRecord
00029 {
00030 public:
00031     /// Constructor
00032     GlfRecord();
00033 
00034     /// Destructor
00035     ~GlfRecord();
00036 
00037 //     // Copy Constructor (disabled; any copying currently relies on the implicit member-wise copy)
00038 //     GlfRecord(const GlfRecord& record);
00039 
00040 //     // Overload operator = to copy the passed in record into this record.
00041 //     GlfRecord & operator = (const GlfRecord& record);
00042 
00043 //     // Copy the passed in record into this record.
00044 //     bool copy(const GlfRecord& record);
00045 
00046     /// Clear this record back to the default setting.
00047     void reset();
00048    
00049     /// Read the record from the specified file (file MUST be in
00050     /// the correct position for reading a record).
00051     /// \param filePtr file to read from that is in the correct position.
00052     /// \return true if the record was successfully read from the file (even
00053     /// if it is an endMarker), false if it was not successfully read.
00054     bool read(IFILE filePtr);
00055 
00056     /// Write the record to the specified file.
00057     /// \param filePtr file to write to that is in the correct position.
00058     /// \return true if the record was successfully written to the 
00059     /// file, false if not.
00060     bool write(IFILE filePtr) const;
00061 
00062     /// Print the reference section in a readable format.
00063     void print() const; // NOTE(review): "reference section" looks copy-pasted; presumably this prints the record itself - confirm against the implementation.
00064 
00065     /// @name Generic Accessors for Record Types 1 & 2
00066     //@{
00067     /// Set the record type and reference base.
00068     /// \param rtypeRef record type & reference base. Formatted as:
00069     /// record_type<<4|numeric_ref_base.
00070     /// \return true if the record type and reference base were successfully
00071     /// set, false if not.
00072     bool setRtypeRef(uint8_t rtypeRef);
00073 
00074     /// Set the record type.
00075     /// \param recType record type: 1 - simple likelihood record, 
00076     /// 2 - indel likelihood record, 0 - end marker
00077     /// \return true if the record type was successfully set, false if not.
00078     bool setRecordType(uint8_t recType);
00079 
00080     /// Set the reference base from an integer value.
00081     /// \param refBase integer representation of the reference base.
00082     /// \anchor BaseCharacterIntMap
00083     /// <table>
00084     /// <tr><th>Int Value</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td><td>10</td><td>11</td><td>12</td><td>13</td><td>14</td><td>15</td></tr>
00085     /// <tr><th>Character Base</th><td>X</td><td>A</td><td>C</td><td>M</td><td>G</td><td>R</td><td>S</td><td>V</td><td>T</td><td>W</td><td>Y</td><td>H</td><td>K</td><td>D</td><td>B</td><td>N</td></tr>
00086     /// </table>
00087     /// \return true if the reference base was successfully set, false if not.
00088     bool setRefBaseInt(uint8_t refBase);
00089 
00090     // TODO   bool setRefBaseChar(char refBase);
00091 
00092     /// Set the offset from the precedent record.
00093     /// 0-based coordinate of the record minus the coordinate of the
00094     /// precedent record. For the first record in a reference sequence,
00095     /// the previous coordinate is 0.
00096     /// For insertions between x & x+1, the coordinate is x.
00097     /// For deletions between x & y, the coordinate is x. 
00098     /// \param offset offset from the precedent record.
00099     /// \return true if successfully set, false if not.
00100     bool setOffset(uint32_t offset);
00101 
00102     /// Set the minimum likelihood and the read depth.
00103     /// \param minDepth minimum likelihood and read depth. Formatted as:
00104     /// min_lk<<24|read_depth. (min_lk capped at 255)
00105     /// \return true if successfully set, false if not.
00106     bool setMinDepth(uint32_t minDepth);
00107 
00108     /// Set the minimum likelihood.
00109     /// \param minLk minimum likelihood (capped at 255).
00110     /// \return true if successfully set, false if not.
00111     bool setMinLk(uint8_t minLk);
00112 
00113     /// Set the read depth.
00114     /// \param readDepth read depth.
00115     /// \return true if successfully set, false if not.
00116     bool setReadDepth(uint32_t readDepth);
00117 
00118     /// Set the RMS of mapping qualities of reads covering the site.
00119     /// \param rmsMapQ RMS of mapping qualities
00120     /// \return true if successfully set, false if not.
00121     bool setRmsMapQ(uint8_t rmsMapQ);
00122  
00123     /// Return the record type.
00124     /// \return record type for this record: 0 - endMarker, 
00125     /// 1 - simple likelihood, 2 - indel likelihood
00126     inline int getRecordType() const
00127     {
00128         return(myRecTypeRefBase >> REC_TYPE_SHIFT); // record type lives in the upper 4 bits
00129     }
00130 
00131     /// Return the reference base as an integer.
00132     /// \return integer representation of the reference base.
00133     /// See: \ref BaseCharacterIntMap
00134     inline int getRefBase() const
00135     {
00136         return(myRecTypeRefBase & REF_BASE_MASK); // reference base lives in the lower 4 bits
00137     }
00138 
00139     /// Return the reference base as a character.
00140     /// \return character representation of the reference base.
00141     char getRefBaseChar() const;
00142 
00143     /// Return the offset from the precedent record.
00144     /// \return offset from the precedent record.
00145     uint32_t getOffset() const;
00146 
00147     /// Return the minimum likelihood and read depth.  Formatted as:
00148     /// min_lk<<24|read_depth. (min_lk capped at 255)
00149     /// \return minimum likelihood and read depth
00150     uint32_t getMinDepth() const;
00151 
00152     /// Return the minimum likelihood
00153     /// \return minimum likelihood
00154     uint8_t getMinLk() const;
00155 
00156     /// Return the read depth.
00157     /// \return read depth
00158     uint32_t getReadDepth() const;
00159 
00160     /// Return the RMS of mapping qualities of reads covering the site.
00161     /// \return RMS of mapping qualities.
00162     uint8_t getRmsMapQ() const;
00163 
00164     //@}
00165     
00166     /// @name Record Type 1 Accessors
00167     /// Record Type 1: Simple Likelihood Record
00168     //@{
00169     //bool setType1(all fields for type 1);
00170 
00171     /// Set the likelihood for the specified genotype.
00172     /// Throws an exception if index is out of range.
00173     /// \param index index for the genotype for which the likelihood is 
00174     /// being set.
00175     /// \anchor GenotypeIndexTable
00176     /// <table>
00177     /// <tr><th>Index</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td></tr>
00178     /// <tr><th>Genotype</th><td>AA</td><td>AC</td><td>AG</td><td>AT</td><td>CC</td><td>CG</td><td>CT</td><td>GG</td><td>GT</td><td>TT</td></tr>
00179     /// </table>
00180     /// \param value likelihood for the genotype at the specified index.
00181     /// \return true if successfully set, false if not.
00182     bool setLk(int index, uint8_t value);
00183 
00184     //bool getType1(all fields for type 1);
00185 
00186     /// Get the likelihood for the specified genotype index.
00187     /// Throws an exception if index is out of range.
00188     /// \param index index of the genotype for which the likelihood should
00189     /// be returned.  See: \ref GenotypeIndexTable
00190     /// \return likelihood of the specified index.
00191     uint8_t getLk(int index);    // NOTE(review): not declared const, unlike the generic getters above.
00192     //@}
00193 
00194     /// @name Record Type 2 Accessors
00195     /// Record Type2: Indel Likelihood Record
00196     //@{
00197 //     bool setType2(all fields for type 2);
00198 
00199     /// Set the likelihood of the first homozygous indel allele.
00200     /// \param lk likelihood of the 1st homozygous indel allele (capped at 255)
00201     /// \return true if successfully set, false if not.
00202     bool setLkHom1(uint8_t lk);
00203 
00204     /// Set the likelihood of the 2nd homozygous indel allele.
00205     /// \param lk likelihood of the 2nd homozygous indel allele (capped at 255)
00206     /// \return true if successfully set, false if not.
00207     bool setLkHom2(uint8_t lk);
00208 
00209     /// Set the likelihood of a heterozygote.
00210     /// \param lk likelihood of a heterozygote (capped at 255)
00211     /// \return true if successfully set, false if not.
00212     bool setLkHet(uint8_t lk);
00213 
00214     /// Set the sequence of the first indel allele if the
00215     /// first indel is an insertion.
00216     /// \param indelSeq sequence of the first indel allele (insertion).
00217     /// \return true if successfully set, false if not.
00218     bool setInsertionIndel1(const std::string& indelSeq);
00219 
00220     /// Set the sequence of the first indel allele if the
00221     /// first indel is a deletion.
00222     /// \param indelSeq sequence of the first indel allele (deletion).
00223     /// \return true if successfully set, false if not.
00224     bool setDeletionIndel1(const std::string& indelSeq);
00225 
00226     /// Set the sequence of the 2nd indel allele if the
00227     /// 2nd indel is an insertion.
00228     /// \param indelSeq sequence of the 2nd indel allele (insertion).
00229     /// \return true if successfully set, false if not.
00230     bool setInsertionIndel2(const std::string& indelSeq);
00231 
00232     /// Set the sequence of the 2nd indel allele if the
00233     /// 2nd indel is a deletion.
00234     /// \param indelSeq sequence of the 2nd indel allele (deletion).
00235     /// \return true if successfully set, false if not.
00236     bool setDeletionIndel2(const std::string& indelSeq);
00237 
00238     //     bool setType2(all fields for type 2);
00239 
00240     /// Return the likelihood of the 1st homozygous indel allele.
00241     /// \return likelihood of the 1st homozygous indel allele.
00242     uint8_t getLkHom1(); // NOTE(review): this and the following type 2 getters are not declared const.
00243 
00244     /// Return the likelihood of the 2nd homozygous indel allele.
00245     /// \return likelihood of the 2nd homozygous indel allele.
00246     uint8_t getLkHom2();
00247 
00248     /// Return the likelihood of a heterozygote.
00249     /// \return likelihood of a heterozygote.
00250     uint8_t getLkHet();
00251 
00252     /// Get the sequence and length (+:ins, -:del) of the 1st indel allele.
00253     /// \param indelSeq string to set with the sequence of the 1st indel allele
00254     /// \return length of the 1st indel allele
00255     /// (positive=insertion; negative=deletion; 0=no-indel)
00256     int16_t getIndel1(std::string& indelSeq);
00257 
00258     /// Get the sequence and length (+:ins, -:del) of the 2nd indel allele.
00259     /// \param indelSeq string to set with the sequence of the 2nd indel allele
00260     /// \return length of the 2nd indel allele
00261     /// (positive=insertion; negative=deletion; 0=no-indel)
00262     int16_t getIndel2(std::string& indelSeq);
00263     //@}
00264 
00265 private:
00266     // Read a record of record type 1.
00267     void readType1(IFILE filePtr);
00268 
00269     // Read a record of record type 2.
00270     void readType2(IFILE filePtr);
00271 
00272 
00273     // Write the rtyperef field.
00274     void writeRtypeRef(IFILE filePtr) const;
00275 
00276 
00277     // Write a record of record type 1.
00278     void writeType1(IFILE filePtr) const;
00279 
00280     // Write a record of record type 2.
00281     void writeType2(IFILE filePtr) const;
00282 
00283     // Contains record_type and ref_base, packed as record_type<<4|ref_base.
00284     uint8_t myRecTypeRefBase;
00285 
00286     static const uint8_t REC_TYPE_SHIFT = 4;    // bit position of record_type within myRecTypeRefBase
00287     static const uint8_t REF_BASE_MASK = 0xF;   // selects the lower 4 bits (ref_base)
00288     static const uint8_t REC_TYPE_MASK = 0xF0;  // selects the upper 4 bits (record_type)
00289 
00290     static const uint32_t MIN_LK_SHIFT = 24;            // matches the documented min_lk<<24|read_depth packing
00291     static const uint32_t READ_DEPTH_MASK = 0xFFFFFF;   // selects the lower 24 bits (read_depth)
00292     static const uint32_t MIN_LK_MASK = 0xFF000000;     // selects the upper 8 bits (min_lk)
00293 
00294     static const char REF_BASE_MAX = 15; // largest integer value in the BaseCharacterIntMap table (0-15)
00295     static std::string REF_BASE_CHAR; // declared only; defined outside this header. Presumably the int-to-character lookup for the reference base - confirm. NOTE(review): <string> is not included directly by this header.
00296 
00297     static const int NUM_REC1_LIKELIHOOD = 10; // one likelihood per genotype index 0-9 (see GenotypeIndexTable)
00298 
00299     struct // fields of a type 1 record; member order is significant for REC1_BASE_SIZE, do not reorder
00300     {
00301         uint32_t offset;
00302         uint32_t min_depth; // presumably packed as min_lk<<24|read_depth (see setMinDepth) - confirm
00303         uint8_t rmsMapQ;
00304         uint8_t lk[GlfRecord::NUM_REC1_LIKELIHOOD];
00305     } myRec1Base;
00306 
00307     static const int REC1_BASE_SIZE = 19; // 4 + 4 + 1 + 10: summed field sizes of myRec1Base (sizeof may be larger due to padding); presumably the on-disk byte count - confirm
00308 
00309     struct // fixed-size fields of a type 2 record; member order is significant for REC2_BASE_SIZE, do not reorder
00310     {
00311         uint32_t offset;
00312         uint32_t min_depth; // presumably packed as min_lk<<24|read_depth (see setMinDepth) - confirm
00313         uint8_t rmsMapQ;
00314         uint8_t lkHom1;
00315         uint8_t lkHom2;
00316         uint8_t lkHet;
00317         int16_t indelLen1; // presumably signed length of the 1st indel, +:ins / -:del (see getIndel1) - confirm
00318         int16_t indelLen2; // presumably signed length of the 2nd indel, +:ins / -:del (see getIndel2) - confirm
00319     } myRec2Base;
00320 
00321     // TODO rest of rec 2.
00322     CharBuffer myIndelSeq1; // presumably the sequence of the 1st indel allele - confirm
00323     CharBuffer myIndelSeq2; // presumably the sequence of the 2nd indel allele - confirm
00324 
00325     static const int REC2_BASE_SIZE = 16; // 4 + 4 + 1 + 1 + 1 + 1 + 2 + 2: summed field sizes of myRec2Base; presumably the on-disk byte count - confirm
00326 
00327 };
00328 
00329 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends