glfHandler.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __GLF_HANDLER_H__
00019 #define __GLF_HANDLER_H__
00020 
00021 #include "InputFile.h"
00022 #include "StringBasics.h"
00023 
00024 #pragma pack(push)  // save the current packing setting; a matching pack(pop) follows the record structs
00025 #pragma pack(1)     // 1-byte packing: no padding is inserted between members of the structs declared below
00026 
00027 struct glfIndel  // Indel payload of a GLF record; used as the `indel` member of the union in glfEntry
00028 {
00029     // Likelihoods for 1/1, 2/2 and 1/2 (one byte each, stored in that order)
00030     unsigned char lk[3];
00031 
00032     // Allele lengths, one entry per allele
00033     short length[2];
00034     // NOTE(review): presumably fills the struct up to 10 bytes (3 + 2*2 + 3, assuming a 2-byte short under pack(1)) so it overlays glfEntry's lk[10] exactly -- confirm
00035     unsigned char padding[3];
00036 };
00037 
00038 struct glfEntry  // One GLF record; declared inside the pack(1) region, so members are stored without padding
00039 {
00040     /**  "XACMGRSVTWYHKDBN"[refBase] gives the reference base; recordType is the second 4-bit field declared alongside it */
00041     unsigned char refBase:4, recordType:4;
00042 
00043     /** offset of this record from the previous one, in bases */
00044     unsigned int offset;
00045 
00046     /** number of mapped reads (depth, 24 bits) and log10 minimum likelihood * 10 (minLLK, 8 bits) */
00047     unsigned depth:24, minLLK:8;
00048 
00049     /** root mean squared maximum mapping quality for overlapping reads */
00050     unsigned char mapQuality;
00051 
00052     union  // the two payloads below share the same storage; NOTE(review): presumably recordType selects which one is valid -- confirm
00053     {
00054         /** log10 likelihood ratio * 10 for genotypes AA, AC, AG, AT, CC, CG, CT, GG, GT, TT */
00055         unsigned char lk[10];
00056         glfIndel indel;
00057     };
00058     // Assignment operator; only declared here, no definition is visible in this header. NOTE(review): rhs is a non-const reference, so const or temporary entries cannot be assigned from -- consider `const glfEntry &` if the definition permits
00059     glfEntry & operator = (glfEntry & rhs);
00060 };
00061 
00062 #pragma pack(pop)  // restore the packing setting saved by the pack(push) above
00063 
00064 class glfHandler  // Handler for a GLF file: holds the file handle, section bookkeeping and the current record. All state below is public.
00065 {
00066 public:
00067     // Global information about the current GLF file
00068     IFILE    handle;
00069     String   header;
00070 
00071     // Information about the current section
00072     String   label;
00073     int      sections;
00074     int      currentSection;
00075     int      maxPosition;
00076 
00077     // Currently active GLF record (raw entry, its position, and per-genotype / per-allele companions)
00078     glfEntry data;
00079     int      position;
00080     double   likelihoods[10];   // ten entries, same count as glfEntry::lk
00081     String   indelSequence[2];  // two entries, same count as glfIndel::length
00082 
00083     // Error message in case previous command fails
00084     const char * errorMsg;
00085     // Constructor and destructor are only declared here; definitions are not visible in this header
00086     glfHandler();
00087     ~glfHandler();
00088     // File-level operations. NOTE(review): the bool results presumably report success, with errorMsg set on failure -- confirm against the implementation
00089     bool Open(const String & filename);
00090     bool Create(const String & filename);
00091     bool isOpen();
00092     void Close();
00093     void Rewind();
00094     // Iteration over sections and entries. NOTE(review): presumably these update label/currentSection and data/position -- confirm
00095     bool NextSection();
00096     bool NextEntry();
00097     bool NextBaseEntry();
00098     // Section framing, presumably for files opened with Create() -- confirm
00099     void BeginSection(const String & sectionLabel, int sectionLength);
00100     void EndSection();
00101 
00102     void WriteEntry(int outputPosition);
00103     // Per-position accessors. NOTE(review): the parameter name `position` is the same as the public data member `position` declared above
00104     char     GetReference(int position, char defaultBase);
00105     int      GetDepth(int position);
00106     const double * GetLikelihoods(int position);
00107     const unsigned char *   GetLogLikelihoods(int position);
00108     int      GetMapQuality(int position);
00109     // The two helpers below return pointers to the private static arrays nullLikelihoods / nullLogLikelihoods (10 entries each)
00110     static const double * GetDefaultLikelihoods()
00111     {
00112         return nullLikelihoods;
00113     }
00114     static const unsigned char * GetDefaultLogLikelihoods()
00115     {
00116         return nullLogLikelihoods;
00117     }
00118     // Maps an unordered pair of bases to a genotype index; the result is the same for (a, b) and (b, a). For bases coded 1..4 it yields 0..9 in the order 1/1, 1/2, 1/3, 1/4, 2/2, 2/3, 2/4, 3/3, 3/4, 4/4. No range checking is done. Presumably used to index the 10-entry lk / likelihoods arrays -- confirm
00119     static int GenotypeIndex(int base1, int base2)
00120     {
00121         return base1 < base2 ? (base1 - 1) *(10 - base1) / 2 + (base2 - base1) :  // smaller base picks the row start, the difference picks the column
00122                (base2 - 1) *(10 - base2) / 2 + (base1 - base2);  // also taken when base1 == base2 (difference is 0)
00123     }
00124 
00125 private:
00126     static char           translateBase[16];      // static tables: declared here, defined (and filled) outside this header
00127     static char           backTranslateBase[5];
00128     static double         nullLikelihoods[10];    // returned by GetDefaultLikelihoods()
00129     static unsigned char  nullLogLikelihoods[10]; // returned by GetDefaultLogLikelihoods()
00130     // Header I/O helpers; headerText defaults to an empty string
00131     bool ReadHeader();
00132     void WriteHeader(const String & headerText = "");
00133 };
00134 
00135 #endif
00136 
Generated on Wed Nov 17 15:38:28 2010 for StatGen Software by  doxygen 1.6.3