libStatGen Software  1
glfHandler.h
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __GLF_HANDLER_H__
00019 #define __GLF_HANDLER_H__
00020 
00021 #include "InputFile.h"
00022 #include "StringBasics.h"
00023 
00024 #if defined(__APPLE__)
00025 // #pragma warn "Caution, glfHandler.h is non-portable"
00026 #else
00027 #pragma pack(push)
00028 #pragma pack(1)
00029 #endif
00030 
/**
 * Indel payload of a GLF record.
 *
 * Stored in glfEntry's anonymous union as the alternative to the
 * 10-element genotype likelihood array.  Under the 1-byte packing this
 * header requests around the record structs on non-Apple builds, and
 * assuming a 2-byte short, the members add up to 10 bytes, i.e. the same
 * size as that array.  Member order and types therefore must not change.
 */
struct glfIndel
{
    /** Likelihoods for the 1/1, 2/2 and 1/2 genotypes, in that order */
    unsigned char lk[3];

    /** Lengths of the two alleles */
    short length[2];

    /** Filler bytes; no meaning is assigned to them in this header */
    unsigned char padding[3];
};
00041 
00042 struct glfEntry
00043 {
00044     /**  "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
00045     unsigned char refBase:4, recordType:4;
00046 
00047     /** offset of this record from the previous one, in bases */
00048     unsigned int offset;
00049 
00050     /** log10 minimum likelihood * 10 and the number of mapped reads */
00051     unsigned depth:24, minLLK:8;
00052 
00053     /** root mean squared maximum mapping quality for overlapping reads */
00054     unsigned char mapQuality;
00055 
00056     union
00057     {
00058         /** log10 likelihood ratio * 10 for genotypes AA, AC, AG, AT, CC, CG, CT, GG, GT, TT */
00059         unsigned char lk[10];
00060         glfIndel indel;
00061     };
00062 
00063     glfEntry & operator = (glfEntry & rhs);
00064 };
00065 
00066 #if defined(__APPLE__)
00067 // #pragma warn "Caution, glfHandler.h is non-portable"
00068 #else
00069 #pragma pack(pop)
00070 #endif
00071 
00072 class glfHandler
00073 {
00074 public:
00075     // Global information about the current GLF file
00076     bool     isStub;
00077     IFILE    handle;
00078     String   header;
00079 
00080     // Information about the current section
00081     String   label;
00082     int      sections;
00083     int      currentSection;
00084     int      maxPosition;
00085 
00086     // Information on whether the end of the current section has been reached
00087     bool   endOfSection;
00088 
00089     // Currently active GLF record
00090     glfEntry data;
00091     int      position;
00092     double   likelihoods[10];
00093     String   indelSequence[2];
00094 
00095     // Error message in case previous command fails
00096     const char * errorMsg;
00097 
00098     glfHandler();
00099     ~glfHandler();
00100 
00101     bool Open(const String & filename);
00102     void OpenStub();
00103     bool Create(const String & filename);
00104     bool isOpen();
00105     void Close();
00106     void Rewind();
00107 
00108     bool NextSection();
00109     bool NextEntry();
00110     bool NextBaseEntry();
00111 
00112     void BeginSection(const String & sectionLabel, int sectionLength);
00113     void EndSection();
00114 
00115     void WriteEntry(int outputPosition);
00116 
00117     char     GetReference(int position, char defaultBase);
00118     int      GetDepth(int position);
00119     const double * GetLikelihoods(int position);
00120     const unsigned char *   GetLogLikelihoods(int position);
00121     int      GetMapQuality(int position);
00122 
00123     static const double * GetDefaultLikelihoods()
00124     {
00125         return nullLikelihoods;
00126     }
00127     static const unsigned char * GetDefaultLogLikelihoods()
00128     {
00129         return nullLogLikelihoods;
00130     }
00131 
00132     static int GenotypeIndex(int base1, int base2)
00133     {
00134         return base1 < base2 ? (base1 - 1) *(10 - base1) / 2 + (base2 - base1) :
00135                (base2 - 1) *(10 - base2) / 2 + (base1 - base2);
00136     }
00137 
00138 private:
00139     static char           translateBase[16];
00140     static char           backTranslateBase[5];
00141     static double         nullLikelihoods[10];
00142     static unsigned char  nullLogLikelihoods[10];
00143 
00144     bool ReadHeader();
00145     void WriteHeader(const String & headerText = "");
00146 };
00147 
00148 #endif
00149 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends