glfHandler.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "glfHandler.h"
00019 #include "BaseQualityHelper.h"
00020 
00021 char glfHandler::translateBase[16] = {0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0};
00022 char glfHandler::backTranslateBase[5] = { 15, 1, 2, 4, 8 };
00023 unsigned char glfHandler::nullLogLikelihoods[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
00024 double glfHandler::nullLikelihoods[10] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0.};
00025 
00026 glfHandler::glfHandler()
00027 {
00028     sections = 0;
00029     currentSection = 0;
00030     maxPosition = position = 0;
00031 }
00032 
00033 glfHandler::~glfHandler()
00034 {
00035     // Not safe to close the file here in case a copy of the file was generated
00036     // if (isOpen())
00037     //   Close();
00038 }
00039 
00040 bool glfHandler::Open(const String & filename)
00041 {
00042     handle = ifopen(filename, "rb");
00043 
00044     if (handle == NULL)
00045         return false;
00046 
00047     if (!ReadHeader())
00048         ifclose(handle);
00049 
00050     return handle != NULL;
00051 }
00052 
00053 bool glfHandler::Create(const String & filename)
00054 {
00055     // glf is in BGZF format.
00056     handle = ifopen(filename, "wb", InputFile::BGZF);
00057 
00058     if (handle == NULL)
00059         return false;
00060 
00061     WriteHeader();
00062 
00063     return handle != NULL;
00064 }
00065 
00066 bool glfHandler::isOpen()
00067 {
00068     return handle != NULL;
00069 }
00070 
00071 bool glfHandler::ReadHeader()
00072 {
00073     if (handle == NULL)
00074         return false;
00075 
00076     char magicNumber[4];
00077 
00078     if (ifread(handle, magicNumber, 4) != 4)
00079     {
00080         errorMsg = "unexpected end of file";
00081         return false;
00082     }
00083 
00084     if (magicNumber[0] != 'G' || magicNumber[1] != 'L' || magicNumber[2] != 'F')
00085     {
00086         errorMsg = "invalid format";
00087         return false;
00088     }
00089 
00090     if (magicNumber[3] != 3)
00091     {
00092         errorMsg = "unsupported version";
00093         return false;
00094     }
00095 
00096     unsigned int headerLength = 0;
00097 
00098     if (ifread(handle, &headerLength, 4) != 4)
00099     {
00100         errorMsg = "unexpected end of file";
00101         return false;
00102     }
00103 
00104     if (headerLength > 1024 * 1024)
00105     {
00106         errorMsg = "header too large -- bailing";
00107         return false;
00108     }
00109 
00110     header.SetLength(headerLength + 1);
00111     header[headerLength] = 0;
00112 
00113     if (headerLength && ifread(handle, header.LockBuffer(headerLength + 1), headerLength) != headerLength)
00114     {
00115         errorMsg = "unexpected end of file";
00116         return false;
00117     }
00118 
00119     return true;
00120 }
00121 
00122 void glfHandler::Close()
00123 {
00124     if (isOpen())
00125         ifclose(handle);
00126 }
00127 
00128 void glfHandler::Rewind()
00129 {
00130     if (isOpen())
00131     {
00132         ifrewind(handle);
00133 
00134         if (!ReadHeader())
00135             ifclose(handle);
00136     }
00137 }
00138 
00139 bool glfHandler::NextSection()
00140 {
00141     int labelLength = 0;
00142 
00143     currentSection++;
00144     position = 0;
00145     if (ifread(handle, &labelLength, sizeof(int)) == sizeof(int))
00146     {
00147         ifread(handle, label.LockBuffer(labelLength+1), labelLength * sizeof(char));
00148         label.UnlockBuffer();
00149 
00150         maxPosition = 0;
00151         ifread(handle, &maxPosition, sizeof(int));
00152 
00153         return ((maxPosition > 0) && !ifeof(handle));
00154     }
00155 
00156     return false;
00157 }
00158 
00159 bool glfHandler::NextBaseEntry()
00160 {
00161     bool result = true;
00162 
00163     do
00164     {
00165         result = NextEntry();
00166     }
00167     while (result && data.recordType == 2);
00168 
00169     return result;
00170 }
00171 
00172 
00173 bool glfHandler::NextEntry()
00174 {
00175     // Read record type
00176     if (ifread(handle, &data, 1) != 1)
00177     {
00178         data.recordType = 0;
00179         position = maxPosition + 1;
00180         return false;
00181     }
00182 
00183     // printf("%d/%d\n", data.recordType, data.refBase);
00184 
00185     if (position > maxPosition)
00186         return true;
00187 
00188     switch (data.recordType)
00189     {
00190         case 0 :
00191             position = maxPosition + 1;
00192             return true;
00193         case 1 :
00194             if (ifread(handle,((char *) &data) + 1, sizeof(data) - 1) == sizeof(data) - 1)
00195             {
00196                 data.refBase = translateBase[data.refBase];
00197 
00198                 for (int i = 0; i < 10; i++)
00199                     likelihoods[i] = bQualityConvertor.toDouble(data.lk[i]);
00200 
00201                 position = position + data.offset;
00202                 return true;
00203             }
00204 
00205             // Premature end of file
00206             data.recordType = 0;
00207             position = maxPosition + 1;
00208             return false;
00209         case 2 :
00210             while (ifread(handle, ((char *) &data) + 1, sizeof(data) - 4) == sizeof(data) - 4)
00211             {
00212                 data.refBase = translateBase[data.refBase];
00213 
00214                 for (int i = 0; i < 3; i++)
00215                     likelihoods[i] = bQualityConvertor.toDouble(data.indel.lk[i]);
00216 
00217                 position = position + data.offset;
00218 
00219                 indelSequence[0].SetLength(abs(data.indel.length[0]) + 1);
00220                 indelSequence[0][abs(data.indel.length[0])] = 0;
00221                 if (ifread(handle, indelSequence[0].LockBuffer(), abs(data.indel.length[0])) != (unsigned int) abs(data.indel.length[0]))
00222                     break;
00223 
00224                 indelSequence[1].SetLength(abs(data.indel.length[1]) + 1);
00225                 indelSequence[1][abs(data.indel.length[1])] = 0;
00226                 if (ifread(handle, indelSequence[1].LockBuffer(), abs(data.indel.length[1])) != (unsigned int) abs(data.indel.length[1]))
00227                     break;
00228 
00229                 return true;
00230             }
00231 
00232             // Premature end of file
00233             data.recordType = 0;
00234             position = maxPosition + 1;
00235             return false;
00236     }
00237 
00238     return false;
00239 }
00240 
00241 glfEntry & glfEntry::operator = (glfEntry & rhs)
00242 {
00243     refBase = rhs.refBase;
00244     recordType = rhs.recordType;
00245     offset = rhs.offset;
00246     mapQuality = rhs.mapQuality;
00247 
00248     for (int i = 0; i < 10; i++)
00249         lk[i] = rhs.lk[i];
00250 
00251     minLLK = rhs.minLLK;
00252     depth = rhs.depth;
00253 
00254     return * this;
00255 }
00256 
00257 const double * glfHandler::GetLikelihoods(int pos)
00258 {
00259     if (pos == position)
00260         return likelihoods;
00261 
00262     return nullLikelihoods;
00263 }
00264 
00265 const unsigned char * glfHandler::GetLogLikelihoods(int pos)
00266 {
00267     if (pos == position)
00268         return data.lk;
00269 
00270     return nullLogLikelihoods;
00271 }
00272 
00273 char glfHandler::GetReference(int pos, char defaultBase)
00274 {
00275     if (pos == position)
00276         return data.refBase;
00277 
00278     return defaultBase;
00279 }
00280 
00281 int glfHandler::GetDepth(int pos)
00282 {
00283     if (pos == position)
00284         return data.depth;
00285 
00286     return 0;
00287 }
00288 
00289 int glfHandler::GetMapQuality(int pos)
00290 {
00291     if (pos == position)
00292         return data.mapQuality;
00293 
00294     return 0;
00295 }
00296 
00297 void glfHandler::WriteHeader(const String & headerText)
00298 {
00299     char magicNumber[4] = {'G', 'L', 'F', 3};
00300 
00301     ifwrite(handle, magicNumber, 4);
00302 
00303     unsigned int headerLength = headerText.Length();
00304 
00305     ifwrite(handle, &headerLength, 4);
00306     ifwrite(handle, (void *)(const char *) headerText, headerLength);
00307 }
00308 
00309 void glfHandler::BeginSection(const String & sectionLabel, int sectionLength)
00310 {
00311     int labelLength = sectionLabel.Length() + 1;
00312 
00313     ifwrite(handle, &labelLength, sizeof(int));
00314     ifwrite(handle, (void *)(const char *) sectionLabel, labelLength);
00315     ifwrite(handle, &sectionLength, sizeof(int));
00316 
00317     label = sectionLabel;
00318     maxPosition = sectionLength;
00319 }
00320 
00321 void glfHandler::EndSection()
00322 {
00323     char marker = 0;
00324 
00325     ifwrite(handle, &marker, sizeof(char));
00326 }
00327 
00328 void glfHandler::WriteEntry(int outputPosition)
00329 {
00330     data.offset = outputPosition - position;
00331     position = outputPosition;
00332 
00333     switch (data.recordType)
00334     {
00335         case 0 :
00336             EndSection();
00337             return;
00338         case 1 :
00339             data.refBase = backTranslateBase[data.refBase];
00340             ifwrite(handle, &data, sizeof(data));
00341             data.refBase = translateBase[data.refBase];
00342             return;
00343         case 2 :
00344             data.refBase = backTranslateBase[data.refBase];
00345             ifwrite(handle, &data, sizeof(data) - 3);
00346             data.refBase = translateBase[data.refBase];
00347 
00348             ifwrite(handle, (void *)(const char *) indelSequence[0], abs(data.indel.length[0]));
00349             ifwrite(handle, (void *)(const char *) indelSequence[1], abs(data.indel.length[1]));
00350             return;
00351     }
00352 }
00353 
Generated on Wed Nov 17 15:38:28 2010 for StatGen Software by  doxygen 1.6.3