libStatGen Software  1
glfHandler.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "glfHandler.h"
00019 #include "BaseQualityHelper.h"
00020 
00021 char glfHandler::translateBase[16] = {0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0};
00022 char glfHandler::backTranslateBase[5] = { 15, 1, 2, 4, 8 };
00023 unsigned char glfHandler::nullLogLikelihoods[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
00024 double glfHandler::nullLikelihoods[10] = {1., 1., 1., 1., 1., 1., 1., 1., 1., 1.};
00025 
00026 glfHandler::glfHandler()
00027 {
00028     isStub = true;
00029     sections = 0;
00030     currentSection = 0;
00031     maxPosition = position = endOfSection = 0;
00032 }
00033 
00034 glfHandler::~glfHandler()
00035 {
00036     // Not safe to close the file here in case a copy of the file was generated
00037     // if (isOpen())
00038     //   Close();
00039 }
00040 
00041 bool glfHandler::Open(const String & filename)
00042 {
00043     isStub = false;
00044     handle = ifopen(filename, "rb");
00045 
00046     if (handle == NULL)
00047     {
00048         isStub = true;
00049         return false;
00050     }
00051 
00052     if (!ReadHeader())
00053         ifclose(handle);
00054 
00055     endOfSection = true;
00056 
00057     return handle != NULL;
00058 }
00059 
00060 void glfHandler::OpenStub()
00061 {
00062     isStub = true;
00063     handle = NULL;
00064 
00065     endOfSection = true;
00066     data.recordType = 0;
00067     maxPosition = 1999999999;
00068     position = maxPosition + 1;
00069 }
00070 
00071 bool glfHandler::Create(const String & filename)
00072 {
00073     isStub = false;
00074     // glf is in BGZF format.
00075     handle = ifopen(filename, "wb", InputFile::BGZF);
00076 
00077     if (handle == NULL)
00078     {
00079         isStub = true;
00080         return false;
00081     }
00082 
00083     WriteHeader();
00084 
00085     return handle != NULL;
00086 }
00087 
00088 bool glfHandler::isOpen()
00089 {
00090     return handle != NULL;
00091 }
00092 
00093 bool glfHandler::ReadHeader()
00094 {
00095     if (isStub)
00096         return true;
00097 
00098     if (handle == NULL)
00099         return false;
00100 
00101     char magicNumber[4];
00102 
00103     if (ifread(handle, magicNumber, 4) != 4)
00104     {
00105         errorMsg = "unexpected end of file";
00106         return false;
00107     }
00108 
00109     if (magicNumber[0] != 'G' || magicNumber[1] != 'L' || magicNumber[2] != 'F')
00110     {
00111         errorMsg = "invalid format";
00112         return false;
00113     }
00114 
00115     if (magicNumber[3] != 3)
00116     {
00117         errorMsg = "unsupported version";
00118         return false;
00119     }
00120 
00121     unsigned int headerLength = 0;
00122 
00123     if (ifread(handle, &headerLength, 4) != 4)
00124     {
00125         errorMsg = "unexpected end of file";
00126         return false;
00127     }
00128 
00129     if (headerLength > 1024 * 1024)
00130     {
00131         errorMsg = "header too large -- bailing";
00132         return false;
00133     }
00134 
00135     header.SetLength(headerLength + 1);
00136     header[headerLength] = 0;
00137 
00138     if (headerLength && ifread(handle, header.LockBuffer(headerLength + 1), headerLength) != headerLength)
00139     {
00140         errorMsg = "unexpected end of file";
00141         return false;
00142     }
00143 
00144     return true;
00145 }
00146 
00147 void glfHandler::Close()
00148 {
00149     if (isOpen())
00150         ifclose(handle);
00151 }
00152 
00153 void glfHandler::Rewind()
00154 {
00155     if (isOpen())
00156     {
00157         ifrewind(handle);
00158 
00159         if (!ReadHeader())
00160             ifclose(handle);
00161 
00162         endOfSection = true;
00163     }
00164 }
00165 
00166 bool glfHandler::NextSection()
00167 {
00168     if (isStub)
00169     {
00170         endOfSection = true;
00171         data.recordType = 0;
00172         maxPosition = 1999999999;
00173         position = maxPosition + 1;
00174         return true;
00175     }
00176 
00177     while (!endOfSection && !ifeof(handle))
00178         NextEntry();
00179 
00180     endOfSection = false;
00181 
00182     int labelLength = 0;
00183 
00184     currentSection++;
00185     position = 0;
00186     if (ifread(handle, &labelLength, sizeof(int)) == sizeof(int))
00187     {
00188         ifread(handle, label.LockBuffer(labelLength+1), labelLength * sizeof(char));
00189         label.UnlockBuffer();
00190 
00191         maxPosition = 0;
00192         ifread(handle, &maxPosition, sizeof(int));
00193 
00194         return ((maxPosition > 0) && !ifeof(handle));
00195     }
00196 
00197     return false;
00198 }
00199 
00200 bool glfHandler::NextBaseEntry()
00201 {
00202     bool result = true;
00203 
00204     do
00205     {
00206         result = NextEntry();
00207     }
00208     while (result && data.recordType == 2);
00209 
00210     return result;
00211 }
00212 
00213 
00214 bool glfHandler::NextEntry()
00215 {
00216     if (isStub)
00217         return false;
00218 
00219     // Read record type
00220     if (endOfSection || (ifread(handle, &data, 1) != 1))
00221     {
00222         endOfSection = true;
00223         data.recordType = 0;
00224         position = maxPosition + 1;
00225         return false;
00226     }
00227 
00228     // printf("%d/%d\n", data.recordType, data.refBase);
00229 
00230     if (position > maxPosition)
00231         return true;
00232 
00233     switch (data.recordType)
00234     {
00235         case 0 :
00236             endOfSection = true;
00237             position = maxPosition + 1;
00238             return true;
00239         case 1 :
00240             if (ifread(handle,((char *) &data) + 1, sizeof(data) - 1) == sizeof(data) - 1)
00241             {
00242                 data.refBase = translateBase[data.refBase];
00243 
00244                 for (int i = 0; i < 10; i++)
00245                     likelihoods[i] = bQualityConvertor.toDouble(data.lk[i]);
00246 
00247                 position = position + data.offset;
00248                 return true;
00249             }
00250 
00251             // Premature end of file
00252             data.recordType = 0;
00253             position = maxPosition + 1;
00254             return false;
00255         case 2 :
00256             while (ifread(handle, ((char *) &data) + 1, sizeof(data) - 4) == sizeof(data) - 4)
00257             {
00258                 data.refBase = translateBase[data.refBase];
00259 
00260                 for (int i = 0; i < 3; i++)
00261                     likelihoods[i] = bQualityConvertor.toDouble(data.indel.lk[i]);
00262 
00263                 position = position + data.offset;
00264 
00265                 indelSequence[0].SetLength(abs(data.indel.length[0]) + 1);
00266                 indelSequence[0][abs(data.indel.length[0])] = 0;
00267                 if (ifread(handle, indelSequence[0].LockBuffer(), abs(data.indel.length[0])) != (unsigned int) abs(data.indel.length[0]))
00268                     break;
00269 
00270                 indelSequence[1].SetLength(abs(data.indel.length[1]) + 1);
00271                 indelSequence[1][abs(data.indel.length[1])] = 0;
00272                 if (ifread(handle, indelSequence[1].LockBuffer(), abs(data.indel.length[1])) != (unsigned int) abs(data.indel.length[1]))
00273                     break;
00274 
00275                 return true;
00276             }
00277 
00278             // Premature end of file
00279             data.recordType = 0;
00280             position = maxPosition + 1;
00281             return false;
00282     }
00283 
00284     return false;
00285 }
00286 
00287 glfEntry & glfEntry::operator = (glfEntry & rhs)
00288 {
00289     refBase = rhs.refBase;
00290     recordType = rhs.recordType;
00291     offset = rhs.offset;
00292     mapQuality = rhs.mapQuality;
00293 
00294     for (int i = 0; i < 10; i++)
00295         lk[i] = rhs.lk[i];
00296 
00297     minLLK = rhs.minLLK;
00298     depth = rhs.depth;
00299 
00300     return * this;
00301 }
00302 
00303 const double * glfHandler::GetLikelihoods(int pos)
00304 {
00305     if (pos == position)
00306         return likelihoods;
00307 
00308     return nullLikelihoods;
00309 }
00310 
00311 const unsigned char * glfHandler::GetLogLikelihoods(int pos)
00312 {
00313     if (pos == position)
00314         return data.lk;
00315 
00316     return nullLogLikelihoods;
00317 }
00318 
00319 char glfHandler::GetReference(int pos, char defaultBase)
00320 {
00321     if (pos == position)
00322         return data.refBase;
00323 
00324     return defaultBase;
00325 }
00326 
00327 int glfHandler::GetDepth(int pos)
00328 {
00329     if (pos == position)
00330         return data.depth;
00331 
00332     return 0;
00333 }
00334 
00335 int glfHandler::GetMapQuality(int pos)
00336 {
00337     if (pos == position)
00338         return data.mapQuality;
00339 
00340     return 0;
00341 }
00342 
00343 void glfHandler::WriteHeader(const String & headerText)
00344 {
00345     char magicNumber[4] = {'G', 'L', 'F', 3};
00346 
00347     ifwrite(handle, magicNumber, 4);
00348 
00349     unsigned int headerLength = headerText.Length();
00350 
00351     ifwrite(handle, &headerLength, 4);
00352     ifwrite(handle, (void *)(const char *) headerText, headerLength);
00353 }
00354 
00355 void glfHandler::BeginSection(const String & sectionLabel, int sectionLength)
00356 {
00357     int labelLength = sectionLabel.Length() + 1;
00358 
00359     ifwrite(handle, &labelLength, sizeof(int));
00360     ifwrite(handle, (void *)(const char *) sectionLabel, labelLength);
00361     ifwrite(handle, &sectionLength, sizeof(int));
00362 
00363     label = sectionLabel;
00364     maxPosition = sectionLength;
00365 }
00366 
00367 void glfHandler::EndSection()
00368 {
00369     char marker = 0;
00370 
00371     ifwrite(handle, &marker, sizeof(char));
00372 }
00373 
00374 void glfHandler::WriteEntry(int outputPosition)
00375 {
00376     data.offset = outputPosition - position;
00377     position = outputPosition;
00378 
00379     switch (data.recordType)
00380     {
00381         case 0 :
00382             EndSection();
00383             return;
00384         case 1 :
00385             data.refBase = backTranslateBase[data.refBase];
00386             ifwrite(handle, &data, sizeof(data));
00387             data.refBase = translateBase[data.refBase];
00388             return;
00389         case 2 :
00390             data.refBase = backTranslateBase[data.refBase];
00391             ifwrite(handle, &data, sizeof(data) - 3);
00392             data.refBase = translateBase[data.refBase];
00393 
00394             ifwrite(handle, (void *)(const char *) indelSequence[0], abs(data.indel.length[0]));
00395             ifwrite(handle, (void *)(const char *) indelSequence[1], abs(data.indel.length[1]));
00396             return;
00397     }
00398 }
00399 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends