00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "glfHandler.h"
00019 #include "BaseQualityHelper.h"
00020
00021 char glfHandler::translateBase[16] = {0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0};
00022 char glfHandler::backTranslateBase[5] = { 15, 1, 2, 4, 8 };
00023 unsigned char glfHandler::nullLogLikelihoods[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
00024 double glfHandler::nullLikelihoods[10] = {1., 1., 1., 1., 1., 1., 1., 1., 1., 1.};
00025
00026 glfHandler::glfHandler()
00027 {
00028 isStub = true;
00029 sections = 0;
00030 currentSection = 0;
00031 maxPosition = position = endOfSection = 0;
00032 }
00033
00034 glfHandler::~glfHandler()
00035 {
00036
00037
00038
00039 }
00040
00041 bool glfHandler::Open(const String & filename)
00042 {
00043 isStub = false;
00044 handle = ifopen(filename, "rb");
00045
00046 if (handle == NULL)
00047 {
00048 isStub = true;
00049 return false;
00050 }
00051
00052 if (!ReadHeader())
00053 ifclose(handle);
00054
00055 endOfSection = true;
00056
00057 return handle != NULL;
00058 }
00059
00060 void glfHandler::OpenStub()
00061 {
00062 isStub = true;
00063 handle = NULL;
00064
00065 endOfSection = true;
00066 data.recordType = 0;
00067 maxPosition = 1999999999;
00068 position = maxPosition + 1;
00069 }
00070
00071 bool glfHandler::Create(const String & filename)
00072 {
00073 isStub = false;
00074
00075 handle = ifopen(filename, "wb", InputFile::BGZF);
00076
00077 if (handle == NULL)
00078 {
00079 isStub = true;
00080 return false;
00081 }
00082
00083 WriteHeader();
00084
00085 return handle != NULL;
00086 }
00087
00088 bool glfHandler::isOpen()
00089 {
00090 return handle != NULL;
00091 }
00092
00093 bool glfHandler::ReadHeader()
00094 {
00095 if (isStub)
00096 return true;
00097
00098 if (handle == NULL)
00099 return false;
00100
00101 char magicNumber[4];
00102
00103 if (ifread(handle, magicNumber, 4) != 4)
00104 {
00105 errorMsg = "unexpected end of file";
00106 return false;
00107 }
00108
00109 if (magicNumber[0] != 'G' || magicNumber[1] != 'L' || magicNumber[2] != 'F')
00110 {
00111 errorMsg = "invalid format";
00112 return false;
00113 }
00114
00115 if (magicNumber[3] != 3)
00116 {
00117 errorMsg = "unsupported version";
00118 return false;
00119 }
00120
00121 unsigned int headerLength = 0;
00122
00123 if (ifread(handle, &headerLength, 4) != 4)
00124 {
00125 errorMsg = "unexpected end of file";
00126 return false;
00127 }
00128
00129 if (headerLength > 1024 * 1024)
00130 {
00131 errorMsg = "header too large -- bailing";
00132 return false;
00133 }
00134
00135 header.SetLength(headerLength + 1);
00136 header[headerLength] = 0;
00137
00138 if (headerLength && ifread(handle, header.LockBuffer(headerLength + 1), headerLength) != headerLength)
00139 {
00140 errorMsg = "unexpected end of file";
00141 return false;
00142 }
00143
00144 return true;
00145 }
00146
00147 void glfHandler::Close()
00148 {
00149 if (isOpen())
00150 ifclose(handle);
00151 }
00152
00153 void glfHandler::Rewind()
00154 {
00155 if (isOpen())
00156 {
00157 ifrewind(handle);
00158
00159 if (!ReadHeader())
00160 ifclose(handle);
00161
00162 endOfSection = true;
00163 }
00164 }
00165
00166 bool glfHandler::NextSection()
00167 {
00168 if (isStub)
00169 {
00170 endOfSection = true;
00171 data.recordType = 0;
00172 maxPosition = 1999999999;
00173 position = maxPosition + 1;
00174 return true;
00175 }
00176
00177 while (!endOfSection && !ifeof(handle))
00178 NextEntry();
00179
00180 endOfSection = false;
00181
00182 int labelLength = 0;
00183
00184 currentSection++;
00185 position = 0;
00186 if (ifread(handle, &labelLength, sizeof(int)) == sizeof(int))
00187 {
00188 ifread(handle, label.LockBuffer(labelLength+1), labelLength * sizeof(char));
00189 label.UnlockBuffer();
00190
00191 maxPosition = 0;
00192 ifread(handle, &maxPosition, sizeof(int));
00193
00194 return ((maxPosition > 0) && !ifeof(handle));
00195 }
00196
00197 return false;
00198 }
00199
00200 bool glfHandler::NextBaseEntry()
00201 {
00202 bool result = true;
00203
00204 do
00205 {
00206 result = NextEntry();
00207 }
00208 while (result && data.recordType == 2);
00209
00210 return result;
00211 }
00212
00213
00214 bool glfHandler::NextEntry()
00215 {
00216 if (isStub)
00217 return false;
00218
00219
00220 if (endOfSection || (ifread(handle, &data, 1) != 1))
00221 {
00222 endOfSection = true;
00223 data.recordType = 0;
00224 position = maxPosition + 1;
00225 return false;
00226 }
00227
00228
00229
00230 if (position > maxPosition)
00231 return true;
00232
00233 switch (data.recordType)
00234 {
00235 case 0 :
00236 endOfSection = true;
00237 position = maxPosition + 1;
00238 return true;
00239 case 1 :
00240 if (ifread(handle,((char *) &data) + 1, sizeof(data) - 1) == sizeof(data) - 1)
00241 {
00242 data.refBase = translateBase[data.refBase];
00243
00244 for (int i = 0; i < 10; i++)
00245 likelihoods[i] = bQualityConvertor.toDouble(data.lk[i]);
00246
00247 position = position + data.offset;
00248 return true;
00249 }
00250
00251
00252 data.recordType = 0;
00253 position = maxPosition + 1;
00254 return false;
00255 case 2 :
00256 while (ifread(handle, ((char *) &data) + 1, sizeof(data) - 4) == sizeof(data) - 4)
00257 {
00258 data.refBase = translateBase[data.refBase];
00259
00260 for (int i = 0; i < 3; i++)
00261 likelihoods[i] = bQualityConvertor.toDouble(data.indel.lk[i]);
00262
00263 position = position + data.offset;
00264
00265 indelSequence[0].SetLength(abs(data.indel.length[0]) + 1);
00266 indelSequence[0][abs(data.indel.length[0])] = 0;
00267 if (ifread(handle, indelSequence[0].LockBuffer(), abs(data.indel.length[0])) != (unsigned int) abs(data.indel.length[0]))
00268 break;
00269
00270 indelSequence[1].SetLength(abs(data.indel.length[1]) + 1);
00271 indelSequence[1][abs(data.indel.length[1])] = 0;
00272 if (ifread(handle, indelSequence[1].LockBuffer(), abs(data.indel.length[1])) != (unsigned int) abs(data.indel.length[1]))
00273 break;
00274
00275 return true;
00276 }
00277
00278
00279 data.recordType = 0;
00280 position = maxPosition + 1;
00281 return false;
00282 }
00283
00284 return false;
00285 }
00286
00287 glfEntry & glfEntry::operator = (glfEntry & rhs)
00288 {
00289 refBase = rhs.refBase;
00290 recordType = rhs.recordType;
00291 offset = rhs.offset;
00292 mapQuality = rhs.mapQuality;
00293
00294 for (int i = 0; i < 10; i++)
00295 lk[i] = rhs.lk[i];
00296
00297 minLLK = rhs.minLLK;
00298 depth = rhs.depth;
00299
00300 return * this;
00301 }
00302
00303 const double * glfHandler::GetLikelihoods(int pos)
00304 {
00305 if (pos == position)
00306 return likelihoods;
00307
00308 return nullLikelihoods;
00309 }
00310
00311 const unsigned char * glfHandler::GetLogLikelihoods(int pos)
00312 {
00313 if (pos == position)
00314 return data.lk;
00315
00316 return nullLogLikelihoods;
00317 }
00318
00319 char glfHandler::GetReference(int pos, char defaultBase)
00320 {
00321 if (pos == position)
00322 return data.refBase;
00323
00324 return defaultBase;
00325 }
00326
00327 int glfHandler::GetDepth(int pos)
00328 {
00329 if (pos == position)
00330 return data.depth;
00331
00332 return 0;
00333 }
00334
00335 int glfHandler::GetMapQuality(int pos)
00336 {
00337 if (pos == position)
00338 return data.mapQuality;
00339
00340 return 0;
00341 }
00342
00343 void glfHandler::WriteHeader(const String & headerText)
00344 {
00345 char magicNumber[4] = {'G', 'L', 'F', 3};
00346
00347 ifwrite(handle, magicNumber, 4);
00348
00349 unsigned int headerLength = headerText.Length();
00350
00351 ifwrite(handle, &headerLength, 4);
00352 ifwrite(handle, (void *)(const char *) headerText, headerLength);
00353 }
00354
00355 void glfHandler::BeginSection(const String & sectionLabel, int sectionLength)
00356 {
00357 int labelLength = sectionLabel.Length() + 1;
00358
00359 ifwrite(handle, &labelLength, sizeof(int));
00360 ifwrite(handle, (void *)(const char *) sectionLabel, labelLength);
00361 ifwrite(handle, §ionLength, sizeof(int));
00362
00363 label = sectionLabel;
00364 maxPosition = sectionLength;
00365 }
00366
00367 void glfHandler::EndSection()
00368 {
00369 char marker = 0;
00370
00371 ifwrite(handle, &marker, sizeof(char));
00372 }
00373
00374 void glfHandler::WriteEntry(int outputPosition)
00375 {
00376 data.offset = outputPosition - position;
00377 position = outputPosition;
00378
00379 switch (data.recordType)
00380 {
00381 case 0 :
00382 EndSection();
00383 return;
00384 case 1 :
00385 data.refBase = backTranslateBase[data.refBase];
00386 ifwrite(handle, &data, sizeof(data));
00387 data.refBase = translateBase[data.refBase];
00388 return;
00389 case 2 :
00390 data.refBase = backTranslateBase[data.refBase];
00391 ifwrite(handle, &data, sizeof(data) - 3);
00392 data.refBase = translateBase[data.refBase];
00393
00394 ifwrite(handle, (void *)(const char *) indelSequence[0], abs(data.indel.length[0]));
00395 ifwrite(handle, (void *)(const char *) indelSequence[1], abs(data.indel.length[1]));
00396 return;
00397 }
00398 }
00399