00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "glfHandler.h"
00019 #include "BaseQualityHelper.h"
00020
00021 char glfHandler::translateBase[16] = {0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0};
00022 char glfHandler::backTranslateBase[5] = { 15, 1, 2, 4, 8 };
00023 unsigned char glfHandler::nullLogLikelihoods[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
00024 double glfHandler::nullLikelihoods[10] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0.};
00025
00026 glfHandler::glfHandler()
00027 {
00028 sections = 0;
00029 currentSection = 0;
00030 maxPosition = position = 0;
00031 }
00032
00033 glfHandler::~glfHandler()
00034 {
00035
00036
00037
00038 }
00039
00040 bool glfHandler::Open(const String & filename)
00041 {
00042 handle = ifopen(filename, "rb");
00043
00044 if (handle == NULL)
00045 return false;
00046
00047 if (!ReadHeader())
00048 ifclose(handle);
00049
00050 return handle != NULL;
00051 }
00052
00053 bool glfHandler::Create(const String & filename)
00054 {
00055
00056 handle = ifopen(filename, "wb", InputFile::BGZF);
00057
00058 if (handle == NULL)
00059 return false;
00060
00061 WriteHeader();
00062
00063 return handle != NULL;
00064 }
00065
00066 bool glfHandler::isOpen()
00067 {
00068 return handle != NULL;
00069 }
00070
00071 bool glfHandler::ReadHeader()
00072 {
00073 if (handle == NULL)
00074 return false;
00075
00076 char magicNumber[4];
00077
00078 if (ifread(handle, magicNumber, 4) != 4)
00079 {
00080 errorMsg = "unexpected end of file";
00081 return false;
00082 }
00083
00084 if (magicNumber[0] != 'G' || magicNumber[1] != 'L' || magicNumber[2] != 'F')
00085 {
00086 errorMsg = "invalid format";
00087 return false;
00088 }
00089
00090 if (magicNumber[3] != 3)
00091 {
00092 errorMsg = "unsupported version";
00093 return false;
00094 }
00095
00096 unsigned int headerLength = 0;
00097
00098 if (ifread(handle, &headerLength, 4) != 4)
00099 {
00100 errorMsg = "unexpected end of file";
00101 return false;
00102 }
00103
00104 if (headerLength > 1024 * 1024)
00105 {
00106 errorMsg = "header too large -- bailing";
00107 return false;
00108 }
00109
00110 header.SetLength(headerLength + 1);
00111 header[headerLength] = 0;
00112
00113 if (headerLength && ifread(handle, header.LockBuffer(headerLength + 1), headerLength) != headerLength)
00114 {
00115 errorMsg = "unexpected end of file";
00116 return false;
00117 }
00118
00119 return true;
00120 }
00121
00122 void glfHandler::Close()
00123 {
00124 if (isOpen())
00125 ifclose(handle);
00126 }
00127
00128 void glfHandler::Rewind()
00129 {
00130 if (isOpen())
00131 {
00132 ifrewind(handle);
00133
00134 if (!ReadHeader())
00135 ifclose(handle);
00136 }
00137 }
00138
00139 bool glfHandler::NextSection()
00140 {
00141 int labelLength = 0;
00142
00143 currentSection++;
00144 position = 0;
00145 if (ifread(handle, &labelLength, sizeof(int)) == sizeof(int))
00146 {
00147 ifread(handle, label.LockBuffer(labelLength+1), labelLength * sizeof(char));
00148 label.UnlockBuffer();
00149
00150 maxPosition = 0;
00151 ifread(handle, &maxPosition, sizeof(int));
00152
00153 return ((maxPosition > 0) && !ifeof(handle));
00154 }
00155
00156 return false;
00157 }
00158
00159 bool glfHandler::NextBaseEntry()
00160 {
00161 bool result = true;
00162
00163 do
00164 {
00165 result = NextEntry();
00166 }
00167 while (result && data.recordType == 2);
00168
00169 return result;
00170 }
00171
00172
00173 bool glfHandler::NextEntry()
00174 {
00175
00176 if (ifread(handle, &data, 1) != 1)
00177 {
00178 data.recordType = 0;
00179 position = maxPosition + 1;
00180 return false;
00181 }
00182
00183
00184
00185 if (position > maxPosition)
00186 return true;
00187
00188 switch (data.recordType)
00189 {
00190 case 0 :
00191 position = maxPosition + 1;
00192 return true;
00193 case 1 :
00194 if (ifread(handle,((char *) &data) + 1, sizeof(data) - 1) == sizeof(data) - 1)
00195 {
00196 data.refBase = translateBase[data.refBase];
00197
00198 for (int i = 0; i < 10; i++)
00199 likelihoods[i] = bQualityConvertor.toDouble(data.lk[i]);
00200
00201 position = position + data.offset;
00202 return true;
00203 }
00204
00205
00206 data.recordType = 0;
00207 position = maxPosition + 1;
00208 return false;
00209 case 2 :
00210 while (ifread(handle, ((char *) &data) + 1, sizeof(data) - 4) == sizeof(data) - 4)
00211 {
00212 data.refBase = translateBase[data.refBase];
00213
00214 for (int i = 0; i < 3; i++)
00215 likelihoods[i] = bQualityConvertor.toDouble(data.indel.lk[i]);
00216
00217 position = position + data.offset;
00218
00219 indelSequence[0].SetLength(abs(data.indel.length[0]) + 1);
00220 indelSequence[0][abs(data.indel.length[0])] = 0;
00221 if (ifread(handle, indelSequence[0].LockBuffer(), abs(data.indel.length[0])) != (unsigned int) abs(data.indel.length[0]))
00222 break;
00223
00224 indelSequence[1].SetLength(abs(data.indel.length[1]) + 1);
00225 indelSequence[1][abs(data.indel.length[1])] = 0;
00226 if (ifread(handle, indelSequence[1].LockBuffer(), abs(data.indel.length[1])) != (unsigned int) abs(data.indel.length[1]))
00227 break;
00228
00229 return true;
00230 }
00231
00232
00233 data.recordType = 0;
00234 position = maxPosition + 1;
00235 return false;
00236 }
00237
00238 return false;
00239 }
00240
00241 glfEntry & glfEntry::operator = (glfEntry & rhs)
00242 {
00243 refBase = rhs.refBase;
00244 recordType = rhs.recordType;
00245 offset = rhs.offset;
00246 mapQuality = rhs.mapQuality;
00247
00248 for (int i = 0; i < 10; i++)
00249 lk[i] = rhs.lk[i];
00250
00251 minLLK = rhs.minLLK;
00252 depth = rhs.depth;
00253
00254 return * this;
00255 }
00256
00257 const double * glfHandler::GetLikelihoods(int pos)
00258 {
00259 if (pos == position)
00260 return likelihoods;
00261
00262 return nullLikelihoods;
00263 }
00264
00265 const unsigned char * glfHandler::GetLogLikelihoods(int pos)
00266 {
00267 if (pos == position)
00268 return data.lk;
00269
00270 return nullLogLikelihoods;
00271 }
00272
00273 char glfHandler::GetReference(int pos, char defaultBase)
00274 {
00275 if (pos == position)
00276 return data.refBase;
00277
00278 return defaultBase;
00279 }
00280
00281 int glfHandler::GetDepth(int pos)
00282 {
00283 if (pos == position)
00284 return data.depth;
00285
00286 return 0;
00287 }
00288
00289 int glfHandler::GetMapQuality(int pos)
00290 {
00291 if (pos == position)
00292 return data.mapQuality;
00293
00294 return 0;
00295 }
00296
00297 void glfHandler::WriteHeader(const String & headerText)
00298 {
00299 char magicNumber[4] = {'G', 'L', 'F', 3};
00300
00301 ifwrite(handle, magicNumber, 4);
00302
00303 unsigned int headerLength = headerText.Length();
00304
00305 ifwrite(handle, &headerLength, 4);
00306 ifwrite(handle, (void *)(const char *) headerText, headerLength);
00307 }
00308
00309 void glfHandler::BeginSection(const String & sectionLabel, int sectionLength)
00310 {
00311 int labelLength = sectionLabel.Length() + 1;
00312
00313 ifwrite(handle, &labelLength, sizeof(int));
00314 ifwrite(handle, (void *)(const char *) sectionLabel, labelLength);
00315 ifwrite(handle, §ionLength, sizeof(int));
00316
00317 label = sectionLabel;
00318 maxPosition = sectionLength;
00319 }
00320
00321 void glfHandler::EndSection()
00322 {
00323 char marker = 0;
00324
00325 ifwrite(handle, &marker, sizeof(char));
00326 }
00327
00328 void glfHandler::WriteEntry(int outputPosition)
00329 {
00330 data.offset = outputPosition - position;
00331 position = outputPosition;
00332
00333 switch (data.recordType)
00334 {
00335 case 0 :
00336 EndSection();
00337 return;
00338 case 1 :
00339 data.refBase = backTranslateBase[data.refBase];
00340 ifwrite(handle, &data, sizeof(data));
00341 data.refBase = translateBase[data.refBase];
00342 return;
00343 case 2 :
00344 data.refBase = backTranslateBase[data.refBase];
00345 ifwrite(handle, &data, sizeof(data) - 3);
00346 data.refBase = translateBase[data.refBase];
00347
00348 ifwrite(handle, (void *)(const char *) indelSequence[0], abs(data.indel.length[0]));
00349 ifwrite(handle, (void *)(const char *) indelSequence[1], abs(data.indel.length[1]));
00350 return;
00351 }
00352 }
00353