libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "glfHandler.h" 00019 #include "BaseQualityHelper.h" 00020 00021 char glfHandler::translateBase[16] = {0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0}; 00022 char glfHandler::backTranslateBase[5] = { 15, 1, 2, 4, 8 }; 00023 unsigned char glfHandler::nullLogLikelihoods[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 00024 double glfHandler::nullLikelihoods[10] = {1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}; 00025 00026 glfHandler::glfHandler() 00027 { 00028 isStub = true; 00029 sections = 0; 00030 currentSection = 0; 00031 maxPosition = position = endOfSection = 0; 00032 } 00033 00034 glfHandler::~glfHandler() 00035 { 00036 // Not safe to close the file here in case a copy of the file was generated 00037 // if (isOpen()) 00038 // Close(); 00039 } 00040 00041 bool glfHandler::Open(const String & filename) 00042 { 00043 isStub = false; 00044 handle = ifopen(filename, "rb"); 00045 00046 if (handle == NULL) 00047 { 00048 isStub = true; 00049 return false; 00050 } 00051 00052 if (!ReadHeader()) 00053 ifclose(handle); 00054 00055 endOfSection = true; 00056 00057 return handle != NULL; 00058 } 00059 00060 void glfHandler::OpenStub() 00061 { 00062 isStub = true; 00063 handle = NULL; 00064 00065 endOfSection = true; 00066 data.recordType = 0; 00067 maxPosition = 1999999999; 00068 position = maxPosition + 1; 00069 } 00070 00071 bool glfHandler::Create(const String & filename) 00072 { 00073 isStub = false; 00074 // glf is in BGZF format. 00075 handle = ifopen(filename, "wb", InputFile::BGZF); 00076 00077 if (handle == NULL) 00078 { 00079 isStub = true; 00080 return false; 00081 } 00082 00083 WriteHeader(); 00084 00085 return handle != NULL; 00086 } 00087 00088 bool glfHandler::isOpen() 00089 { 00090 return handle != NULL; 00091 } 00092 00093 bool glfHandler::ReadHeader() 00094 { 00095 if (isStub) 00096 return true; 00097 00098 if (handle == NULL) 00099 return false; 00100 00101 char magicNumber[4]; 00102 00103 if (ifread(handle, magicNumber, 4) != 4) 00104 { 00105 errorMsg = "unexpected end of file"; 00106 return false; 00107 } 00108 00109 if (magicNumber[0] != 'G' || magicNumber[1] != 'L' || magicNumber[2] != 'F') 00110 { 00111 errorMsg = "invalid format"; 00112 return false; 00113 } 00114 00115 if (magicNumber[3] != 3) 00116 { 00117 errorMsg = "unsupported version"; 00118 return false; 00119 } 00120 00121 unsigned int headerLength = 0; 00122 00123 if (ifread(handle, &headerLength, 4) != 4) 00124 { 00125 errorMsg = "unexpected end of file"; 00126 return false; 00127 } 00128 00129 if (headerLength > 1024 * 1024) 00130 { 00131 errorMsg = "header too large -- bailing"; 00132 return false; 00133 } 00134 00135 header.SetLength(headerLength + 1); 00136 header[headerLength] = 0; 00137 00138 if (headerLength && ifread(handle, header.LockBuffer(headerLength + 1), headerLength) != headerLength) 00139 { 00140 errorMsg = "unexpected end of file"; 00141 return false; 00142 } 00143 00144 return true; 00145 } 00146 00147 void glfHandler::Close() 00148 { 00149 if (isOpen()) 00150 ifclose(handle); 00151 } 00152 00153 void glfHandler::Rewind() 00154 { 00155 if (isOpen()) 00156 { 00157 ifrewind(handle); 00158 00159 if (!ReadHeader()) 00160 ifclose(handle); 00161 00162 endOfSection = true; 00163 } 00164 } 00165 00166 bool glfHandler::NextSection() 00167 { 00168 if (isStub) 00169 { 00170 endOfSection = true; 00171 data.recordType = 0; 00172 maxPosition = 1999999999; 00173 position = maxPosition + 1; 00174 return true; 00175 } 00176 00177 while (!endOfSection && !ifeof(handle)) 00178 NextEntry(); 00179 00180 endOfSection = false; 00181 00182 int labelLength = 0; 00183 00184 currentSection++; 00185 position = 0; 00186 if (ifread(handle, &labelLength, sizeof(int)) == sizeof(int)) 00187 { 00188 ifread(handle, label.LockBuffer(labelLength+1), labelLength * sizeof(char)); 00189 label.UnlockBuffer(); 00190 00191 maxPosition = 0; 00192 ifread(handle, &maxPosition, sizeof(int)); 00193 00194 return ((maxPosition > 0) && !ifeof(handle)); 00195 } 00196 00197 return false; 00198 } 00199 00200 bool glfHandler::NextBaseEntry() 00201 { 00202 bool result = true; 00203 00204 do 00205 { 00206 result = NextEntry(); 00207 } 00208 while (result && data.recordType == 2); 00209 00210 return result; 00211 } 00212 00213 00214 bool glfHandler::NextEntry() 00215 { 00216 if (isStub) 00217 return false; 00218 00219 // Read record type 00220 if (endOfSection || (ifread(handle, &data, 1) != 1)) 00221 { 00222 endOfSection = true; 00223 data.recordType = 0; 00224 position = maxPosition + 1; 00225 return false; 00226 } 00227 00228 // printf("%d/%d\n", data.recordType, data.refBase); 00229 00230 if (position > maxPosition) 00231 return true; 00232 00233 switch (data.recordType) 00234 { 00235 case 0 : 00236 endOfSection = true; 00237 position = maxPosition + 1; 00238 return true; 00239 case 1 : 00240 if (ifread(handle,((char *) &data) + 1, sizeof(data) - 1) == sizeof(data) - 1) 00241 { 00242 data.refBase = translateBase[data.refBase]; 00243 00244 for (int i = 0; i < 10; i++) 00245 likelihoods[i] = bQualityConvertor.toDouble(data.lk[i]); 00246 00247 position = position + data.offset; 00248 return true; 00249 } 00250 00251 // Premature end of file 00252 data.recordType = 0; 00253 position = maxPosition + 1; 00254 return false; 00255 case 2 : 00256 while (ifread(handle, ((char *) &data) + 1, sizeof(data) - 4) == sizeof(data) - 4) 00257 { 00258 data.refBase = translateBase[data.refBase]; 00259 00260 for (int i = 0; i < 3; i++) 00261 likelihoods[i] = bQualityConvertor.toDouble(data.indel.lk[i]); 00262 00263 position = position + data.offset; 00264 00265 indelSequence[0].SetLength(abs(data.indel.length[0]) + 1); 00266 indelSequence[0][abs(data.indel.length[0])] = 0; 00267 if (ifread(handle, indelSequence[0].LockBuffer(), abs(data.indel.length[0])) != (unsigned int) abs(data.indel.length[0])) 00268 break; 00269 00270 indelSequence[1].SetLength(abs(data.indel.length[1]) + 1); 00271 indelSequence[1][abs(data.indel.length[1])] = 0; 00272 if (ifread(handle, indelSequence[1].LockBuffer(), abs(data.indel.length[1])) != (unsigned int) abs(data.indel.length[1])) 00273 break; 00274 00275 return true; 00276 } 00277 00278 // Premature end of file 00279 data.recordType = 0; 00280 position = maxPosition + 1; 00281 return false; 00282 } 00283 00284 return false; 00285 } 00286 00287 glfEntry & glfEntry::operator = (glfEntry & rhs) 00288 { 00289 refBase = rhs.refBase; 00290 recordType = rhs.recordType; 00291 offset = rhs.offset; 00292 mapQuality = rhs.mapQuality; 00293 00294 for (int i = 0; i < 10; i++) 00295 lk[i] = rhs.lk[i]; 00296 00297 minLLK = rhs.minLLK; 00298 depth = rhs.depth; 00299 00300 return * this; 00301 } 00302 00303 const double * glfHandler::GetLikelihoods(int pos) 00304 { 00305 if (pos == position) 00306 return likelihoods; 00307 00308 return nullLikelihoods; 00309 } 00310 00311 const unsigned char * glfHandler::GetLogLikelihoods(int pos) 00312 { 00313 if (pos == position) 00314 return data.lk; 00315 00316 return nullLogLikelihoods; 00317 } 00318 00319 char glfHandler::GetReference(int pos, char defaultBase) 00320 { 00321 if (pos == position) 00322 return data.refBase; 00323 00324 return defaultBase; 00325 } 00326 00327 int glfHandler::GetDepth(int pos) 00328 { 00329 if (pos == position) 00330 return data.depth; 00331 00332 return 0; 00333 } 00334 00335 int glfHandler::GetMapQuality(int pos) 00336 { 00337 if (pos == position) 00338 return data.mapQuality; 00339 00340 return 0; 00341 } 00342 00343 void glfHandler::WriteHeader(const String & headerText) 00344 { 00345 char magicNumber[4] = {'G', 'L', 'F', 3}; 00346 00347 ifwrite(handle, magicNumber, 4); 00348 00349 unsigned int headerLength = headerText.Length(); 00350 00351 ifwrite(handle, &headerLength, 4); 00352 ifwrite(handle, (void *)(const char *) headerText, headerLength); 00353 } 00354 00355 void glfHandler::BeginSection(const String & sectionLabel, int sectionLength) 00356 { 00357 int labelLength = sectionLabel.Length() + 1; 00358 00359 ifwrite(handle, &labelLength, sizeof(int)); 00360 ifwrite(handle, (void *)(const char *) sectionLabel, labelLength); 00361 ifwrite(handle, §ionLength, sizeof(int)); 00362 00363 label = sectionLabel; 00364 maxPosition = sectionLength; 00365 } 00366 00367 void glfHandler::EndSection() 00368 { 00369 char marker = 0; 00370 00371 ifwrite(handle, &marker, sizeof(char)); 00372 } 00373 00374 void glfHandler::WriteEntry(int outputPosition) 00375 { 00376 data.offset = outputPosition - position; 00377 position = outputPosition; 00378 00379 switch (data.recordType) 00380 { 00381 case 0 : 00382 EndSection(); 00383 return; 00384 case 1 : 00385 data.refBase = backTranslateBase[data.refBase]; 00386 ifwrite(handle, &data, sizeof(data)); 00387 data.refBase = translateBase[data.refBase]; 00388 return; 00389 case 2 : 00390 data.refBase = backTranslateBase[data.refBase]; 00391 ifwrite(handle, &data, sizeof(data) - 3); 00392 data.refBase = translateBase[data.refBase]; 00393 00394 ifwrite(handle, (void *)(const char *) indelSequence[0], abs(data.indel.length[0])); 00395 ifwrite(handle, (void *)(const char *) indelSequence[1], abs(data.indel.length[1])); 00396 return; 00397 } 00398 } 00399