libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 #include <stdexcept> 00018 #include <stdlib.h> 00019 #include "GlfFile.h" 00020 #include "GlfException.h" 00021 00022 // Constructor, init variables. 00023 GlfFile::GlfFile() 00024 : myFilePtr(NULL), 00025 myEndMarker() 00026 { 00027 resetFile(); 00028 } 00029 00030 00031 // Constructor, init variables and open the specified file based on the 00032 // specified mode (READ/WRITE). Default is READ.. 00033 GlfFile::GlfFile(const char* filename, OpenType mode) 00034 : myFilePtr(NULL), 00035 myEndMarker() 00036 { 00037 resetFile(); 00038 00039 bool openStatus = true; 00040 if(mode == READ) 00041 { 00042 // open the file for read. 00043 openStatus = openForRead(filename); 00044 } 00045 else 00046 { 00047 // open the file for write. 00048 openStatus = openForWrite(filename); 00049 } 00050 if(!openStatus) 00051 { 00052 // Failed to open the file - print error and abort. 00053 fprintf(stderr, "%s\n", getStatusMessage()); 00054 std::cerr << "FAILURE - EXITING!!!" << std::endl; 00055 exit(-1); 00056 } 00057 } 00058 00059 GlfFile::~GlfFile() 00060 { 00061 resetFile(); 00062 } 00063 00064 00065 // Open a glf file for reading with the specified filename. 00066 bool GlfFile::openForRead(const char * filename) 00067 { 00068 // Reset for any previously operated on files. 00069 resetFile(); 00070 00071 myFilePtr = ifopen(filename, "rb"); 00072 00073 if (myFilePtr == NULL) 00074 { 00075 std::string errorMessage = "Failed to Open "; 00076 errorMessage += filename; 00077 errorMessage += " for reading"; 00078 myStatus.setStatus(GlfStatus::FAIL_IO, errorMessage.c_str()); 00079 throw(GlfException(myStatus)); 00080 return(false); 00081 } 00082 00083 myIsOpenForRead = true; 00084 // Successfully opened the file. 00085 myStatus = GlfStatus::SUCCESS; 00086 return(true); 00087 } 00088 00089 00090 // Open a glf file for reading with the specified filename and read the 00091 // header into the specified header. 00092 bool GlfFile::openForRead(const char * filename, GlfHeader& header) 00093 { 00094 if(!openForRead(filename)) 00095 { 00096 return(false); 00097 } 00098 00099 // Read the header 00100 if(!readHeader(header)) 00101 { 00102 return(false); 00103 } 00104 return(true); 00105 } 00106 00107 00108 // Open a glf file for writing with the specified filename. 00109 bool GlfFile::openForWrite(const char * filename, bool compressed) 00110 { 00111 // Reset for any previously operated on files. 00112 resetFile(); 00113 00114 if(compressed) 00115 { 00116 myFilePtr = ifopen(filename, "wb", InputFile::BGZF); 00117 } 00118 else 00119 { 00120 myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED); 00121 } 00122 00123 if (myFilePtr == NULL) 00124 { 00125 std::string errorMessage = "Failed to Open "; 00126 errorMessage += filename; 00127 errorMessage += " for writing"; 00128 myStatus.setStatus(GlfStatus::FAIL_IO, errorMessage.c_str()); 00129 throw(GlfException(myStatus)); 00130 return(false); 00131 } 00132 00133 myIsOpenForWrite = true; 00134 00135 // Successfully opened the file. 00136 myStatus = GlfStatus::SUCCESS; 00137 return(true); 00138 } 00139 00140 00141 // Close the file if there is one open. 00142 void GlfFile::close() 00143 { 00144 // Resetting the file will close it if it is open, and 00145 // will reset all other variables. 00146 resetFile(); 00147 } 00148 00149 00150 // Returns whether or not the end of the file has been reached. 00151 // return: int - true = EOF; false = not eof. 00152 bool GlfFile::isEOF() 00153 { 00154 if (myFilePtr != NULL) 00155 { 00156 // File Pointer is set, so return if eof. 00157 return(ifeof(myFilePtr)); 00158 } 00159 // File pointer is not set, so return true, eof. 00160 return true; 00161 } 00162 00163 00164 // Read the header from the currently opened file. 00165 bool GlfFile::readHeader(GlfHeader& header) 00166 { 00167 if(myIsOpenForRead == false) 00168 { 00169 // File is not open for read 00170 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00171 "Cannot read header since the file is not open for reading"); 00172 throw(GlfException(myStatus)); 00173 return(false); 00174 } 00175 00176 if(myNextSection != HEADER) 00177 { 00178 // The header has already been read. 00179 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00180 "Cannot read header since it has already been read."); 00181 throw(GlfException(myStatus)); 00182 return(false); 00183 } 00184 00185 if(header.read(myFilePtr)) 00186 { 00187 // The header has now been successfully read. 00188 myNextSection = REF_SECTION; 00189 myStatus = GlfStatus::SUCCESS; 00190 return(true); 00191 } 00192 myStatus.setStatus(GlfStatus::UNKNOWN, 00193 "Failed to read the header."); 00194 throw(GlfException(myStatus)); 00195 return(false); 00196 } 00197 00198 00199 // Write the header to the currently opened file. 00200 bool GlfFile::writeHeader(GlfHeader& header) 00201 { 00202 if(myIsOpenForWrite == false) 00203 { 00204 // File is not open for write 00205 // -OR- 00206 // The header has already been written. 00207 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00208 "Cannot write header since the file is not open for writing"); 00209 throw(GlfException(myStatus)); 00210 return(false); 00211 } 00212 00213 if(myNextSection != HEADER) 00214 { 00215 // The header has already been written. 00216 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00217 "Cannot write header since it has already been written"); 00218 throw(GlfException(myStatus)); 00219 return(false); 00220 } 00221 00222 if(header.write(myFilePtr)) 00223 { 00224 // The header has now been successfully written. 00225 myNextSection = REF_SECTION; 00226 myStatus = GlfStatus::SUCCESS; 00227 return(true); 00228 } 00229 00230 // return the status. 00231 myStatus.setStatus(GlfStatus::UNKNOWN, 00232 "Failed to write the header."); 00233 throw(GlfException(myStatus)); 00234 return(false); 00235 } 00236 00237 00238 // Gets the next reference section from the file & stores it in the 00239 // passed in section. It will read until a new section is found. 00240 bool GlfFile::getNextRefSection(GlfRefSection& refSection) 00241 { 00242 if(myIsOpenForRead == false) 00243 { 00244 // File is not open for read 00245 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00246 "Cannot read reference section since the file is not open for reading"); 00247 throw(GlfException(myStatus)); 00248 return(false); 00249 } 00250 00251 if(myNextSection == HEADER) 00252 { 00253 // The header has not yet been read. 00254 // TODO - maybe just read the header. 00255 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00256 "Cannot read reference section since the header has not been read."); 00257 throw(GlfException(myStatus)); 00258 return(false); 00259 } 00260 00261 // Keep reading until the next section is found. 00262 if(myNextSection == RECORD) 00263 { 00264 GlfRecord record; 00265 while(getNextRecord(record)) 00266 { 00267 // Nothing to do, with the record. 00268 } 00269 } 00270 00271 // Check for end of file. If end of file, return false. 00272 if(isEOF()) 00273 { 00274 return(false); 00275 } 00276 00277 if(myNextSection != REF_SECTION) 00278 { 00279 // Failed reading all the records, so throw exception. 00280 myStatus.setStatus(GlfStatus::FAIL_IO, 00281 "Failed to get to a reference section."); 00282 throw(GlfException(myStatus)); 00283 return(false); 00284 } 00285 00286 // Ready to read the section: 00287 if(refSection.read(myFilePtr)) 00288 { 00289 myStatus = GlfStatus::SUCCESS; 00290 // Next a record should be read. 00291 myNextSection = RECORD; 00292 return(true); 00293 } 00294 00295 // If it is the EOF, just return false. 00296 if(isEOF()) 00297 { 00298 return(false); 00299 } 00300 myStatus.setStatus(GlfStatus::UNKNOWN, 00301 "Failed reading a reference section from the file."); 00302 throw(GlfException(myStatus)); 00303 return(false); 00304 } 00305 00306 00307 // Write the reference section to the file. 00308 bool GlfFile::writeRefSection(const GlfRefSection& refSection) 00309 { 00310 if(myIsOpenForWrite == false) 00311 { 00312 // File is not open for write 00313 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00314 "Cannot write reference section since the file is not open for writing"); 00315 throw(GlfException(myStatus)); 00316 return(false); 00317 } 00318 00319 if(myNextSection == HEADER) 00320 { 00321 // The header has not been written. 00322 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00323 "Cannot write reference section since the header has not been written"); 00324 throw(GlfException(myStatus)); 00325 return(false); 00326 } 00327 00328 if(myNextSection == RECORD) 00329 { 00330 // did not write a end marker record, so write one now. 00331 if(!writeRecord(myEndMarker)) 00332 { 00333 // Failed to write the end marker record. 00334 myStatus.setStatus(GlfStatus::FAIL_IO, 00335 "Failed to write end of chromosome/section marker."); 00336 throw(GlfException(myStatus)); 00337 return(false); 00338 } 00339 } 00340 00341 if(myNextSection != REF_SECTION) 00342 { 00343 // Not ready to write a reference section. 00344 myStatus.setStatus(GlfStatus::FAIL_IO, 00345 "Not ready for a chromosome/section header."); 00346 throw(GlfException(myStatus)); 00347 return(false); 00348 } 00349 00350 if(refSection.write(myFilePtr)) 00351 { 00352 myStatus = GlfStatus::SUCCESS; 00353 // A reference section has now been successfully written. 00354 myNextSection = RECORD; 00355 return(true); 00356 } 00357 00358 // return the status. 00359 myStatus.setStatus(GlfStatus::UNKNOWN, 00360 "Failed writing a reference section to the file."); 00361 throw(GlfException(myStatus)); 00362 return(false); 00363 } 00364 00365 00366 // Gets the next reference section from the file & stores it in the 00367 // passed in record. 00368 bool GlfFile::getNextRecord(GlfRecord& record) 00369 { 00370 if(myIsOpenForRead == false) 00371 { 00372 // File is not open for read 00373 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00374 "Cannot read reference section since the file is not open for reading"); 00375 throw(GlfException(myStatus)); 00376 return(false); 00377 } 00378 00379 if(myNextSection == HEADER) 00380 { 00381 // The header has not yet been read. 00382 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00383 "Cannot read reference section since the header has not been read."); 00384 throw(GlfException(myStatus)); 00385 return(false); 00386 } 00387 00388 if(myNextSection == REF_SECTION) 00389 { 00390 // The reference section has not yet been read. 00391 // TODO - maybe just read the reference section. 00392 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00393 "Cannot read record since a reference section has not been read."); 00394 throw(GlfException(myStatus)); 00395 return(false); 00396 } 00397 00398 // Check for end of file. If end of file, return false. 00399 if(isEOF()) 00400 { 00401 return(false); 00402 } 00403 00404 // Read the record. 00405 if(record.read(myFilePtr)) 00406 { 00407 myStatus = GlfStatus::SUCCESS; 00408 if(record.getRecordType() != 0) 00409 { 00410 return(true); 00411 } 00412 else 00413 { 00414 // Not an error, so no exception thrown, but no more records. 00415 // The next thing is a reference section. 00416 myNextSection = REF_SECTION; 00417 return(false); 00418 } 00419 } 00420 00421 myStatus.setStatus(GlfStatus::UNKNOWN, 00422 "Failed reading a record from the file."); 00423 throw(GlfException(myStatus)); 00424 return(false); 00425 } 00426 00427 00428 // Write the reference section to the file. 00429 bool GlfFile::writeRecord(const GlfRecord& record) 00430 { 00431 if(myIsOpenForWrite == false) 00432 { 00433 // File is not open for write 00434 // -OR- 00435 // The header has already been written. 00436 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00437 "Cannot write record since the file is not open for writing"); 00438 throw(GlfException(myStatus)); 00439 return(false); 00440 } 00441 00442 if(myNextSection == HEADER) 00443 { 00444 // The header has not been written. 00445 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00446 "Cannot write record since the header has not been written"); 00447 throw(GlfException(myStatus)); 00448 return(false); 00449 } 00450 00451 if(myNextSection != RECORD) 00452 { 00453 // The header has not been written. 00454 myStatus.setStatus(GlfStatus::FAIL_ORDER, 00455 "Cannot write record since a reference section has not been written"); 00456 throw(GlfException(myStatus)); 00457 return(false); 00458 } 00459 00460 if(record.write(myFilePtr)) 00461 { 00462 myStatus = GlfStatus::SUCCESS; 00463 // The record has now been successfully written. 00464 00465 // Check if it was the end marker - if so, set that next a 00466 // reference section is expected. 00467 if(record.getRecordType() == 0) 00468 { 00469 myNextSection = REF_SECTION; 00470 } 00471 return(true); 00472 } 00473 00474 // return the status. 00475 myStatus.setStatus(GlfStatus::UNKNOWN, 00476 "Failed writing a record to the file."); 00477 throw(GlfException(myStatus)); 00478 return(false); 00479 } 00480 00481 00482 // Return the number of records that have been read/written so far. 00483 uint32_t GlfFile::getCurrentRecordCount() 00484 { 00485 return(myRecordCount); 00486 } 00487 00488 00489 // Reset variables for each file. 00490 void GlfFile::resetFile() 00491 { 00492 // Close the file. 00493 if (myFilePtr != NULL) 00494 { 00495 // If we already have an open file, close it. 00496 00497 // First check if this is a write file and an end record needs to 00498 // be written, which is the case if the state is RECORD. 00499 if(myIsOpenForWrite && (myNextSection == RECORD)) 00500 { 00501 if(!writeRecord(myEndMarker)) 00502 { 00503 // Failed to write the end marker record. 00504 myStatus.setStatus(GlfStatus::FAIL_IO, 00505 "Failed to write end of chromosome/section marker."); 00506 throw(GlfException(myStatus)); 00507 } 00508 } 00509 ifclose(myFilePtr); 00510 myFilePtr = NULL; 00511 } 00512 00513 myIsOpenForRead = false; 00514 myIsOpenForWrite = false; 00515 myRecordCount = 0; 00516 myStatus = GlfStatus::SUCCESS; 00517 myNextSection = HEADER; 00518 } 00519 00520 00521 // Default Constructor. 00522 GlfFileReader::GlfFileReader() 00523 { 00524 } 00525 00526 00527 // Constructor that opens the specified file for read. 00528 GlfFileReader::GlfFileReader(const char* filename) 00529 { 00530 if(!openForRead(filename)) 00531 { 00532 // Failed to open for reading - print error and abort. 00533 fprintf(stderr, "%s\n", getStatusMessage()); 00534 std::cerr << "FAILURE - EXITING!!!" << std::endl; 00535 exit(-1); 00536 } 00537 } 00538 00539 00540 GlfFileReader::~GlfFileReader() 00541 { 00542 } 00543 00544 00545 // Default Constructor. 00546 GlfFileWriter::GlfFileWriter() 00547 { 00548 } 00549 00550 00551 // Constructor that opens the specified file for write. 00552 GlfFileWriter::GlfFileWriter(const char* filename) 00553 { 00554 if(!openForWrite(filename)) 00555 { 00556 // Failed to open for reading - print error and abort. 00557 fprintf(stderr, "%s\n", getStatusMessage()); 00558 std::cerr << "FAILURE - EXITING!!!" << std::endl; 00559 exit(-1); 00560 } 00561 } 00562 00563 00564 GlfFileWriter::~GlfFileWriter() 00565 { 00566 }