libStatGen Software  1
GlfFile.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 #include <stdexcept>
00018 #include <stdlib.h>
00019 #include "GlfFile.h"
00020 #include "GlfException.h"
00021 
00022 // Constructor, init variables.
00023 GlfFile::GlfFile()
00024     : myFilePtr(NULL),
00025       myEndMarker()
00026 {
00027     resetFile();
00028 }
00029 
00030 
00031 // Constructor, init variables and open the specified file based on the
00032 // specified mode (READ/WRITE).  Default is READ..
00033 GlfFile::GlfFile(const char* filename, OpenType mode)
00034     : myFilePtr(NULL),
00035       myEndMarker()
00036 {
00037     resetFile();
00038 
00039     bool openStatus = true;
00040     if(mode == READ)
00041     {
00042         // open the file for read.
00043         openStatus = openForRead(filename);
00044     }
00045     else
00046     {
00047         // open the file for write.
00048         openStatus = openForWrite(filename);
00049     }
00050     if(!openStatus)
00051     {
00052         // Failed to open the file - print error and abort.
00053         fprintf(stderr, "%s\n", getStatusMessage());
00054         std::cerr << "FAILURE - EXITING!!!" << std::endl;
00055         exit(-1);
00056     }
00057 }
00058 
00059 GlfFile::~GlfFile()
00060 {
00061     resetFile();
00062 }
00063 
00064 
00065 // Open a glf file for reading with the specified filename.
00066 bool GlfFile::openForRead(const char * filename)
00067 {
00068     // Reset for any previously operated on files.
00069     resetFile();
00070 
00071     myFilePtr = ifopen(filename, "rb");
00072    
00073     if (myFilePtr == NULL)
00074     {
00075         std::string errorMessage = "Failed to Open ";
00076         errorMessage += filename;
00077         errorMessage += " for reading";
00078         myStatus.setStatus(GlfStatus::FAIL_IO, errorMessage.c_str());
00079         throw(GlfException(myStatus));
00080         return(false);
00081     }
00082 
00083     myIsOpenForRead = true;
00084     // Successfully opened the file.
00085     myStatus = GlfStatus::SUCCESS;
00086     return(true);
00087 }
00088 
00089 
00090 // Open a glf file for reading with the specified filename and read the
00091 // header into the specified header.
00092 bool GlfFile::openForRead(const char * filename, GlfHeader& header)
00093 {
00094     if(!openForRead(filename))
00095     {
00096         return(false);
00097     }
00098 
00099     // Read the header
00100     if(!readHeader(header))
00101     {
00102         return(false);
00103     }
00104     return(true);
00105 }
00106 
00107 
00108 // Open a glf file for writing with the specified filename.
00109 bool GlfFile::openForWrite(const char * filename, bool compressed)
00110 {
00111     // Reset for any previously operated on files.
00112     resetFile();
00113 
00114     if(compressed)
00115     {
00116         myFilePtr = ifopen(filename, "wb", InputFile::BGZF);
00117     }
00118     else
00119     {
00120         myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED);
00121     }
00122 
00123     if (myFilePtr == NULL)
00124     {
00125         std::string errorMessage = "Failed to Open ";
00126         errorMessage += filename;
00127         errorMessage += " for writing";
00128         myStatus.setStatus(GlfStatus::FAIL_IO, errorMessage.c_str());
00129         throw(GlfException(myStatus));
00130         return(false);
00131     }
00132    
00133     myIsOpenForWrite = true;
00134 
00135     // Successfully opened the file.
00136     myStatus = GlfStatus::SUCCESS;
00137     return(true);
00138 }
00139 
00140 
00141 // Close the file if there is one open.
00142 void GlfFile::close()
00143 {
00144     // Resetting the file will close it if it is open, and
00145     // will reset all other variables.
00146     resetFile();
00147 }
00148 
00149 
00150 // Returns whether or not the end of the file has been reached.
00151 // return: int - true = EOF; false = not eof.
00152 bool GlfFile::isEOF()
00153 {
00154     if (myFilePtr != NULL)
00155     {
00156         // File Pointer is set, so return if eof.
00157         return(ifeof(myFilePtr));
00158     }
00159     // File pointer is not set, so return true, eof.
00160     return true;
00161 }
00162 
00163 
00164 // Read the header from the currently opened file.
00165 bool GlfFile::readHeader(GlfHeader& header)
00166 {
00167     if(myIsOpenForRead == false)
00168     {
00169         // File is not open for read
00170         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00171                            "Cannot read header since the file is not open for reading");
00172         throw(GlfException(myStatus));
00173         return(false);
00174     }
00175 
00176     if(myNextSection != HEADER)
00177     {
00178         // The header has already been read.
00179         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00180                            "Cannot read header since it has already been read.");
00181         throw(GlfException(myStatus));
00182         return(false);
00183     }
00184 
00185     if(header.read(myFilePtr))
00186     {
00187         // The header has now been successfully read.
00188         myNextSection = REF_SECTION;
00189         myStatus = GlfStatus::SUCCESS;
00190         return(true);
00191     }
00192     myStatus.setStatus(GlfStatus::UNKNOWN, 
00193                        "Failed to read the header.");
00194     throw(GlfException(myStatus));
00195     return(false);
00196 }
00197 
00198 
00199 // Write the header to the currently opened file.
00200 bool GlfFile::writeHeader(GlfHeader& header)
00201 {
00202     if(myIsOpenForWrite == false)
00203     {
00204         // File is not open for write
00205         // -OR-
00206         // The header has already been written.
00207         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00208                            "Cannot write header since the file is not open for writing");
00209         throw(GlfException(myStatus));
00210         return(false);
00211     }
00212 
00213     if(myNextSection != HEADER)
00214     {
00215         // The header has already been written.
00216         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00217                            "Cannot write header since it has already been written");
00218         throw(GlfException(myStatus));
00219         return(false);
00220     }
00221 
00222     if(header.write(myFilePtr))
00223     {
00224         // The header has now been successfully written.
00225         myNextSection = REF_SECTION;
00226         myStatus = GlfStatus::SUCCESS;
00227         return(true);
00228     }
00229 
00230     // return the status.
00231     myStatus.setStatus(GlfStatus::UNKNOWN, 
00232                        "Failed to write the header.");
00233     throw(GlfException(myStatus));
00234     return(false);
00235 }
00236 
00237 
00238 // Gets the next reference section from the file & stores it in the
00239 // passed in section.  It will read until a new section is found.
00240 bool GlfFile::getNextRefSection(GlfRefSection& refSection)
00241 {
00242     if(myIsOpenForRead == false)
00243     {
00244         // File is not open for read
00245         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00246                            "Cannot read reference section since the file is not open for reading");
00247         throw(GlfException(myStatus));
00248         return(false);
00249     }
00250 
00251     if(myNextSection == HEADER)
00252     {
00253         // The header has not yet been read.
00254         // TODO - maybe just read the header.
00255         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00256                            "Cannot read reference section since the header has not been read.");
00257         throw(GlfException(myStatus));
00258         return(false);
00259     }
00260 
00261     // Keep reading until the next section is found.
00262     if(myNextSection == RECORD)
00263     {
00264         GlfRecord record;
00265         while(getNextRecord(record))
00266         {
00267             // Nothing to do, with the record.
00268         }
00269     }
00270 
00271     // Check for end of file.  If end of file, return false.
00272     if(isEOF())
00273     {
00274         return(false);
00275     }
00276 
00277     if(myNextSection != REF_SECTION)
00278     {
00279         // Failed reading all the records, so throw exception.
00280         myStatus.setStatus(GlfStatus::FAIL_IO, 
00281                            "Failed to get to a reference section.");
00282         throw(GlfException(myStatus));
00283         return(false);
00284     }
00285 
00286     // Ready to read the section:
00287     if(refSection.read(myFilePtr))
00288     {
00289         myStatus = GlfStatus::SUCCESS;
00290         // Next a record should be read.
00291         myNextSection = RECORD;
00292         return(true);
00293     }
00294 
00295     // If it is the EOF, just return false.
00296     if(isEOF())
00297     {
00298         return(false);
00299     }
00300     myStatus.setStatus(GlfStatus::UNKNOWN, 
00301                        "Failed reading a reference section from the file.");
00302     throw(GlfException(myStatus));
00303     return(false);
00304 }
00305 
00306 
00307 // Write the reference section to the file.
00308 bool GlfFile::writeRefSection(const GlfRefSection& refSection)
00309 {
00310     if(myIsOpenForWrite == false)
00311     {
00312         // File is not open for write
00313         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00314                            "Cannot write reference section since the file is not open for writing");
00315         throw(GlfException(myStatus));
00316         return(false);
00317     }
00318 
00319     if(myNextSection == HEADER)
00320     {
00321         // The header has not been written.
00322         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00323                            "Cannot write reference section since the header has not been written");
00324         throw(GlfException(myStatus));
00325        return(false);
00326     }
00327 
00328     if(myNextSection == RECORD)
00329     {
00330         // did not write a end marker record, so write one now.
00331         if(!writeRecord(myEndMarker))
00332         {
00333             // Failed to write the end marker record.
00334             myStatus.setStatus(GlfStatus::FAIL_IO,
00335                                "Failed to write end of chromosome/section marker.");
00336             throw(GlfException(myStatus));
00337             return(false);
00338         }
00339     }
00340 
00341     if(myNextSection != REF_SECTION)
00342     {
00343         // Not ready to write a reference section.
00344         myStatus.setStatus(GlfStatus::FAIL_IO,
00345                            "Not ready for a chromosome/section header.");
00346         throw(GlfException(myStatus));
00347         return(false);
00348     }
00349 
00350     if(refSection.write(myFilePtr))
00351     {
00352         myStatus = GlfStatus::SUCCESS;
00353         // A reference section has now been successfully written.
00354         myNextSection = RECORD;
00355         return(true);
00356     }
00357 
00358     // return the status.
00359     myStatus.setStatus(GlfStatus::UNKNOWN, 
00360                        "Failed writing a reference section to the file.");
00361     throw(GlfException(myStatus));
00362     return(false);    
00363 }
00364 
00365 
00366 // Gets the next reference section from the file & stores it in the
00367 // passed in record.
00368 bool GlfFile::getNextRecord(GlfRecord& record)
00369 {
00370     if(myIsOpenForRead == false)
00371     {
00372         // File is not open for read
00373         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00374                            "Cannot read reference section since the file is not open for reading");
00375         throw(GlfException(myStatus));
00376         return(false);
00377     }
00378 
00379     if(myNextSection == HEADER)
00380     {
00381         // The header has not yet been read.
00382         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00383                            "Cannot read reference section since the header has not been read.");
00384         throw(GlfException(myStatus));
00385         return(false);
00386     }
00387     
00388     if(myNextSection == REF_SECTION)
00389     {
00390         // The reference section has not yet been read.
00391         // TODO - maybe just read the reference section.
00392         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00393                            "Cannot read record since a reference section has not been read.");
00394         throw(GlfException(myStatus));
00395         return(false);
00396     }
00397 
00398     // Check for end of file.  If end of file, return false.
00399     if(isEOF())
00400     {
00401         return(false);
00402     }
00403 
00404     // Read the record.
00405     if(record.read(myFilePtr))
00406     {
00407         myStatus = GlfStatus::SUCCESS;
00408         if(record.getRecordType() != 0)
00409         {
00410             return(true);
00411         }
00412         else
00413         {
00414             // Not an error, so no exception thrown, but no more records.
00415             // The next thing is a reference section.
00416             myNextSection = REF_SECTION;
00417             return(false);
00418         }
00419     }
00420     
00421     myStatus.setStatus(GlfStatus::UNKNOWN, 
00422                        "Failed reading a record from the file.");
00423     throw(GlfException(myStatus));
00424     return(false);
00425 }
00426 
00427 
00428 // Write the reference section to the file.
00429 bool GlfFile::writeRecord(const GlfRecord& record)
00430 {
00431     if(myIsOpenForWrite == false)
00432     {
00433         // File is not open for write
00434         // -OR-
00435         // The header has already been written.
00436         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00437                            "Cannot write record since the file is not open for writing");
00438         throw(GlfException(myStatus));
00439        return(false);
00440     }
00441 
00442     if(myNextSection == HEADER)
00443     {
00444         // The header has not been written.
00445         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00446                            "Cannot write record since the header has not been written");
00447         throw(GlfException(myStatus));
00448         return(false);
00449     }
00450 
00451     if(myNextSection != RECORD)
00452     {
00453         // The header has not been written.
00454         myStatus.setStatus(GlfStatus::FAIL_ORDER, 
00455                            "Cannot write record since a reference section has not been written");
00456         throw(GlfException(myStatus));
00457         return(false);
00458     }
00459 
00460     if(record.write(myFilePtr))
00461     {
00462         myStatus = GlfStatus::SUCCESS;
00463         // The record has now been successfully written.
00464 
00465         // Check if it was the end marker - if so, set that next a 
00466         // reference section is expected.
00467         if(record.getRecordType() == 0)
00468         {
00469             myNextSection = REF_SECTION;
00470         }
00471         return(true);
00472     }
00473 
00474     // return the status.
00475     myStatus.setStatus(GlfStatus::UNKNOWN, 
00476                        "Failed writing a record to the file.");
00477     throw(GlfException(myStatus));
00478     return(false);    
00479 }
00480 
00481 
00482 // Return the number of records that have been read/written so far.
00483 uint32_t GlfFile::getCurrentRecordCount()
00484 {
00485     return(myRecordCount);
00486 }
00487 
00488 
00489 // Reset variables for each file.
00490 void GlfFile::resetFile()
00491 {
00492     // Close the file.
00493     if (myFilePtr != NULL)
00494     {
00495         // If we already have an open file, close it.
00496 
00497         // First check if this is a write file and an end record needs to
00498         // be written, which is the case if the state is RECORD.
00499         if(myIsOpenForWrite && (myNextSection == RECORD))
00500         {
00501             if(!writeRecord(myEndMarker))
00502             {
00503                 // Failed to write the end marker record.
00504                 myStatus.setStatus(GlfStatus::FAIL_IO,
00505                                    "Failed to write end of chromosome/section marker.");
00506                 throw(GlfException(myStatus));
00507             }
00508         }
00509         ifclose(myFilePtr);
00510         myFilePtr = NULL;
00511     }
00512 
00513     myIsOpenForRead = false;
00514     myIsOpenForWrite = false;
00515     myRecordCount = 0;
00516     myStatus = GlfStatus::SUCCESS;
00517     myNextSection = HEADER;
00518 }
00519 
00520 
00521 // Default Constructor.
00522 GlfFileReader::GlfFileReader()
00523 {
00524 }
00525 
00526 
00527 // Constructor that opens the specified file for read.
00528 GlfFileReader::GlfFileReader(const char* filename)
00529 {
00530     if(!openForRead(filename))
00531     {
00532         // Failed to open for reading - print error and abort.
00533         fprintf(stderr, "%s\n", getStatusMessage());
00534         std::cerr << "FAILURE - EXITING!!!" << std::endl;
00535         exit(-1);
00536     }
00537 }
00538 
00539 
00540 GlfFileReader::~GlfFileReader()
00541 {
00542 }
00543 
00544 
00545 // Default Constructor.
00546 GlfFileWriter::GlfFileWriter()
00547 {
00548 }
00549 
00550 
00551 // Constructor that opens the specified file for write.
00552 GlfFileWriter::GlfFileWriter(const char* filename)
00553 {
00554     if(!openForWrite(filename))
00555     {
00556         // Failed to open for reading - print error and abort.
00557         fprintf(stderr, "%s\n", getStatusMessage());
00558         std::cerr << "FAILURE - EXITING!!!" << std::endl;
00559         exit(-1);
00560     }
00561 }
00562 
00563 
00564 GlfFileWriter::~GlfFileWriter()
00565 {
00566 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends