InputFile.cpp

00001 /*
00002  *  Copyright (C) 2010-2012  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "InputFile.h"
00019 #include "StringBasics.h"
00020 #include "GzipHeader.h"
00021 #include "BgzfFileType.h"
00022 #include "BgzfFileTypeRecovery.h"
00023 #include "GzipFileType.h"
00024 #include "UncompressedFileType.h"
00025 
00026 #include <stdarg.h>
00027 
00028 InputFile::InputFile(const char * filename, const char * mode,
00029                      InputFile::ifileCompression compressionMode)
00030 {
00031     // XXX duplicate code
00032     myAttemptRecovery = false;
00033     myFileTypePtr = NULL;
00034     myBufferIndex = 0;
00035     myCurrentBufferSize = 0;
00036     myAllocatedBufferSize = DEFAULT_BUFFER_SIZE;
00037     myFileBuffer = new char[myAllocatedBufferSize];
00038     myFileName.clear();
00039 
00040     openFile(filename, mode, compressionMode);
00041 }
00042 
00043 
00044 int InputFile::readTilChar(const std::string& stopChars, std::string& stringRef)
00045 {
00046     int charRead = 0;
00047     size_t pos = std::string::npos;
00048     // Loop until the character was not found in the stop characters.
00049     while(pos == std::string::npos)
00050     {
00051         charRead = ifgetc();
00052 
00053         // First Check for EOF.  If EOF is found, just return -1
00054         if(charRead == EOF)
00055         {
00056             return(-1);
00057         }
00058         
00059         // Try to find the character in the stopChars.
00060         pos = stopChars.find(charRead);
00061 
00062         if(pos == std::string::npos)
00063         {
00064             // Didn't find a stop character and it is not an EOF, 
00065             // so add it to the string.
00066             stringRef += charRead;
00067         }
00068     }
00069     return(pos);
00070 }
00071 
00072 
00073 int InputFile::readTilChar(const std::string& stopChars)
00074 {
00075     int charRead = 0;
00076     size_t pos = std::string::npos;
00077     // Loop until the character was not found in the stop characters.
00078     while(pos == std::string::npos)
00079     {
00080         charRead = ifgetc();
00081 
00082         // First Check for EOF.  If EOF is found, just return -1
00083         if(charRead == EOF)
00084         {
00085             return(-1);
00086         }
00087         
00088         // Try to find the character in the stopChars.
00089         pos = stopChars.find(charRead);
00090     }
00091     return(pos);
00092 }
00093 
00094 
00095 int InputFile::discardLine()
00096 {
00097     int charRead = 0;
00098     // Loop until the character was not found in the stop characters.
00099     while((charRead != EOF) && (charRead != '\n'))
00100     {
00101         charRead = ifgetc();
00102     }
00103     // First Check for EOF.  If EOF is found, just return -1
00104     if(charRead == EOF)
00105     {
00106         return(-1);
00107     }
00108     return(0);
00109 }
00110 
00111 
00112 int InputFile::readLine(std::string& line)
00113 {
00114     int charRead = 0;
00115     while(!ifeof())
00116     {
00117         charRead = ifgetc();
00118         if(charRead == EOF)
00119         {
00120             return(-1);
00121         }
00122         if(charRead == '\n')
00123         {
00124             return(0);
00125         }
00126         line += charRead;
00127     }
00128     // Should never get here.
00129     return(-1);
00130 }
00131 
00132 
00133 int InputFile::readTilTab(std::string& field)
00134 {
00135     int charRead = 0;
00136     while(!ifeof())
00137     {
00138         charRead = ifgetc();
00139         if(charRead == EOF)
00140         {
00141             return(-1);
00142         }
00143         if(charRead == '\n')
00144         {
00145             return(0);
00146         }
00147         if(charRead == '\t')
00148         {
00149             return(1);
00150         }
00151         field += charRead;
00152     }
00153     return(-1);
00154 }
00155 
00156 
00157 #ifdef __ZLIB_AVAILABLE__
00158 
00159 // Open a file. Called by the constructor.
00160 // Returns true if the file was successfully opened, false otherwise.
00161 bool InputFile::openFile(const char * filename, const char * mode,
00162                          InputFile::ifileCompression compressionMode)
00163 {
00164     //
00165     // if recovering, we don't want to issue big readaheads, since
00166     // that interferes with the decompression - we only want to 
00167     // decompress one at a time, and handle the exceptions immediately
00168     // rather than at some indeterminate point in time.
00169     //
00170     if(myAttemptRecovery) {
00171         bufferReads(1);
00172     }
00173     // If a file is for write, just open a new file.
00174     if (mode[0] == 'w' || mode[0] == 'W')
00175     {
00176         openFileUsingMode(filename, mode, compressionMode);
00177     }
00178     else
00179     {
00180         // Check if reading from stdin.
00181         if((strcmp(filename, "-") == 0) || (strcmp(filename, "-.gz") == 0))
00182         {
00183             // Reading from stdin, open it based on the 
00184             // compression mode.
00185             openFileUsingMode(filename, mode, compressionMode);
00186         }
00187         else
00188         {
00189             // Not from stdin, so determine the file type.
00190 
00191             // Open the file read only to determine file type.
00192             FILE* filePtr = fopen(filename, "r");
00193             
00194             // If the file could not be opened, either create a new one or
00195             // return failure.
00196             if (filePtr == NULL)
00197             {
00198                 // If the mode is for read, then the file must exist, otherwise,
00199                 // create a new file.
00200                 if (mode[0] == 'r' || mode[0] == 'R')
00201                 {
00202                     // File must exist.
00203                     if (myFileTypePtr != NULL)
00204                     {
00205                         delete myFileTypePtr;
00206                         myFileTypePtr = NULL;
00207                     }
00208                     // Return false, was not opened.
00209                     return false;
00210                 }
00211                 else
00212                 {
00213                     openFileUsingMode(filename, mode, compressionMode);
00214                 }
00215             }
00216             else
00217             {
00218                 // File was successfully opened, so try to determine the
00219                 // filetype from the file.
00220                 // Read the file to see if it a gzip file.
00221                 GzipHeader gzipHeader;
00222                 bool isGzip = gzipHeader.readHeader(filePtr);
00223                 
00224                 // The file header has been read, so close the file, so it can
00225                 // be re-opened as the correct type.
00226                 fclose(filePtr);
00227 
00228                 if (isGzip)
00229                 {
00230                     // This file is a gzip file.
00231                     // Check to see if it is BGZF Compression.
00232                     if (gzipHeader.isBgzfFile())
00233                     {
00234                         // This file has BGZF Compression, so set the file
00235                         // pointer.
00236                         if(myAttemptRecovery) {
00237                             // NB: this reader will throw std::runtime_error when it recovers
00238                             myFileTypePtr = new BgzfFileTypeRecovery(filename, mode);
00239                         } else {
00240                             // use the standard bgzf reader (samtools)
00241                             myFileTypePtr = new BgzfFileType(filename, mode);
00242                         }
00243                     }
00244                     else
00245                     {
00246                         // Not BGZF, just a normal gzip.
00247                         myFileTypePtr = new GzipFileType(filename, mode);
00248                    }
00249                 }
00250                 else
00251                 {
00252                     // The file is a uncompressed, uncompressed file,
00253                     // so set the myFileTypePtr accordingly.
00254                     myFileTypePtr = new UncompressedFileType(filename, mode);
00255                 }
00256             }
00257         }
00258     }
00259     if(myFileTypePtr == NULL)
00260     {
00261         return(false);
00262     }
00263     if (!myFileTypePtr->isOpen())
00264     {
00265         // The file was not opened, so delete the pointer and set to null.
00266         delete myFileTypePtr;
00267         myFileTypePtr = NULL;
00268         return false;
00269     }
00270 
00271     if(myAllocatedBufferSize == 1)
00272     {
00273         myFileTypePtr->setBuffered(false);
00274     }
00275     else
00276     {
00277         myFileTypePtr->setBuffered(true);
00278     }
00279     myFileName = filename;
00280     return true;
00281 }
00282 
00283 
00284 // Open a file.  This method will open a file with the specified name and
00285 // mode with the fileTypePtr associated with the specified compressionMode.
00286 void InputFile::openFileUsingMode(const char * filename, const char * mode,
00287                                   ifileCompression compressionMode)
00288 {
00289     switch (compressionMode)
00290     {
00291         case GZIP:
00292             // Gzipped.
00293             myFileTypePtr = new GzipFileType(filename, mode);
00294             break;
00295         case BGZF:
00296             //
00297             // BGZF compression - recovery is possible, so use
00298             // Bgzf recovery reader if asked.
00299             //
00300             if(myAttemptRecovery && ((mode[0] == 'r') || (mode[0] == 'R')))
00301             {
00302                 // NB: this reader will throw std::runtime_error when it recovers
00303                 myFileTypePtr = new BgzfFileTypeRecovery(filename, mode);
00304             }
00305             else
00306             {
00307                 myFileTypePtr = new BgzfFileType(filename, mode);
00308             }
00309             break;
00310         case UNCOMPRESSED:
00311             myFileTypePtr = new UncompressedFileType(filename, mode);
00312             break;
00313         case InputFile::DEFAULT:
00314         default:
00315             // Check the extension. If it is ".gz", treat as gzip.
00316             // otherwise treat it as UNCOMPRESSED.
00317             int lastchar = 0;
00318             while (filename[lastchar] != 0) lastchar++;
00319             if ((lastchar >= 3 &&
00320                     filename[lastchar - 3] == '.' &&
00321                     filename[lastchar - 2] == 'g' &&
00322                     filename[lastchar - 1] == 'z'))
00323             {
00324                 // .gz files files should be gzipped.
00325                 myFileTypePtr = new GzipFileType(filename, mode);
00326             }
00327             else
00328             {
00329                 // Create an uncompressed file.
00330                 myFileTypePtr = new UncompressedFileType(filename, mode);
00331             }
00332             break;
00333     }
00334 
00335     if(myFileTypePtr == NULL)
00336     {
00337         return;
00338     }
00339     if(myAllocatedBufferSize == 1)
00340     {
00341         myFileTypePtr->setBuffered(false);
00342     }
00343     else
00344     {
00345         myFileTypePtr->setBuffered(true);
00346     }
00347 }
00348 
00349 #else
00350 
00351 // No zlib, so just treat all files as std files.
00352 // Open a file. Called by the constructor.
00353 // Returns true if the file was successfully opened, false otherwise.
00354 bool InputFile::openFile(const char * filename, const char * mode,
00355                          InputFile::ifileCompression compressionMode)
00356 {
00357     //  No zlib, so it is a uncompressed, uncompressed file.
00358     myFileTypePtr = new UncompressedFileType(filename, mode);
00359 
00360     if(myFileTypePtr == NULL)
00361     {
00362         return(false);
00363     }
00364     if (!myFileTypePtr->isOpen())
00365     {
00366         // The file was not opened, so delete the pointer and set to null.
00367         delete myFileTypePtr;
00368         myFileTypePtr = NULL;
00369         return false;
00370     }
00371     if(myAllocatedBufferSize == 1)
00372     {
00373         myFileTypePtr->setBuffered(false);
00374     }
00375     else
00376     {
00377         myFileTypePtr->setBuffered(true);
00378     }
00379     myFileName = filename;
00380     return true;
00381 }
00382 
00383 #endif
00384 
00385 
00386 InputFile::~InputFile()
00387 {
00388     delete myFileTypePtr;
00389     myFileTypePtr = NULL;
00390 
00391     if(myFileBuffer != NULL)
00392     {
00393         delete[] myFileBuffer;
00394         myFileBuffer = NULL;
00395     }
00396 }
00397 
00398 
00399 int ifprintf(IFILE output, const char * format, ...)
00400 {
00401     String buffer;
00402 
00403     va_list  ap;
00404     va_start(ap, format);
00405 
00406     buffer.vprintf(format, ap);
00407 
00408     va_end(ap);
00409 
00410     return ::ifwrite(output, (const char *) buffer, buffer.Length());
00411 }
00412 
00413 
00414 InputFile& operator << (InputFile& stream, double num)
00415 {
00416     String val;
00417     val = num;
00418     stream << val;
00419     return(stream);
00420 }
00421 
00422 
00423 InputFile& operator << (InputFile& stream, int num)
00424 {
00425     String val;
00426     val = num;
00427     stream << val;
00428     return(stream);
00429 }
00430 
00431 
00432 InputFile& operator << (InputFile& stream, unsigned int num)
00433 {
00434     String val;
00435     val = num;
00436     stream << val;
00437     return(stream);
00438 }
Generated on Mon Feb 11 13:45:18 2013 for libStatGen Software by  doxygen 1.6.3