InputFile.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
#include "InputFile.h"
#include "StringBasics.h"
#include "GzipHeader.h"
#include "BgzfFileType.h"
#include "BgzfFileTypeRecovery.h"
#include "GzipFileType.h"
#include "UncompressedFileType.h"

#include <stdarg.h>
#include <string.h>
00027 
00028 InputFile::InputFile(const char * filename, const char * mode,
00029                      InputFile::ifileCompression compressionMode)
00030 {
00031     // XXX duplicate code
00032     myAttemptRecovery = false;
00033     myFileTypePtr = NULL;
00034     myBufferIndex = 0;
00035     myCurrentBufferSize = 0;
00036     myAllocatedBufferSize = DEFAULT_BUFFER_SIZE;
00037     myFileBuffer = new char[myAllocatedBufferSize];
00038     myFileName.clear();
00039 
00040     openFile(filename, mode, compressionMode);
00041 }
00042 
00043 
00044 #ifdef __ZLIB_AVAILABLE__
00045 
00046 // Open a file. Called by the constructor.
00047 // Returns true if the file was successfully opened, false otherwise.
00048 bool InputFile::openFile(const char * filename, const char * mode,
00049                          InputFile::ifileCompression compressionMode)
00050 {
00051     //
00052     // if recovering, we don't want to issue big readaheads, since
00053     // that interferes with the decompression - we only want to 
00054     // decompress one at a time, and handle the exceptions immediately
00055     // rather than at some indeterminate point in time.
00056     //
00057     if(myAttemptRecovery) {
00058         bufferReads(1);
00059     }
00060     // If a file is for write, just open a new file.
00061     if (mode[0] == 'w' || mode[0] == 'W')
00062     {
00063         openFileUsingMode(filename, mode, compressionMode);
00064     }
00065     else
00066     {
00067         // Check if reading from stdin.
00068         if((strcmp(filename, "-") == 0) || (strcmp(filename, "-.gz") == 0))
00069         {
00070             // Reading from stdin, open it based on the 
00071             // compression mode.
00072             openFileUsingMode(filename, mode, compressionMode);
00073         }
00074         else
00075         {
00076             // Not from stdin, so determine the file type.
00077 
00078             // Open the file read only to determine file type.
00079             FILE* filePtr = fopen(filename, "r");
00080             
00081             // If the file could not be opened, either create a new one or
00082             // return failure.
00083             if (filePtr == NULL)
00084             {
00085                 // If the mode is for read, then the file must exist, otherwise,
00086                 // create a new file.
00087                 if (mode[0] == 'r' || mode[0] == 'R')
00088                 {
00089                     // File must exist.
00090                     if (myFileTypePtr != NULL)
00091                     {
00092                         delete myFileTypePtr;
00093                         myFileTypePtr = NULL;
00094                     }
00095                     // Return false, was not opened.
00096                     return false;
00097                 }
00098                 else
00099                 {
00100                     openFileUsingMode(filename, mode, compressionMode);
00101                 }
00102             }
00103             else
00104             {
00105                 // File was successfully opened, so try to determine the
00106                 // filetype from the file.
00107                 // Read the file to see if it a gzip file.
00108                 GzipHeader gzipHeader;
00109                 bool isGzip = gzipHeader.readHeader(filePtr);
00110                 
00111                 // The file header has been read, so close the file, so it can
00112                 // be re-opened as the correct type.
00113                 fclose(filePtr);
00114 
00115                 if (isGzip)
00116                 {
00117                     // This file is a gzip file.
00118                     // Check to see if it is BGZF Compression.
00119                     if (gzipHeader.isBgzfFile())
00120                     {
00121                         // This file has BGZF Compression, so set the file
00122                         // pointer.
00123                         if(myAttemptRecovery) {
00124                             // NB: this reader will throw std::runtime_error when it recovers
00125                             myFileTypePtr = new BgzfFileTypeRecovery(filename, mode);
00126                         } else {
00127                             // use the standard bgzf reader (samtools)
00128                             myFileTypePtr = new BgzfFileType(filename, mode);
00129                         }
00130                     }
00131                     else
00132                     {
00133                         // Not BGZF, just a normal gzip.
00134                         myFileTypePtr = new GzipFileType(filename, mode);
00135                    }
00136                 }
00137                 else
00138                 {
00139                     // The file is a uncompressed, uncompressed file,
00140                     // so set the myFileTypePtr accordingly.
00141                     myFileTypePtr = new UncompressedFileType(filename, mode);
00142                 }
00143             }
00144         }
00145     }
00146     if(myFileTypePtr == NULL)
00147     {
00148         return(false);
00149     }
00150     if (!myFileTypePtr->isOpen())
00151     {
00152         // The file was not opened, so delete the pointer and set to null.
00153         delete myFileTypePtr;
00154         myFileTypePtr = NULL;
00155         return false;
00156     }
00157 
00158     if(myAllocatedBufferSize == 1)
00159     {
00160         myFileTypePtr->setBuffered(false);
00161     }
00162     else
00163     {
00164         myFileTypePtr->setBuffered(true);
00165     }
00166     myFileName = filename;
00167     return true;
00168 }
00169 
00170 
00171 // Open a file.  This method will open a file with the specified name and
00172 // mode with the fileTypePtr associated with the specified compressionMode.
00173 void InputFile::openFileUsingMode(const char * filename, const char * mode,
00174                                   ifileCompression compressionMode)
00175 {
00176     switch (compressionMode)
00177     {
00178         case GZIP:
00179             // Gzipped.
00180             myFileTypePtr = new GzipFileType(filename, mode);
00181             break;
00182         case BGZF:
00183             //
00184             // BGZF compression - recovery is possible, so use
00185             // Bgzf recovery reader if asked.
00186             //
00187             if(myAttemptRecovery && index(mode,'r') ) {
00188                 // NB: this reader will throw std::runtime_error when it recovers
00189                 myFileTypePtr = new BgzfFileTypeRecovery(filename, mode);
00190             } else {
00191                 myFileTypePtr = new BgzfFileType(filename, mode);
00192             }
00193             break;
00194         case UNCOMPRESSED:
00195             myFileTypePtr = new UncompressedFileType(filename, mode);
00196             break;
00197         case InputFile::DEFAULT:
00198         default:
00199             // Check the extension. If it is ".gz", treat as gzip.
00200             // otherwise treat it as UNCOMPRESSED.
00201             int lastchar = 0;
00202             while (filename[lastchar] != 0) lastchar++;
00203             if ((lastchar >= 3 &&
00204                     filename[lastchar - 3] == '.' &&
00205                     filename[lastchar - 2] == 'g' &&
00206                     filename[lastchar - 1] == 'z'))
00207             {
00208                 // .gz files files should be gzipped.
00209                 myFileTypePtr = new GzipFileType(filename, mode);
00210             }
00211             else
00212             {
00213                 // Create an uncompressed file.
00214                 myFileTypePtr = new UncompressedFileType(filename, mode);
00215             }
00216             break;
00217     }
00218 
00219     if(myFileTypePtr == NULL)
00220     {
00221         return;
00222     }
00223     if(myAllocatedBufferSize == 1)
00224     {
00225         myFileTypePtr->setBuffered(false);
00226     }
00227     else
00228     {
00229         myFileTypePtr->setBuffered(true);
00230     }
00231 }
00232 
00233 #else
00234 
00235 // No zlib, so just treat all files as std files.
00236 // Open a file. Called by the constructor.
00237 // Returns true if the file was successfully opened, false otherwise.
00238 bool InputFile::openFile(const char * filename, const char * mode)
00239 {
00240     //  No zlib, so it is a uncompressed, uncompressed file.
00241     myFileTypePtr = new UncompressedFileType(filename, mode);
00242 
00243     if(myFileTypePtr == NULL)
00244     {
00245         return(false);
00246     }
00247     if (!myFileTypePtr->isOpen())
00248     {
00249         // The file was not opened, so delete the pointer and set to null.
00250         delete myFileTypePtr;
00251         myFileTypePtr = NULL;
00252         return false;
00253     }
00254     if(myAllocatedBufferSize == 1)
00255     {
00256         myFileTypePtr->setBuffered(false);
00257     }
00258     else
00259     {
00260         myFileTypePtr->setBuffered(true);
00261     }
00262     myFileName = filename;
00263     return true;
00264 }
00265 
00266 #endif
00267 
00268 
00269 InputFile::~InputFile()
00270 {
00271     delete myFileTypePtr;
00272     myFileTypePtr = NULL;
00273 
00274     if(myFileBuffer != NULL)
00275     {
00276         delete[] myFileBuffer;
00277         myFileBuffer = NULL;
00278     }
00279 }
00280 
00281 
00282 int ifprintf(IFILE output, const char * format, ...)
00283 {
00284     String buffer;
00285 
00286     va_list  ap;
00287     va_start(ap, format);
00288 
00289     buffer.vprintf(format, ap);
00290 
00291     va_end(ap);
00292 
00293     return ::ifwrite(output, (const char *) buffer, buffer.Length());
00294 }
00295 
00296 
Generated on Tue Aug 23 18:19:05 2011 for libStatGen Software by  doxygen 1.6.3