InputFile.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __INPUTFILE_H__
00019 #define __INPUTFILE_H__
00020 
00021 #ifdef  __gnu_linux__
00022 #ifndef __ZLIB_AVAILABLE__
00023 #define __ZLIB_AVAILABLE__
00024 #endif
00025 #endif
00026 
00027 #include <stdio.h>
00028 #include <iostream>
00029 #include <cstring>
00030 
00031 #include "FileType.h"
00032 
00033 class InputFile
00034 {
00035 public:
00036 
00037     // DEFAULT means to use the default type to open a file for write.
00038     // The default type is equivalent to UNCOMPRESSED.
00039     enum ifileCompression {DEFAULT, UNCOMPRESSED, GZIP, BGZF};
00040 
00041     InputFile()
00042     {
00043         myFileTypePtr = NULL;
00044         myBufferIndex = 0;
00045         myCurrentBufferSize = 0;
00046     }
00047 
00048     // Destructor
00049     ~InputFile();
00050 
00051 
00052     InputFile(const char * filename, const char * mode,
00053               InputFile::ifileCompression compressionMode = InputFile::DEFAULT);
00054 
00055     // Close the file.
00056     inline int ifclose()
00057     {
00058         if (myFileTypePtr == NULL)
00059         {
00060             return EOF;
00061         }
00062         int result = myFileTypePtr->close();
00063         delete myFileTypePtr;
00064         myFileTypePtr = NULL;
00065         return result;
00066     }
00067 
00068     inline int ifread(void * buffer, unsigned int size)
00069     {
00070         // There are 3 cases:
00071         //  1) There are no bytes available in buffer.
00072         //  2) There are already size available bytes in buffer.
00073         //  3) There are bytes in buffer, but less than size.
00074 
00075         // Determine the number of available bytes in the buffer.
00076         unsigned int availableBytes = myCurrentBufferSize - myBufferIndex;
00077 
00078         // Case 1: There are no bytes available in buffer.
00079         if (availableBytes == 0)
00080         {
00081             // There are no bytes available, so just read directly from the
00082             // file into the passed in buffer.
00083             return(readFromFile(buffer, size));
00084         }
00085         // Case 2: There are already size available bytes in buffer.
00086         else if (size <= availableBytes)
00087         {
00088             //   Just copy from the buffer, increment the index and return.
00089             memcpy(buffer, myFileBuffer+myBufferIndex, size);
00090             // Increment the buffer index.
00091             myBufferIndex += size;
00092             return size;
00093         }
00094         // Case 3: There are bytes in buffer, but less than size.
00095         else
00096         {
00097             // Size > availableBytes > 0
00098             // Copy the available bytes into the buffer.
00099             memcpy(buffer, myFileBuffer+myBufferIndex, availableBytes);
00100             // Increment the buffer index.
00101             myBufferIndex += availableBytes;
00102             // Now read the rest of the bytes directly into the buffer.
00103             int totalBytes = availableBytes;
00104             totalBytes +=
00105                 readFromFile((char*)buffer+availableBytes, size - availableBytes);
00106             return(totalBytes);
00107         }
00108     }
00109 
00110 
00111     // Get a character from the file.  Read a character from the internal
00112     // buffer, or if the end of the buffer has been reached, read from the
00113     // file into the buffer and return index 0.
00114     inline int ifgetc()
00115     {
00116         if (myBufferIndex >= myCurrentBufferSize)
00117         {
00118             // at the last index, read a new buffer.
00119             myCurrentBufferSize = readFromFile(myFileBuffer, MAX_BUFFER_SIZE);
00120             myBufferIndex = 0;
00121         }
00122         // If the buffer index is still greater than or equal to the
00123         // myCurrentBufferSize, then we failed to read the file - return EOF.
00124         if (myBufferIndex >= myCurrentBufferSize)
00125         {
00126             return(EOF);
00127         }
00128         return(myFileBuffer[myBufferIndex++]);
00129     }
00130 
00131     // Reset to the beginning of the file.
00132     inline void ifrewind()
00133     {
00134         // Just set the myBufferIndex and the myCurrentBufferSize to 0 to simulate
00135         // clearing the buffer and call rewind to move to the beginning of the
00136         // file.
00137         if (myFileTypePtr == NULL)
00138         {
00139             // No pointer, so nothing to rewind.
00140             return;
00141         }
00142         myCurrentBufferSize = 0;
00143         myBufferIndex = 0;
00144         myFileTypePtr->rewind();
00145     }
00146 
00147 
00148     // Check to see if we have reached the EOF.
00149     inline int ifeof()
00150     {
00151         // Not EOF if we are not at the end of the buffer.
00152         if (myBufferIndex < myCurrentBufferSize)
00153         {
00154             // There are still available bytes in the buffer, so NOT EOF.
00155             return false;
00156         }
00157         else
00158         {
00159             if (myFileTypePtr == NULL)
00160             {
00161                 // No myFileTypePtr, so not eof (return 0).
00162                 return 0;
00163             }
00164             // exhausted our buffer, so check the file for eof.
00165             return myFileTypePtr->eof();
00166         }
00167     }
00168 
00169     // We do not buffer the write call, so just leave this as normal.
00170     inline unsigned int ifwrite(const void * buffer, unsigned int size)
00171     {
00172         if (myFileTypePtr == NULL)
00173         {
00174             // No myFileTypePtr, so return 0 - nothing written.
00175             return 0;
00176         }
00177         return myFileTypePtr->write(buffer, size);
00178     }
00179 
00180     // Returns whether or not the file was successfully opened.
00181     inline bool isOpen()
00182     {
00183         // It is open if the myFileTypePtr is set and says it is open.
00184         if ((myFileTypePtr != NULL) && myFileTypePtr->isOpen())
00185         {
00186             return true;
00187         }
00188         // File was not successfully opened.
00189         return false;
00190     }
00191 
00192     // Get current position in the file.
00193     // -1 return value indicates an error.
00194     inline long int iftell()
00195     {
00196         if (myFileTypePtr == NULL)
00197         {
00198             // No myFileTypePtr, so return false - could not seek.
00199             return -1;
00200         }
00201         return myFileTypePtr->tell();
00202     }
00203 
00204 
00205     // Seek to the specified offset from the origin.
00206     // origin can be any of the following:
00207     // Note: not all are valid for all filetypes.
00208     //   SEEK_SET - Beginning of file
00209     //   SEEK_CUR - Current position of the file pointer
00210     //   SEEK_END - End of file
00211     // Returns true on successful seek and false on a failed seek.
00212     inline bool ifseek(long int offset, int origin)
00213     {
00214         if (myFileTypePtr == NULL)
00215         {
00216             // No myFileTypePtr, so return false - could not seek.
00217             return false;
00218         }
00219         return myFileTypePtr->seek(offset, origin);
00220     }
00221 
00222 protected:
00223     // Open a file. Called by the constructor.
00224     // Returns true if the file was successfully opened, false otherwise.
00225     bool openFile(const char * filename, const char * mode,
00226                   InputFile::ifileCompression compressionMode);
00227 
00228     // Read into a buffer from the file.  Since the buffer is passed in and
00229     // this would bypass the myFileBuffer used by this class, this method must
00230     // be protected.
00231     inline int readFromFile(void * buffer, unsigned int size)
00232     {
00233         // If no myFileTypePtr, return 0 - nothing read.
00234         if (myFileTypePtr == NULL)
00235         {
00236             return 0;
00237         }
00238         return myFileTypePtr->read(buffer, size);
00239     }
00240 
00241 #ifdef __ZLIB_AVAILABLE__
00242     // Only necessary with zlib to determine what file type on a new
00243     // file.  Without zlib, there are only uncompressed files, so a special
00244     // method is not needed to determine the type of file to open.
00245     // Open a file.  This method will open a file with the specified name and
00246     // mode with the fileTypePtr associated with the specified compressionMode.
00247     void openFileUsingMode(const char* filename, const char* mode,
00248                            InputFile::ifileCompression compressionMode);
00249 #endif
00250 
00251     // The size of the buffer used by this class.
00252     static const int MAX_BUFFER_SIZE = 1048576;
00253 
00254     // Pointer to a class that interfaces with different file types.
00255     FileType* myFileTypePtr;
00256 
00257     // Buffer used to do large reads rather than 1 by 1 character reads
00258     // from the file.  The class is then managed to iterate through the buffer.
00259     char myFileBuffer[MAX_BUFFER_SIZE];
00260 
00261     // Current index into the buffer.  Used to track where we are in reading the
00262     // file from the buffer.
00263     int myBufferIndex;
00264 
00265     // Current number of entries in the buffer.  Used to ensure that
00266     // if a read did not fill the buffer, we stop before hitting the
00267     // end of what was read.
00268     int myCurrentBufferSize;
00269 
00270 };
00271 
00272 typedef InputFile* IFILE;
00273 
00274 
00275 
00276 
00277 inline IFILE ifopen(const char * filename, const char * mode,
00278                     InputFile::ifileCompression compressionMode = InputFile::DEFAULT)
00279 {
00280     IFILE file = new InputFile(filename, mode, compressionMode);
00281     if (!file->isOpen())
00282     {
00283 
00284         // Not open, so delete the file, and return null.
00285         delete file;
00286         file = NULL;
00287     }
00288     return file;
00289 }
00290 
00291 
00292 inline int ifclose(IFILE file)
00293 {
00294     int result = file->ifclose();
00295     delete file;
00296     file = NULL;
00297     return(result);
00298 }
00299 
00300 inline unsigned int ifread(IFILE file, void * buffer, unsigned int size)
00301 {
00302     return(file->ifread(buffer, size));
00303 }
00304 
00305 inline int ifgetc(IFILE file)
00306 {
00307     return(file->ifgetc());
00308 }
00309 
00310 inline void ifrewind(IFILE file)
00311 {
00312     file->ifrewind();
00313 }
00314 
00315 inline int ifeof(IFILE file)
00316 {
00317     return(file->ifeof());
00318 }
00319 
00320 inline unsigned int ifwrite(IFILE file, const void * buffer, unsigned int size)
00321 {
00322     return(file->ifwrite(buffer, size));
00323 }
00324 
00325 // Get current position in the file.
00326 // -1 return value indicates an error.
00327 inline long int iftell(IFILE file)
00328 {
00329     return (file->iftell());
00330 }
00331 
00332 // Seek to the specified offset from the origin.
00333 // origin can be any of the following:
00334 // Note: not all are valid for all filetypes.
00335 //   SEEK_SET - Beginning of file
00336 //   SEEK_CUR - Current position of the file pointer
00337 //   SEEK_END - End of file
00338 // Returns true on successful seek and false on a failed seek.
00339 inline bool ifseek(IFILE file, long int offset, int origin)
00340 {
00341     return (file->ifseek(offset, origin));
00342 }
00343 
// Declaration only - the definition is not part of this header.
// Takes an IFILE, a format string and a variable argument list; presumably
// a printf-style formatted write to output (confirm against the definition).
// NOTE(review): format is a non-const char *, so passing a string literal
// relies on the deprecated literal-to-char* conversion.  Changing it to
// const char * must be done together with the definition.
00344 int ifprintf(IFILE output, char * format, ...);
00345 
00346 inline IFILE operator >> (IFILE stream, std::string &str)
00347 {
00348     str.clear();
00349     int ch;
00350     // not safe... newline handling?
00351     while ((ch = stream->ifgetc())!=EOF && (ch != '\n')) str.push_back(ch);
00352     return stream;
00353 }
00354 
00355 #endif
00356 
Generated on Wed Nov 17 15:38:28 2010 for StatGen Software by  doxygen 1.6.3