libStatGen Software  1
bgzf.h
00001 /* The MIT License
00002 
00003    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
00004                  2011 Attractive Chaos <attractor@live.co.uk>
00005 
00006    Permission is hereby granted, free of charge, to any person obtaining a copy
00007    of this software and associated documentation files (the "Software"), to deal
00008    in the Software without restriction, including without limitation the rights
00009    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00010    copies of the Software, and to permit persons to whom the Software is
00011    furnished to do so, subject to the following conditions:
00012 
00013    The above copyright notice and this permission notice shall be included in
00014    all copies or substantial portions of the Software.
00015 
00016    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00017    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00018    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00019    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00020    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00021    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00022    THE SOFTWARE.
00023 */
00024 
00025 /* The BGZF library was originally written by Bob Handsaker from the Broad
00026  * Institute. It was later improved by the SAMtools developers. */
00027 
00028 #ifndef __BGZF_H
00029 #define __BGZF_H
00030 
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h> /* ssize_t, used by the read/write prototypes below */
#ifdef __ZLIB_AVAILABLE__
#include <zlib.h>
#endif
00036 
/* Block size constant: 0x10000 = 65536 (64k). */
#define BGZF_BLOCK_SIZE 0x10000

/*
 * Error codes. Each value is a distinct single bit, so several of them can
 * be combined in one integer with bitwise OR.
 */
#define BGZF_ERR_ZLIB   1
#define BGZF_ERR_HEADER 2
#define BGZF_ERR_IO     4
#define BGZF_ERR_MISUSE 8
00043 
/**
 * State of one BGZF stream.
 *
 * The three bit-fields are declared `signed int` explicitly. Whether a plain
 * `int` bit-field is signed is implementation-defined in C; spelling it out
 * keeps negative values representable on every compiler and matches what
 * GCC and Clang already do by default, so the layout is unchanged there.
 * NOTE(review): presumably compress_level can hold a negative "use the
 * default" value -- confirm against the implementation.
 */
typedef struct {
    signed int open_mode:8;      /* mode the stream was opened with */
    signed int compress_level:8; /* compression level */
    signed int errcode:16;       /* presumably an OR of BGZF_ERR_* flags -- confirm */
    int cache_size;              /* see bgzf_set_cache_size() */
    int block_length;
    int block_offset;            /* low 16 bits of the value built by bgzf_tell() */
    int64_t block_address;       /* shifted left by 16 in the value built by bgzf_tell() */
    void *uncompressed_block;
    void *compressed_block;
    void *cache;                 /* a pointer to a hash table */
    void *fp;                    /* actual file handler; FILE* on writing; FILE* or knetFile* on reading */
} BGZF;
00053 
/*
 * String record used by bgzf_getline().
 *
 * The whole declaration is skipped when KSTRING_T is already defined, so the
 * typedef is emitted at most once even if it was provided before this header
 * was included.
 */
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
    size_t l; /* presumably the number of characters in use -- confirm */
    size_t m; /* presumably the allocated capacity of s -- confirm */
    char *s;  /* character buffer */
} kstring_t;
#endif
00061 
00062 #ifdef __cplusplus
00063 extern "C" {
00064 #endif
00065 
00066 BGZF* dummy();
00067 
00068     /******************
00069      * Basic routines *
00070      ******************/
00071 
00072     /**
00073      * Open an existing file descriptor for reading or writing.
00074      *
00075      * @param fd    file descriptor
00076      * @param mode  mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies
00077      *              the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored.
00078      * @return      BGZF file handler; 0 on error
00079      */
00080     BGZF* bgzf_dopen(int fd, const char *mode);
00081 
00082     /**
00083      * Open the specified file for reading or writing.
00084      */
00085     BGZF* bgzf_open(const char* path, const char *mode);
00086 
00087     /**
00088      * Close the BGZF and free all associated resources.
00089      *
00090      * @param fp    BGZF file handler
00091      * @return      0 on success and -1 on error
00092      */
00093     int bgzf_close(BGZF *fp);
00094 
00095     /**
00096      * Read up to _length_ bytes from the file storing into _data_.
00097      *
00098      * @param fp     BGZF file handler
00099      * @param data   data array to read into
00100      * @param length size of data to read
00101      * @return       number of bytes actually read; 0 on end-of-file and -1 on error
00102      */
00103     ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length);
00104 
00105     /**
00106      * Write _length_ bytes from _data_ to the file.
00107      *
00108      * @param fp     BGZF file handler
00109      * @param data   data array to write
00110      * @param length size of data to write
00111      * @return       number of bytes actually written; -1 on error
00112      */
00113     ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length);
00114 
00115     /**
00116      * Write the data in the buffer to the file.
00117      */
00118     int bgzf_flush(BGZF *fp);
00119 
00120     /**
00121      * Return a virtual file pointer to the current location in the file.
00122      * No interpetation of the value should be made, other than a subsequent
00123      * call to bgzf_seek can be used to position the file at the same point.
00124      * Return value is non-negative on success.
00125      */
00126     #define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF))
00127 
00128     /**
00129      * Set the file to read from the location specified by _pos_.
00130      *
00131      * @param fp     BGZF file handler
00132      * @param pos    virtual file offset returned by bgzf_tell()
00133      * @param whence must be SEEK_SET
00134      * @return       0 on success and -1 on error
00135      */
00136     int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence);
00137 
00138     /**
00139      * Check if the BGZF end-of-file (EOF) marker is present
00140      *
00141      * @param fp    BGZF file handler opened for reading
00142      * @return      1 if EOF is present; 0 if not or on I/O error
00143      */
00144     int bgzf_check_EOF(BGZF *fp);
00145 
00146     /**
00147      * Check if a file is in the BGZF format
00148      *
00149      * @param fn    file name
00150      * @return      1 if _fn_ is BGZF; 0 if not or on I/O error
00151      */
00152      int bgzf_is_bgzf(const char *fn);
00153 
00154     /*********************
00155      * Advanced routines *
00156      *********************/
00157 
00158     /**
00159      * Set the cache size. Only effective when compiled with -DBGZF_CACHE.
00160      *
00161      * @param fp    BGZF file handler
00162      * @param size  size of cache in bytes; 0 to disable caching (default)
00163      */
00164     void bgzf_set_cache_size(BGZF *fp, int size);
00165 
00166     /**
00167      * Flush the file if the remaining buffer size is smaller than _size_ 
00168      */
00169     int bgzf_flush_try(BGZF *fp, ssize_t size);
00170 
00171     /**
00172      * Read one byte from a BGZF file. It is faster than bgzf_read()
00173      * @param fp     BGZF file handler
00174      * @return       byte read; -1 on end-of-file or error
00175      */
00176     int bgzf_getc(BGZF *fp);
00177 
00178     /**
00179      * Read one line from a BGZF file. It is faster than bgzf_getc()
00180      *
00181      * @param fp     BGZF file handler
00182      * @param delim  delimitor
00183      * @param str    string to write to; must be initialized
00184      * @return       length of the string; 0 on end-of-file; negative on error
00185      */
00186     int bgzf_getline(BGZF *fp, int delim, kstring_t *str);
00187 
00188     /**
00189      * Read the next BGZF block.
00190      */
00191     int bgzf_read_block(BGZF *fp);
00192 
00193 #ifdef __cplusplus
00194 }
00195 #endif
00196 
00197 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends