libStatGen Software
1
|
/* The MIT License

   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
                 2011 Attractive Chaos <attractor@live.co.uk>

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
*/

/* The BGZF library was originally written by Bob Handsaker from the Broad
 * Institute. It was later improved by the SAMtools developers.
 */

#ifndef __BGZF_H
#define __BGZF_H

#include <stdint.h>
#include <stdio.h>
#include <sys/types.h> // ssize_t, used by the read/write prototypes below
#ifdef __ZLIB_AVAILABLE__
#include <zlib.h>
#endif

#define BGZF_BLOCK_SIZE 0x10000 // 64k

/* Error codes. The values are distinct single bits, so they can presumably be
 * OR-ed together in BGZF::errcode -- TODO confirm against the implementation. */
#define BGZF_ERR_ZLIB   1
#define BGZF_ERR_HEADER 2
#define BGZF_ERR_IO     4
#define BGZF_ERR_MISUSE 8

/**
 * Handle for one open BGZF file.
 *
 * block_address and block_offset together form the virtual file offset
 * reported by bgzf_tell().
 */
typedef struct {
    int open_mode:8, compress_level:8, errcode:16;
    int cache_size;
    int block_length, block_offset;
    int64_t block_address;
    void *uncompressed_block, *compressed_block;
    void *cache; // a pointer to a hash table
    void *fp; // actual file handler; FILE* on writing; FILE* or knetFile* on reading
} BGZF;

#ifndef KSTRING_T
#define KSTRING_T kstring_t
/* String buffer filled by bgzf_getline(). */
typedef struct __kstring_t {
    size_t l, m;
    char *s;
} kstring_t;
#endif

#ifdef __cplusplus
extern "C" {
#endif

    /* NOTE(review): the purpose of this function is not documented in this
     * header. It is declared with (void) so that C callers get a real
     * prototype; this matches what "()" already means in C++. */
    BGZF* dummy(void);

    /******************
     * Basic routines *
     ******************/

    /**
     * Open an existing file descriptor for reading or writing.
     *
     * @param fd    file descriptor
     * @param mode  mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies
     *              the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored.
     * @return      BGZF file handler; 0 on error
     */
    BGZF* bgzf_dopen(int fd, const char *mode);

    /**
     * Open the specified file for reading or writing.
     */
    BGZF* bgzf_open(const char* path, const char *mode);

    /**
     * Close the BGZF and free all associated resources.
     *
     * @param fp    BGZF file handler
     * @return      0 on success and -1 on error
     */
    int bgzf_close(BGZF *fp);

    /**
     * Read up to _length_ bytes from the file storing into _data_.
     *
     * @param fp     BGZF file handler
     * @param data   data array to read into
     * @param length size of data to read
     * @return       number of bytes actually read; 0 on end-of-file and -1 on error
     */
    ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length);

    /**
     * Write _length_ bytes from _data_ to the file.
     *
     * @param fp     BGZF file handler
     * @param data   data array to write
     * @param length size of data to write
     * @return       number of bytes actually written; -1 on error
     */
    ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length);

    /**
     * Write the data in the buffer to the file.
     */
    int bgzf_flush(BGZF *fp);

    /**
     * Return a virtual file pointer to the current location in the file.
     * No interpretation of the value should be made, other than a subsequent
     * call to bgzf_seek can be used to position the file at the same point.
     * Return value is non-negative on success.
     *
     * The value is block_address shifted left by 16 bits, OR-ed with the low
     * 16 bits of block_offset. This is a macro and evaluates _fp_ twice, so
     * do not pass an expression with side effects.
     */
    #define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF))

    /**
     * Set the file to read from the location specified by _pos_.
     *
     * @param fp     BGZF file handler
     * @param pos    virtual file offset returned by bgzf_tell()
     * @param whence must be SEEK_SET
     * @return       0 on success and -1 on error
     */
    int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence);

    /**
     * Check if the BGZF end-of-file (EOF) marker is present
     *
     * @param fp    BGZF file handler opened for reading
     * @return      1 if EOF is present; 0 if not or on I/O error
     */
    int bgzf_check_EOF(BGZF *fp);

    /**
     * Check if a file is in the BGZF format
     *
     * @param fn    file name
     * @return      1 if _fn_ is BGZF; 0 if not or on I/O error
     */
    int bgzf_is_bgzf(const char *fn);

    /*********************
     * Advanced routines *
     *********************/

    /**
     * Set the cache size. Only effective when compiled with -DBGZF_CACHE.
     *
     * @param fp    BGZF file handler
     * @param size  size of cache in bytes; 0 to disable caching (default)
     */
    void bgzf_set_cache_size(BGZF *fp, int size);

    /**
     * Flush the file if the remaining buffer size is smaller than _size_
     */
    int bgzf_flush_try(BGZF *fp, ssize_t size);

    /**
     * Read one byte from a BGZF file. It is faster than bgzf_read()
     * @param fp     BGZF file handler
     * @return       byte read; -1 on end-of-file or error
     */
    int bgzf_getc(BGZF *fp);

    /**
     * Read one line from a BGZF file. It is faster than bgzf_getc()
     *
     * @param fp     BGZF file handler
     * @param delim  delimiter
     * @param str    string to write to; must be initialized
     * @return       length of the string; 0 on end-of-file; negative on error
     */
    int bgzf_getline(BGZF *fp, int delim, kstring_t *str);

    /**
     * Read the next BGZF block.
     */
    int bgzf_read_block(BGZF *fp);

#ifdef __cplusplus
}
#endif

#endif