libStatGen Software  1
bgzf.c
00001 /* The MIT License
00002 
00003    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
00004                  2011 Attractive Chaos <attractor@live.co.uk>
00005 
00006    Permission is hereby granted, free of charge, to any person obtaining a copy
00007    of this software and associated documentation files (the "Software"), to deal
00008    in the Software without restriction, including without limitation the rights
00009    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00010    copies of the Software, and to permit persons to whom the Software is
00011    furnished to do so, subject to the following conditions:
00012 
00013    The above copyright notice and this permission notice shall be included in
00014    all copies or substantial portions of the Software.
00015 
00016    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00017    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00018    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00019    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00020    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00021    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00022    THE SOFTWARE.
00023 */
00024 
00025 #ifdef __ZLIB_AVAILABLE__
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <unistd.h>
00030 #include <assert.h>
00031 #include <sys/types.h>
00032 #include "bgzf.h"
00033 
00034 #ifdef _USE_KNETFILE
00035 #include "knetfile.h"
00036 typedef knetFile *_bgzf_file_t;
00037 #define _bgzf_open(fn, mode) knet_open(fn, mode)
00038 #define _bgzf_dopen(fp, mode) knet_dopen(fp, mode)
00039 #define _bgzf_close(fp) knet_close(fp)
00040 #define _bgzf_fileno(fp) ((fp)->fd)
00041 #define _bgzf_tell(fp) knet_tell(fp)
00042 #define _bgzf_seek(fp, offset, whence) knet_seek(fp, offset, whence)
00043 #define _bgzf_read(fp, buf, len) knet_read(fp, buf, len)
00044 #define _bgzf_write(fp, buf, len) knet_write(fp, buf, len)
00045 #else // ~defined(_USE_KNETFILE)
00046 #if defined(_WIN32) || defined(_MSC_VER)
00047 #define ftello(fp) ftell(fp)
00048 #define fseeko(fp, offset, whence) fseek(fp, offset, whence)
00049 #else // ~defined(_WIN32)
00050 extern off_t ftello(FILE *stream);
00051 extern int fseeko(FILE *stream, off_t offset, int whence);
00052 #endif // ~defined(_WIN32)
00053 typedef FILE *_bgzf_file_t;
00054 #define _bgzf_open(fn, mode) fopen(fn, mode)
00055 #define _bgzf_dopen(fp, mode) fdopen(fp, mode)
00056 #define _bgzf_close(fp) fclose(fp)
00057 #define _bgzf_fileno(fp) fileno(fp)
00058 #define _bgzf_tell(fp) ftello(fp)
00059 #define _bgzf_seek(fp, offset, whence) fseeko(fp, offset, whence)
00060 #define _bgzf_read(fp, buf, len) fread(buf, 1, len, fp)
00061 #define _bgzf_write(fp, buf, len) fwrite(buf, 1, len, fp)
00062 #endif // ~defined(_USE_KNETFILE)
00063 
00064 #define BLOCK_HEADER_LENGTH 18
00065 #define BLOCK_FOOTER_LENGTH 8
00066 
/* BGZF/GZIP header (specialized from RFC 1952; little endian):
 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 | 31|139|  8|  4|              0|  0|255|      6| 66| 67|      2|BLK_LEN|
 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 The template below leaves the two BLK_LEN bytes zero; the 19th element is a
 trailing zero byte that is never copied into a block.
*/
static const uint8_t g_magic[19] = {
    31, 139, 8, 4,      /* gzip ID1, ID2, CM, FLG */
    0, 0, 0, 0,         /* four zero bytes */
    0, 255,             /* 0, 255 */
    6, 0,               /* 16-bit value 6 */
    'B', 'C',           /* 66, 67 */
    2, 0,               /* 16-bit value 2 */
    0, 0,               /* BLK_LEN placeholder */
    0                   /* trailing zero */
};
00073 
00074 #ifdef BGZF_CACHE
/* One cached, already-inflated block; stored as the value type of the
 * "cache" hash map, keyed by the block's address in the file. */
typedef struct {
    /* number of valid bytes in the cached block (copied from fp->block_length) */
    int size;
    /* heap copy of the uncompressed block buffer */
    uint8_t *block;
    /* file offset just past the compressed block (block address + on-disk size) */
    int64_t end_offset;
} cache_t;
00080 #include "khash.h"
00081 KHASH_MAP_INIT_INT64(cache, cache_t)
00082 #endif
00083 
/* Store a 16-bit value into buffer[0..1], least-significant byte first. */
static inline void packInt16(uint8_t *buffer, uint16_t value)
{
    const uint8_t low_byte = (uint8_t)(value & 0xFFu);
    const uint8_t high_byte = (uint8_t)((value >> 8) & 0xFFu);

    buffer[0] = low_byte;
    buffer[1] = high_byte;
}
00089 
/* Read a 16-bit little-endian value from buffer[0..1]; result is 0..65535. */
static inline int unpackInt16(const uint8_t *buffer)
{
    const int low_byte = buffer[0];
    const int high_byte = buffer[1];

    return (high_byte << 8) | low_byte;
}
00094 
/* Store a 32-bit value into buffer[0..3], least-significant byte first. */
static inline void packInt32(uint8_t *buffer, uint32_t value)
{
    int i;

    for (i = 0; i < 4; ++i) {
        buffer[i] = (uint8_t)(value & 0xFFu);
        value >>= 8;
    }
}
00102 
00103 static BGZF *bgzf_read_init()
00104 {
00105     BGZF *fp;
00106     fp = calloc(1, sizeof(BGZF));
00107     fp->open_mode = 'r';
00108     fp->uncompressed_block = malloc(BGZF_BLOCK_SIZE);
00109     fp->compressed_block = malloc(BGZF_BLOCK_SIZE);
00110 #ifdef BGZF_CACHE
00111     fp->cache = kh_init(cache);
00112 #endif
00113     return fp;
00114 }
00115 
00116 static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level
00117 {
00118     BGZF *fp;
00119     fp = calloc(1, sizeof(BGZF));
00120     fp->open_mode = 'w';
00121     fp->uncompressed_block = malloc(BGZF_BLOCK_SIZE);
00122     fp->compressed_block = malloc(BGZF_BLOCK_SIZE);
00123     fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
00124     if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
00125     return fp;
00126 }
/*
 * Derive the compression level from an fopen-style mode string.
 *
 * The first decimal digit found in the string gives the level (0-9); with no
 * digit the result is -1. A 'u' anywhere in the string forces level 0,
 * overriding any digit.
 */
static int mode2level(const char *__restrict mode)
{
    const char *first_digit = strpbrk(mode, "0123456789");
    int level = -1;

    if (first_digit != NULL) {
        level = (int)*first_digit - '0';
    }
    if (strchr(mode, 'u') != NULL) {
        level = 0;
    }
    return level;
}
00137 
00138 BGZF *bgzf_open(const char *path, const char *mode)
00139 {
00140     BGZF *fp = 0;
00141     if (strchr(mode, 'r') || strchr(mode, 'R')) {
00142         _bgzf_file_t fpr;
00143         if ((fpr = _bgzf_open(path, "r")) == 0) return 0;
00144         fp = bgzf_read_init();
00145         fp->fp = fpr;
00146     } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
00147         FILE *fpw;
00148         if ((fpw = fopen(path, "w")) == 0) return 0;
00149         fp = bgzf_write_init(mode2level(mode));
00150         fp->fp = fpw;
00151     } else if (strchr(mode, 'a') || strchr(mode, 'A')) {
00152         FILE *fpw;
00153         if ((fpw = fopen(path, "r+")) == 0) return 0;
00154         fp = bgzf_write_init(mode2level(mode));
00155         fp->fp = fpw;
00156                 // Check for trailing EOF block.
00157                 if(bgzf_check_EOF(fp))
00158                 {
00159                     // Overwrite the trailing EOF.
00160                     _bgzf_seek(fp->fp, -28, SEEK_END);
00161                 }
00162                 else
00163                 {
00164                     // No trailing EOF block, so go to the end
00165                     _bgzf_seek(fp->fp, 0, SEEK_END);
00166                 }
00167     }
00168     return fp;
00169 }
00170 
00171 BGZF *bgzf_dopen(int fd, const char *mode)
00172 {
00173     BGZF *fp = 0;
00174     if (strchr(mode, 'r') || strchr(mode, 'R')) {
00175         _bgzf_file_t fpr;
00176         if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0;
00177         fp = bgzf_read_init();
00178         fp->fp = fpr;
00179     } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
00180         FILE *fpw;
00181         if ((fpw = fdopen(fd, "w")) == 0) return 0;
00182         fp = bgzf_write_init(mode2level(mode));
00183         fp->fp = fpw;
00184     }
00185     return fp;
00186 }
00187 
00188 // Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length.
00189 static int deflate_block(BGZF *fp, int block_length)
00190 {
00191     uint8_t *buffer = fp->compressed_block;
00192     int buffer_size = BGZF_BLOCK_SIZE;
00193     int input_length = block_length;
00194     int compressed_length = 0;
00195     int remaining;
00196     uint32_t crc;
00197 
00198     assert(block_length <= BGZF_BLOCK_SIZE); // guaranteed by the caller
00199     memcpy(buffer, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
00200     while (1) { // loop to retry for blocks that do not compress enough
00201         int status;
00202         z_stream zs;
00203         zs.zalloc = NULL;
00204         zs.zfree = NULL;
00205         zs.next_in = fp->uncompressed_block;
00206         zs.avail_in = input_length;
00207         zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
00208         zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
00209         status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); // -15 to disable zlib header/footer
00210         if (status != Z_OK) {
00211             fp->errcode |= BGZF_ERR_ZLIB;
00212             return -1;
00213         }
00214         status = deflate(&zs, Z_FINISH);
00215         if (status != Z_STREAM_END) { // not compressed enough
00216             deflateEnd(&zs); // reset the stream
00217             if (status == Z_OK) { // reduce the size and recompress
00218                 input_length -= 1024;
00219                 assert(input_length > 0); // logically, this should not happen
00220                 continue;
00221             }
00222             fp->errcode |= BGZF_ERR_ZLIB;
00223             return -1;
00224         }
00225         if (deflateEnd(&zs) != Z_OK) {
00226             fp->errcode |= BGZF_ERR_ZLIB;
00227             return -1;
00228         }
00229         compressed_length = zs.total_out;
00230         compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
00231         assert(compressed_length <= BGZF_BLOCK_SIZE);
00232         break;
00233     }
00234 
00235     assert(compressed_length > 0);
00236     packInt16((uint8_t*)&buffer[16], compressed_length - 1); // write the compressed_length; -1 to fit 2 bytes
00237     crc = crc32(0L, NULL, 0L);
00238     crc = crc32(crc, fp->uncompressed_block, input_length);
00239     packInt32((uint8_t*)&buffer[compressed_length-8], crc);
00240     packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
00241 
00242     remaining = block_length - input_length;
00243     if (remaining > 0) {
00244         assert(remaining <= input_length);
00245         memcpy(fp->uncompressed_block, fp->uncompressed_block + input_length, remaining);
00246     }
00247     fp->block_offset = remaining;
00248     return compressed_length;
00249 }
00250 
00251 // Inflate the block in fp->compressed_block into fp->uncompressed_block
00252 static int inflate_block(BGZF* fp, int block_length)
00253 {
00254     z_stream zs;
00255     zs.zalloc = NULL;
00256     zs.zfree = NULL;
00257     zs.next_in = fp->compressed_block + 18;
00258     zs.avail_in = block_length - 16;
00259     zs.next_out = fp->uncompressed_block;
00260     zs.avail_out = BGZF_BLOCK_SIZE;
00261 
00262     if (inflateInit2(&zs, -15) != Z_OK) {
00263         fp->errcode |= BGZF_ERR_ZLIB;
00264         return -1;
00265     }
00266     if (inflate(&zs, Z_FINISH) != Z_STREAM_END) {
00267         inflateEnd(&zs);
00268         fp->errcode |= BGZF_ERR_ZLIB;
00269         return -1;
00270     }
00271     if (inflateEnd(&zs) != Z_OK) {
00272         fp->errcode |= BGZF_ERR_ZLIB;
00273         return -1;
00274     }
00275     return zs.total_out;
00276 }
00277 
/*
 * Return 1 if the 18-byte `header` is a BGZF block header, else 0.
 *
 * Checks the gzip magic (31, 139), the deflate method (8), the extra-field
 * flag bit (4), an extra-field length of 6, the "BC" subfield identifier and
 * a subfield length of 2.
 */
static int check_header(const uint8_t *header)
{
    if (header[0] != 31 || header[1] != 139) return 0;
    if (header[2] != 8) return 0;
    if ((header[3] & 4) == 0) return 0;
    if (unpackInt16(&header[10]) != 6) return 0;
    if (header[12] != 'B' || header[13] != 'C') return 0;
    if (unpackInt16(&header[14]) != 2) return 0;
    return 1;
}
00285 
00286 #ifdef BGZF_CACHE
00287 static void free_cache(BGZF *fp)
00288 {
00289     khint_t k;
00290     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00291     if (fp->open_mode != 'r') return;
00292     for (k = kh_begin(h); k < kh_end(h); ++k)
00293         if (kh_exist(h, k)) free(kh_val(h, k).block);
00294     kh_destroy(cache, h);
00295 }
00296 
00297 static int load_block_from_cache(BGZF *fp, int64_t block_address)
00298 {
00299     khint_t k;
00300     cache_t *p;
00301     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00302     k = kh_get(cache, h, block_address);
00303     if (k == kh_end(h)) return 0;
00304     p = &kh_val(h, k);
00305     if (fp->block_length != 0) fp->block_offset = 0;
00306     fp->block_address = block_address;
00307     fp->block_length = p->size;
00308     memcpy(fp->uncompressed_block, p->block, BGZF_BLOCK_SIZE);
00309     _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET);
00310     return p->size;
00311 }
00312 
00313 static void cache_block(BGZF *fp, int size)
00314 {
00315     int ret;
00316     khint_t k;
00317     cache_t *p;
00318     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00319     if (BGZF_BLOCK_SIZE >= fp->cache_size) return;
00320     if ((kh_size(h) + 1) * BGZF_BLOCK_SIZE > fp->cache_size) {
00321         /* A better way would be to remove the oldest block in the
00322          * cache, but here we remove a random one for simplicity. This
00323          * should not have a big impact on performance. */
00324         for (k = kh_begin(h); k < kh_end(h); ++k)
00325             if (kh_exist(h, k)) break;
00326         if (k < kh_end(h)) {
00327             free(kh_val(h, k).block);
00328             kh_del(cache, h, k);
00329         }
00330     }
00331     k = kh_put(cache, h, fp->block_address, &ret);
00332     if (ret == 0) return; // if this happens, a bug!
00333     p = &kh_val(h, k);
00334     p->size = fp->block_length;
00335     p->end_offset = fp->block_address + size;
00336     p->block = malloc(BGZF_BLOCK_SIZE);
00337     memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_BLOCK_SIZE);
00338 }
00339 #else
00340 static void free_cache(BGZF *fp) {}
00341 static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;}
00342 static void cache_block(BGZF *fp, int size) {}
00343 #endif
00344 
00345 int bgzf_read_block(BGZF *fp)
00346 {
00347     uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
00348     int count, size = 0, block_length, remaining;
00349     int64_t block_address;
00350     block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
00351     if (load_block_from_cache(fp, block_address)) return 0;
00352     count = _bgzf_read(fp->fp, header, sizeof(header));
00353     if (count == 0) { // no data read
00354         fp->block_length = 0;
00355         return 0;
00356     }
00357     if (count != sizeof(header) || !check_header(header)) {
00358         fp->errcode |= BGZF_ERR_HEADER;
00359         return -1;
00360     }
00361     size = count;
00362     block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1"
00363     compressed_block = (uint8_t*)fp->compressed_block;
00364     memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
00365     remaining = block_length - BLOCK_HEADER_LENGTH;
00366     count = _bgzf_read(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
00367     if (count != remaining) {
00368         fp->errcode |= BGZF_ERR_IO;
00369         return -1;
00370     }
00371     size += count;
00372     if ((count = inflate_block(fp, block_length)) < 0) return -1;
00373     if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek.
00374     fp->block_address = block_address;
00375     fp->block_length = count;
00376     cache_block(fp, size);
00377     return 0;
00378 }
00379 
00380 ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length)
00381 {
00382     ssize_t bytes_read = 0;
00383     uint8_t *output = data;
00384     if (length <= 0) return 0;
00385     assert(fp->open_mode == 'r');
00386     while (bytes_read < length) {
00387         int copy_length, available = fp->block_length - fp->block_offset;
00388         uint8_t *buffer;
00389         if (available <= 0) {
00390             if (bgzf_read_block(fp) != 0) return -1;
00391             available = fp->block_length - fp->block_offset;
00392             if (available <= 0) break;
00393         }
00394         copy_length = length - bytes_read < available? length - bytes_read : available;
00395         buffer = fp->uncompressed_block;
00396         memcpy(output, buffer + fp->block_offset, copy_length);
00397         fp->block_offset += copy_length;
00398         output += copy_length;
00399         bytes_read += copy_length;
00400     }
00401     if (fp->block_offset == fp->block_length) {
00402         fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
00403         fp->block_offset = fp->block_length = 0;
00404     }
00405     return bytes_read;
00406 }
00407 
00408 int bgzf_flush(BGZF *fp)
00409 {
00410     assert(fp->open_mode == 'w');
00411     while (fp->block_offset > 0) {
00412         int block_length;
00413         block_length = deflate_block(fp, fp->block_offset);
00414         if (block_length < 0) return -1;
00415         if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != block_length) {
00416             fp->errcode |= BGZF_ERR_IO; // possibly truncated file
00417             return -1;
00418         }
00419         fp->block_address += block_length;
00420     }
00421     return 0;
00422 }
00423 
00424 int bgzf_flush_try(BGZF *fp, ssize_t size)
00425 {
00426     if (fp->block_offset + size > BGZF_BLOCK_SIZE)
00427         return bgzf_flush(fp);
00428     return -1;
00429 }
00430 
00431 ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length)
00432 {
00433     const uint8_t *input = data;
00434     int block_length = BGZF_BLOCK_SIZE, bytes_written;
00435     assert(fp->open_mode == 'w');
00436     input = data;
00437     bytes_written = 0;
00438     while (bytes_written < length) {
00439         uint8_t* buffer = fp->uncompressed_block;
00440         int copy_length = block_length - fp->block_offset < length - bytes_written? block_length - fp->block_offset : length - bytes_written;
00441         memcpy(buffer + fp->block_offset, input, copy_length);
00442         fp->block_offset += copy_length;
00443         input += copy_length;
00444         bytes_written += copy_length;
00445         if (fp->block_offset == block_length && bgzf_flush(fp)) break;
00446     }
00447     return bytes_written;
00448 }
00449 
00450 int bgzf_close(BGZF* fp)
00451 {
00452     int ret, count, block_length;
00453     if (fp == 0) return -1;
00454     if (fp->open_mode == 'w') {
00455         if (bgzf_flush(fp) != 0) return -1;
00456         block_length = deflate_block(fp, 0); // write an empty block
00457         count = fwrite(fp->compressed_block, 1, block_length, fp->fp);
00458                 if(count != 0)
00459                 {
00460                     // Something was written
00461                 }
00462         if (fflush(fp->fp) != 0) {
00463             fp->errcode |= BGZF_ERR_IO;
00464             return -1;
00465         }
00466     }
00467     ret = fp->open_mode == 'w'? fclose(fp->fp) : _bgzf_close(fp->fp);
00468     if (ret != 0) return -1;
00469     free(fp->uncompressed_block);
00470     free(fp->compressed_block);
00471     free_cache(fp);
00472     free(fp);
00473     return 0;
00474 }
00475 
00476 void bgzf_set_cache_size(BGZF *fp, int cache_size)
00477 {
00478     if (fp) fp->cache_size = cache_size;
00479 }
00480 
00481 int bgzf_check_EOF(BGZF *fp)
00482 {
00483     static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
00484         // Last 28 bytes of an uncompressed bgzf file which are different
00485         // from the last 28 bytes of compressed bgzf files.
00486         static uint8_t magic2[28] = "\4\0\0\0\0\0\377\6\0\102\103\2\0\036\0\1\0\0\377\377\0\0\0\0\0\0\0\0";
00487     uint8_t buf[28];
00488     off_t offset;
00489     offset = _bgzf_tell((_bgzf_file_t)fp->fp);
00490     if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0;
00491     int count = _bgzf_read(fp->fp, buf, 28);
00492         if(count != 28)
00493         {
00494             fp->errcode |= BGZF_ERR_IO; // possibly truncated file
00495             return(0);
00496         }
00497     _bgzf_seek(fp->fp, offset, SEEK_SET);
00498         if((memcmp(magic, buf, 28) == 0) || (memcmp(magic2, buf, 28) == 0))
00499         {
00500             return(1);
00501         }
00502         return(0);
00503 }
00504 
00505 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
00506 {
00507     int block_offset;
00508     int64_t block_address;
00509 
00510     if (fp->open_mode != 'r' || where != SEEK_SET) {
00511         fp->errcode |= BGZF_ERR_MISUSE;
00512         return -1;
00513     }
00514     block_offset = pos & 0xFFFF;
00515     block_address = pos >> 16;
00516     if (_bgzf_seek(fp->fp, block_address, SEEK_SET) < 0) {
00517         fp->errcode |= BGZF_ERR_IO;
00518         return -1;
00519     }
00520     fp->block_length = 0;  // indicates current block has not been loaded
00521     fp->block_address = block_address;
00522     fp->block_offset = block_offset;
00523     return 0;
00524 }
00525 
00526 int bgzf_is_bgzf(const char *fn)
00527 {
00528     uint8_t buf[16];
00529     int n;
00530     _bgzf_file_t fp;
00531     if ((fp = _bgzf_open(fn, "r")) == 0) return 0;
00532     n = _bgzf_read(fp, buf, 16);
00533     _bgzf_close(fp);
00534     if (n != 16) return 0;
00535     return memcmp(g_magic, buf, 16) == 0? 1 : 0;
00536 }
00537 
00538 int bgzf_getc(BGZF *fp)
00539 {
00540     int c;
00541     if (fp->block_offset >= fp->block_length) {
00542         if (bgzf_read_block(fp) != 0) return -2; /* error */
00543         if (fp->block_length == 0) return -1; /* end-of-file */
00544     }
00545     c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++];
00546     if (fp->block_offset == fp->block_length) {
00547         fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
00548         fp->block_offset = 0;
00549         fp->block_length = 0;
00550     }
00551     return c;
00552 }
00553 
00554 #ifndef kroundup32
00555 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
00556 #endif
00557 
00558 int bgzf_getline(BGZF *fp, int delim, kstring_t *str)
00559 {
00560     int l, state = 0;
00561     unsigned char *buf = (unsigned char*)fp->uncompressed_block;
00562     str->l = 0;
00563     do {
00564         if (fp->block_offset >= fp->block_length) {
00565             if (bgzf_read_block(fp) != 0) { state = -2; break; }
00566             if (fp->block_length == 0) { state = -1; break; }
00567         }
00568         for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l);
00569         if (l < fp->block_length) state = 1;
00570         l -= fp->block_offset;
00571         if (str->l + l + 1 >= str->m) {
00572             str->m = str->l + l + 2;
00573             kroundup32(str->m);
00574             str->s = (char*)realloc(str->s, str->m);
00575         }
00576         memcpy(str->s + str->l, buf + fp->block_offset, l);
00577         str->l += l;
00578         fp->block_offset += l + 1;
00579         if (fp->block_offset >= fp->block_length) {
00580             fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
00581             fp->block_offset = 0;
00582             fp->block_length = 0;
00583         } 
00584     } while (state == 0);
00585     if (str->l == 0 && state < 0) return state;
00586     str->s[str->l] = 0;
00587     return str->l;
00588 }
00589 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends