/* libStatGen Software */
/* The MIT License

   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
                 2011 Attractive Chaos <attractor@live.co.uk>

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
*/

#ifdef __ZLIB_AVAILABLE__
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <sys/types.h>
#include "bgzf.h"

/*
 * Low-level file access used for READ handles. With _USE_KNETFILE the
 * _bgzf_* macros map onto the knetfile API, otherwise onto stdio. Write
 * handles are always opened with fopen()/fdopen() (see bgzf_open/bgzf_dopen).
 */
#ifdef _USE_KNETFILE
#include "knetfile.h"
typedef knetFile *_bgzf_file_t;
#define _bgzf_open(fn, mode) knet_open(fn, mode)
#define _bgzf_dopen(fp, mode) knet_dopen(fp, mode)
#define _bgzf_close(fp) knet_close(fp)
#define _bgzf_fileno(fp) ((fp)->fd)
#define _bgzf_tell(fp) knet_tell(fp)
#define _bgzf_seek(fp, offset, whence) knet_seek(fp, offset, whence)
#define _bgzf_read(fp, buf, len) knet_read(fp, buf, len)
#define _bgzf_write(fp, buf, len) knet_write(fp, buf, len)
#else // ~defined(_USE_KNETFILE)
#if defined(_WIN32) || defined(_MSC_VER)
#define ftello(fp) ftell(fp)
#define fseeko(fp, offset, whence) fseek(fp, offset, whence)
#else // ~defined(_WIN32)
extern off_t ftello(FILE *stream);
extern int fseeko(FILE *stream, off_t offset, int whence);
#endif // ~defined(_WIN32)
typedef FILE *_bgzf_file_t;
#define _bgzf_open(fn, mode) fopen(fn, mode)
#define _bgzf_dopen(fp, mode) fdopen(fp, mode)
#define _bgzf_close(fp) fclose(fp)
#define _bgzf_fileno(fp) fileno(fp)
#define _bgzf_tell(fp) ftello(fp)
#define _bgzf_seek(fp, offset, whence) fseeko(fp, offset, whence)
#define _bgzf_read(fp, buf, len) fread(buf, 1, len, fp)
#define _bgzf_write(fp, buf, len) fwrite(buf, 1, len, fp)
#endif // ~define(_USE_KNETFILE)

#define BLOCK_HEADER_LENGTH 18
#define BLOCK_FOOTER_LENGTH 8

/* BGZF/GZIP header (specialized from RFC 1952; little endian):
 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 | 31|139|  8|  4|              0|  0|255|      6| 66| 67|      2|BLK_LEN|
 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
*/
static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0";

#ifdef BGZF_CACHE
/* One cached, already-inflated block. */
typedef struct {
    int size;            /* value of fp->block_length when the block was cached */
    uint8_t *block;      /* copy of the uncompressed block (BGZF_BLOCK_SIZE bytes) */
    int64_t end_offset;  /* file offset just past the compressed block */
} cache_t;
#include "khash.h"
KHASH_MAP_INIT_INT64(cache, cache_t)
#endif

/* Store a 16-bit value at buffer[0..1], least significant byte first. */
static inline void packInt16(uint8_t *buffer, uint16_t value)
{
    buffer[0] = value;
    buffer[1] = value >> 8;
}

/* Read a 16-bit little-endian value from buffer[0..1]. */
static inline int unpackInt16(const uint8_t *buffer)
{
    return buffer[0] | buffer[1] << 8;
}

/* Store a 32-bit value at buffer[0..3], least significant byte first. */
static inline void packInt32(uint8_t *buffer, uint32_t value)
{
    buffer[0] = value;
    buffer[1] = value >> 8;
    buffer[2] = value >> 16;
    buffer[3] = value >> 24;
}

/*
 * Allocate a zeroed BGZF handle with both block buffers and set its mode.
 * Returns NULL (with nothing leaked) when any allocation fails.
 */
static BGZF *bgzf_alloc(char open_mode)
{
    BGZF *fp = calloc(1, sizeof(BGZF));
    if (fp == NULL) return NULL;
    fp->open_mode = open_mode;
    fp->uncompressed_block = malloc(BGZF_BLOCK_SIZE);
    fp->compressed_block = malloc(BGZF_BLOCK_SIZE);
    if (fp->uncompressed_block == NULL || fp->compressed_block == NULL) {
        free(fp->uncompressed_block);
        free(fp->compressed_block);
        free(fp);
        return NULL;
    }
    return fp;
}

/* Create a handle for reading. Returns NULL on allocation failure. */
static BGZF *bgzf_read_init(void)
{
    BGZF *fp = bgzf_alloc('r');
    if (fp == NULL) return NULL;
#ifdef BGZF_CACHE
    fp->cache = kh_init(cache);
#endif
    return fp;
}

/*
 * Create a handle for writing. Returns NULL on allocation failure.
 * compress_level < 0 or > 9 selects Z_DEFAULT_COMPRESSION.
 */
static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level
{
    BGZF *fp = bgzf_alloc('w');
    if (fp == NULL) return NULL;
    fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
    if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
    return fp;
}

// get the compress level from the mode string
// (first digit found in `mode`; 'u' anywhere forces level 0; -1 if neither)
static int mode2level(const char *__restrict mode)
{
    int i, compress_level = -1;
    for (i = 0; mode[i]; ++i)
        if (mode[i] >= '0' && mode[i] <= '9') break;
    if (mode[i]) compress_level = (int)mode[i] - '0';
    if (strchr(mode, 'u')) compress_level = 0;
    return compress_level;
}

/*
 * Open `path` as a BGZF stream.
 *
 * mode containing 'r'/'R' opens for reading, 'w'/'W' for writing (truncating),
 * 'a'/'A' for appending to an existing file; the tests are made in that order.
 * A digit or 'u' in the mode selects the compression level (see mode2level).
 *
 * Returns the new handle, or 0 if the file cannot be opened, memory cannot be
 * allocated, or the mode contains none of the recognised letters.
 */
BGZF *bgzf_open(const char *path, const char *mode)
{
    BGZF *fp = 0;
    if (strchr(mode, 'r') || strchr(mode, 'R')) {
        _bgzf_file_t fpr;
        if ((fpr = _bgzf_open(path, "r")) == 0) return 0;
        fp = bgzf_read_init();
        if (fp == 0) {
            _bgzf_close(fpr);
            return 0;
        }
        fp->fp = fpr;
    } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
        FILE *fpw;
        if ((fpw = fopen(path, "w")) == 0) return 0;
        fp = bgzf_write_init(mode2level(mode));
        if (fp == 0) {
            fclose(fpw);
            return 0;
        }
        fp->fp = fpw;
    } else if (strchr(mode, 'a') || strchr(mode, 'A')) {
        FILE *fpw;
        if ((fpw = fopen(path, "r+")) == 0) return 0;
        fp = bgzf_write_init(mode2level(mode));
        if (fp == 0) {
            fclose(fpw);
            return 0;
        }
        fp->fp = fpw;
        // NOTE(review): the append handle is a FILE* but is accessed below (and in
        // bgzf_check_EOF) through the _bgzf_* macros, which map to knet_* when
        // _USE_KNETFILE is defined -- confirm append mode is not used in that build.
        // Check for trailing EOF block.
        if (bgzf_check_EOF(fp)) {
            // Overwrite the trailing EOF.
            _bgzf_seek(fp->fp, -28, SEEK_END);
        } else {
            // No trailing EOF block, so go to the end
            _bgzf_seek(fp->fp, 0, SEEK_END);
        }
    }
    return fp;
}

/*
 * Wrap an already-open file descriptor as a BGZF stream ('r'/'R' or 'w'/'W';
 * append is not supported here). Returns 0 on failure. If the descriptor was
 * wrapped successfully but the handle could not be allocated, the wrapping
 * stream is closed, which also closes `fd`.
 */
BGZF *bgzf_dopen(int fd, const char *mode)
{
    BGZF *fp = 0;
    if (strchr(mode, 'r') || strchr(mode, 'R')) {
        _bgzf_file_t fpr;
        if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0;
        fp = bgzf_read_init();
        if (fp == 0) {
            _bgzf_close(fpr);
            return 0;
        }
        fp->fp = fpr;
    } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
        FILE *fpw;
        if ((fpw = fdopen(fd, "w")) == 0) return 0;
        fp = bgzf_write_init(mode2level(mode));
        if (fp == 0) {
            fclose(fpw);
            return 0;
        }
        fp->fp = fpw;
    }
    return fp;
}

// Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length.
//
// Returns the total length of the compressed block (header + data + footer),
// or -1 with BGZF_ERR_ZLIB set in fp->errcode. If the input does not fit, it
// is retried with 1024 fewer bytes; the bytes left over are moved to the start
// of fp->uncompressed_block and fp->block_offset is set to their count.
static int deflate_block(BGZF *fp, int block_length)
{
    uint8_t *buffer = fp->compressed_block;
    int buffer_size = BGZF_BLOCK_SIZE;
    int input_length = block_length;
    int compressed_length = 0;
    int remaining;
    uint32_t crc;

    assert(block_length <= BGZF_BLOCK_SIZE); // guaranteed by the caller
    memcpy(buffer, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
    while (1) { // loop to retry for blocks that do not compress enough
        int status;
        z_stream zs;
        zs.zalloc = NULL;
        zs.zfree = NULL;
        zs.opaque = NULL; // zlib requires zalloc, zfree and opaque to be set before init
        zs.next_in = fp->uncompressed_block;
        zs.avail_in = input_length;
        zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
        zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
        status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); // -15 to disable zlib header/footer
        if (status != Z_OK) {
            fp->errcode |= BGZF_ERR_ZLIB;
            return -1;
        }
        status = deflate(&zs, Z_FINISH);
        if (status != Z_STREAM_END) { // not compressed enough
            deflateEnd(&zs); // reset the stream
            if (status == Z_OK) { // reduce the size and recompress
                input_length -= 1024;
                assert(input_length > 0); // logically, this should not happen
                continue;
            }
            fp->errcode |= BGZF_ERR_ZLIB;
            return -1;
        }
        if (deflateEnd(&zs) != Z_OK) {
            fp->errcode |= BGZF_ERR_ZLIB;
            return -1;
        }
        compressed_length = zs.total_out;
        compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
        assert(compressed_length <= BGZF_BLOCK_SIZE);
        break;
    }

    assert(compressed_length > 0);
    packInt16((uint8_t*)&buffer[16], compressed_length - 1); // write the compressed_length; -1 to fit 2 bytes
    crc = crc32(0L, NULL, 0L);
    crc = crc32(crc, fp->uncompressed_block, input_length);
    packInt32((uint8_t*)&buffer[compressed_length-8], crc);
    packInt32((uint8_t*)&buffer[compressed_length-4], input_length);

    remaining = block_length - input_length;
    if (remaining > 0) {
        assert(remaining <= input_length); // so source and destination cannot overlap
        memcpy(fp->uncompressed_block, fp->uncompressed_block + input_length, remaining);
    }
    fp->block_offset = remaining;
    return compressed_length;
}

// Inflate the block in fp->compressed_block into fp->uncompressed_block
//
// Returns the number of uncompressed bytes, or -1 with BGZF_ERR_ZLIB set.
static int inflate_block(BGZF* fp, int block_length)
{
    z_stream zs;
    zs.zalloc = NULL;
    zs.zfree = NULL;
    zs.opaque = NULL; // zlib requires zalloc, zfree and opaque to be set before init
    zs.next_in = fp->compressed_block + 18;
    // NOTE(review): 16 rather than header+footer (26) is kept from the original;
    // it hands inflate() a few trailing bytes beyond the deflate data -- confirm.
    zs.avail_in = block_length - 16;
    zs.next_out = fp->uncompressed_block;
    zs.avail_out = BGZF_BLOCK_SIZE;

    if (inflateInit2(&zs, -15) != Z_OK) {
        fp->errcode |= BGZF_ERR_ZLIB;
        return -1;
    }
    if (inflate(&zs, Z_FINISH) != Z_STREAM_END) {
        inflateEnd(&zs);
        fp->errcode |= BGZF_ERR_ZLIB;
        return -1;
    }
    if (inflateEnd(&zs) != Z_OK) {
        fp->errcode |= BGZF_ERR_ZLIB;
        return -1;
    }
    return zs.total_out;
}

/*
 * Return non-zero if the first 16 bytes of `header` look like a BGZF block
 * header: gzip magic, deflate method, FEXTRA flag set, XLEN == 6 and a 'B','C'
 * subfield of length 2.
 */
static int check_header(const uint8_t *header)
{
    return (header[0] == 31 && header[1] == 139 && header[2] == 8 && (header[3] & 4) != 0
            && unpackInt16(&header[10]) == 6
            && header[12] == 'B' && header[13] == 'C'
            && unpackInt16(&header[14]) == 2);
}

#ifdef BGZF_CACHE
/* Free every cached block and the hash table itself (read handles only). */
static void free_cache(BGZF *fp)
{
    khint_t k;
    khash_t(cache) *h = (khash_t(cache)*)fp->cache;
    if (fp->open_mode != 'r') return;
    for (k = kh_begin(h); k < kh_end(h); ++k)
        if (kh_exist(h, k)) free(kh_val(h, k).block);
    kh_destroy(cache, h);
}

/*
 * If the block starting at `block_address` is cached, copy it into
 * fp->uncompressed_block, position the file just past it and return its
 * size; otherwise return 0.
 */
static int load_block_from_cache(BGZF *fp, int64_t block_address)
{
    khint_t k;
    cache_t *p;
    khash_t(cache) *h = (khash_t(cache)*)fp->cache;
    k = kh_get(cache, h, block_address);
    if (k == kh_end(h)) return 0;
    p = &kh_val(h, k);
    if (fp->block_length != 0) fp->block_offset = 0;
    fp->block_address = block_address;
    fp->block_length = p->size;
    memcpy(fp->uncompressed_block, p->block, BGZF_BLOCK_SIZE);
    _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET);
    return p->size;
}

/*
 * Store a copy of the current uncompressed block in the cache, keyed by
 * fp->block_address. `size` is the compressed size of the block on disk.
 * Does nothing when the cache is too small for one block; silently skips
 * caching when memory for the copy cannot be allocated.
 */
static void cache_block(BGZF *fp, int size)
{
    int ret;
    khint_t k;
    cache_t *p;
    uint8_t *copy;
    khash_t(cache) *h = (khash_t(cache)*)fp->cache;
    if (BGZF_BLOCK_SIZE >= fp->cache_size) return;
    if ((kh_size(h) + 1) * BGZF_BLOCK_SIZE > fp->cache_size) {
        /* A better way would be to remove the oldest block in the
         * cache, but here we remove a random one for simplicity. This
         * should not have a big impact on performance. */
        for (k = kh_begin(h); k < kh_end(h); ++k)
            if (kh_exist(h, k)) break;
        if (k < kh_end(h)) {
            free(kh_val(h, k).block);
            kh_del(cache, h, k);
        }
    }
    k = kh_put(cache, h, fp->block_address, &ret);
    if (ret == 0) return; // if this happens, a bug!
    copy = malloc(BGZF_BLOCK_SIZE);
    if (copy == NULL) { // cannot cache: drop the entry that was just inserted
        kh_del(cache, h, k);
        return;
    }
    memcpy(copy, fp->uncompressed_block, BGZF_BLOCK_SIZE);
    p = &kh_val(h, k);
    p->size = fp->block_length;
    p->end_offset = fp->block_address + size;
    p->block = copy;
}
#else
/* Caching disabled at compile time: no-op stand-ins. */
static void free_cache(BGZF *fp) {}
static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;}
static void cache_block(BGZF *fp, int size) {}
#endif

/*
 * Read and inflate the block at the current file position into
 * fp->uncompressed_block.
 *
 * Returns 0 on success or at end of file (fp->block_length is then 0), and -1
 * on error with BGZF_ERR_HEADER, BGZF_ERR_IO or BGZF_ERR_ZLIB set in
 * fp->errcode.
 */
int bgzf_read_block(BGZF *fp)
{
    uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
    int count, size = 0, block_length, remaining;
    int64_t block_address;
    block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
    if (load_block_from_cache(fp, block_address)) return 0;
    count = _bgzf_read(fp->fp, header, sizeof(header));
    if (count == 0) { // no data read
        fp->block_length = 0;
        return 0;
    }
    if (count != sizeof(header) || !check_header(header)) {
        fp->errcode |= BGZF_ERR_HEADER;
        return -1;
    }
    size = count;
    block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1"
    // The length comes from the file: reject values that would make `remaining`
    // negative (a huge read size) or overrun the BGZF_BLOCK_SIZE buffer.
    if (block_length < BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH || block_length > BGZF_BLOCK_SIZE) {
        fp->errcode |= BGZF_ERR_HEADER;
        return -1;
    }
    compressed_block = (uint8_t*)fp->compressed_block;
    memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
    remaining = block_length - BLOCK_HEADER_LENGTH;
    count = _bgzf_read(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
    if (count != remaining) {
        fp->errcode |= BGZF_ERR_IO;
        return -1;
    }
    size += count;
    if ((count = inflate_block(fp, block_length)) < 0) return -1;
    if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek.
    fp->block_address = block_address;
    fp->block_length = count;
    cache_block(fp, size);
    return 0;
}

/*
 * Read up to `length` uncompressed bytes into `data`.
 *
 * Returns the number of bytes copied (fewer than `length` only at end of
 * file), 0 when length <= 0, or -1 if a block could not be read.
 */
ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length)
{
    ssize_t bytes_read = 0;
    uint8_t *output = data;
    if (length <= 0) return 0;
    assert(fp->open_mode == 'r');
    while (bytes_read < length) {
        int copy_length, available = fp->block_length - fp->block_offset;
        uint8_t *buffer;
        if (available <= 0) {
            if (bgzf_read_block(fp) != 0) return -1;
            available = fp->block_length - fp->block_offset;
            if (available <= 0) break;
        }
        copy_length = length - bytes_read < available? length - bytes_read : available;
        buffer = fp->uncompressed_block;
        memcpy(output, buffer + fp->block_offset, copy_length);
        fp->block_offset += copy_length;
        output += copy_length;
        bytes_read += copy_length;
    }
    if (fp->block_offset == fp->block_length) {
        // Block fully consumed: record where the next one starts.
        fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
        fp->block_offset = fp->block_length = 0;
    }
    return bytes_read;
}

/*
 * Compress and write out all pending data in fp->uncompressed_block.
 * Returns 0 on success, -1 on a compression or write error.
 */
int bgzf_flush(BGZF *fp)
{
    assert(fp->open_mode == 'w');
    while (fp->block_offset > 0) { // deflate_block() may leave a remainder behind
        int block_length;
        block_length = deflate_block(fp, fp->block_offset);
        if (block_length < 0) return -1;
        if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != (size_t)block_length) {
            fp->errcode |= BGZF_ERR_IO; // possibly truncated file
            return -1;
        }
        fp->block_address += block_length;
    }
    return 0;
}

/*
 * Flush only if appending `size` more bytes would overflow the current block.
 * Returns bgzf_flush()'s result in that case and -1 otherwise.
 *
 * NOTE(review): -1 is also bgzf_flush()'s error value, so a caller cannot tell
 * "no flush needed" from a failed flush -- confirm the intended contract.
 */
int bgzf_flush_try(BGZF *fp, ssize_t size)
{
    if (fp->block_offset + size > BGZF_BLOCK_SIZE)
        return bgzf_flush(fp);
    return -1;
}

/*
 * Append `length` bytes from `data` to the stream, flushing each time the
 * uncompressed block fills up.
 *
 * Returns the number of bytes accepted into the block buffer; this is less
 * than `length` only if a flush failed part-way through.
 */
ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length)
{
    const uint8_t *input = data;
    int block_length = BGZF_BLOCK_SIZE;
    ssize_t bytes_written = 0; // same width as `length`, so the loop test cannot truncate
    assert(fp->open_mode == 'w');
    while (bytes_written < length) {
        uint8_t* buffer = fp->uncompressed_block;
        int copy_length = block_length - fp->block_offset < length - bytes_written? block_length - fp->block_offset : length - bytes_written;
        memcpy(buffer + fp->block_offset, input, copy_length);
        fp->block_offset += copy_length;
        input += copy_length;
        bytes_written += copy_length;
        if (fp->block_offset == block_length && bgzf_flush(fp)) break;
    }
    return bytes_written;
}

/*
 * Finish and close the stream. For write handles the pending data is flushed
 * and an empty block is appended as the EOF marker.
 *
 * Returns 0 on success, in which case `fp` has been freed. Returns -1 if `fp`
 * is NULL or on any flush/compress/write/close error; on those error paths the
 * handle is NOT freed.
 */
int bgzf_close(BGZF* fp)
{
    int ret, block_length;
    if (fp == 0) return -1;
    if (fp->open_mode == 'w') {
        if (bgzf_flush(fp) != 0) return -1;
        block_length = deflate_block(fp, 0); // write an empty block
        if (block_length < 0) return -1; // errcode already set; do not pass -1 to fwrite
        if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != (size_t)block_length) {
            fp->errcode |= BGZF_ERR_IO; // EOF marker missing or truncated
            return -1;
        }
        if (fflush(fp->fp) != 0) {
            fp->errcode |= BGZF_ERR_IO;
            return -1;
        }
    }
    ret = fp->open_mode == 'w'? fclose(fp->fp) : _bgzf_close(fp->fp);
    if (ret != 0) return -1;
    free(fp->uncompressed_block);
    free(fp->compressed_block);
    free_cache(fp);
    free(fp);
    return 0;
}

/* Set the block-cache budget in bytes; a NULL handle is ignored. */
void bgzf_set_cache_size(BGZF *fp, int cache_size)
{
    if (fp) fp->cache_size = cache_size;
}

/*
 * Check whether the file ends with one of the two known 28-byte EOF markers.
 *
 * Returns 1 if it does, 0 otherwise (including when the file is shorter than
 * 28 bytes or the tail cannot be read; the latter sets BGZF_ERR_IO). The file
 * position is restored after the tail has been read.
 */
int bgzf_check_EOF(BGZF *fp)
{
    static const uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
    // Last 28 bytes of an uncompressed bgzf file which are different
    // from the last 28 bytes of compressed bgzf files.
    static const uint8_t magic2[28] = "\4\0\0\0\0\0\377\6\0\102\103\2\0\036\0\1\0\0\377\377\0\0\0\0\0\0\0\0";
    uint8_t buf[28];
    off_t offset;
    int count;
    offset = _bgzf_tell((_bgzf_file_t)fp->fp);
    if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0;
    count = _bgzf_read(fp->fp, buf, 28);
    // Put the position back before any return so a failed read does not
    // leave the stream parked near the end of the file.
    _bgzf_seek(fp->fp, offset, SEEK_SET);
    if (count != 28) {
        fp->errcode |= BGZF_ERR_IO; // possibly truncated file
        return 0;
    }
    if (memcmp(magic, buf, 28) == 0 || memcmp(magic2, buf, 28) == 0)
        return 1;
    return 0;
}

/*
 * Seek a read handle to a virtual offset: the upper 48 bits of `pos` are the
 * file offset of a block, the lower 16 bits the offset inside that block once
 * inflated. Only SEEK_SET is supported.
 *
 * Returns 0 on success, -1 on misuse (BGZF_ERR_MISUSE) or seek failure
 * (BGZF_ERR_IO).
 */
int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
{
    int block_offset;
    int64_t block_address;

    if (fp->open_mode != 'r' || where != SEEK_SET) {
        fp->errcode |= BGZF_ERR_MISUSE;
        return -1;
    }
    block_offset = pos & 0xFFFF;
    block_address = pos >> 16;
    if (_bgzf_seek(fp->fp, block_address, SEEK_SET) < 0) {
        fp->errcode |= BGZF_ERR_IO;
        return -1;
    }
    fp->block_length = 0;  // indicates current block has not been loaded
    fp->block_address = block_address;
    fp->block_offset = block_offset;
    return 0;
}

/*
 * Return 1 if the first 16 bytes of file `fn` equal the BGZF header magic,
 * 0 otherwise (including when the file cannot be opened or is too short).
 */
int bgzf_is_bgzf(const char *fn)
{
    uint8_t buf[16];
    int n;
    _bgzf_file_t fp;
    if ((fp = _bgzf_open(fn, "r")) == 0) return 0;
    n = _bgzf_read(fp, buf, 16);
    _bgzf_close(fp);
    if (n != 16) return 0;
    return memcmp(g_magic, buf, 16) == 0? 1 : 0;
}

/*
 * Read one byte. Returns the byte value (0..255), -1 at end of file, or -2 on
 * a read error.
 */
int bgzf_getc(BGZF *fp)
{
    int c;
    if (fp->block_offset >= fp->block_length) {
        if (bgzf_read_block(fp) != 0) return -2; /* error */
        if (fp->block_length == 0) return -1; /* end-of-file */
    }
    c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++];
    if (fp->block_offset == fp->block_length) {
        fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
        fp->block_offset = 0;
        fp->block_length = 0;
    }
    return c;
}

#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif

/*
 * Read bytes up to (and consuming, but not storing) the next `delim` into
 * `str`, growing str->s as needed and NUL-terminating it.
 *
 * Returns the line length on success. When nothing was read, returns -1 at
 * end of file or -2 on a read error. Also returns -2 if the string buffer
 * cannot be grown; str->s then still points at the old, valid buffer.
 */
int bgzf_getline(BGZF *fp, int delim, kstring_t *str)
{
    int l, state = 0;
    unsigned char *buf = (unsigned char*)fp->uncompressed_block;
    str->l = 0;
    do {
        if (fp->block_offset >= fp->block_length) {
            if (bgzf_read_block(fp) != 0) { state = -2; break; }
            if (fp->block_length == 0) { state = -1; break; }
        }
        for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l) {
            /* scan for the delimiter within the current block */
        }
        if (l < fp->block_length) state = 1;
        l -= fp->block_offset;
        if (str->l + l + 1 >= str->m) {
            size_t new_m = str->l + l + 2;
            char *grown;
            kroundup32(new_m);
            // Keep the old pointer until realloc succeeds so it is never lost.
            grown = realloc(str->s, new_m);
            if (grown == NULL) {
                if (str->s != NULL && str->l < str->m) str->s[str->l] = 0;
                return -2;
            }
            str->s = grown;
            str->m = new_m;
        }
        memcpy(str->s + str->l, buf + fp->block_offset, l);
        str->l += l;
        fp->block_offset += l + 1; // +1 skips the delimiter (or steps past the block end)
        if (fp->block_offset >= fp->block_length) {
            fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
            fp->block_offset = 0;
            fp->block_length = 0;
        }
    } while (state == 0);
    if (str->l == 0 && state < 0) return state;
    str->s[str->l] = 0;
    return str->l;
}
#endif