bgzf.c

00001 /* The MIT License
00002 
00003    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
00004 
00005    Permission is hereby granted, free of charge, to any person obtaining a copy
00006    of this software and associated documentation files (the "Software"), to deal
00007    in the Software without restriction, including without limitation the rights
00008    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00009    copies of the Software, and to permit persons to whom the Software is
00010    furnished to do so, subject to the following conditions:
00011 
00012    The above copyright notice and this permission notice shall be included in
00013    all copies or substantial portions of the Software.
00014 
00015    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00016    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00017    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00018    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00019    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00020    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00021    THE SOFTWARE.
00022 */
00023 
00024 /*
00025   2009-06-29 by lh3: cache recent uncompressed blocks.
00026   2009-06-25 by lh3: optionally use my knetfile library to access files on an FTP server.
00027   2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */
00028 
00029 #include <stdio.h>
00030 #include <stdlib.h>
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <fcntl.h>
00034 #include <sys/types.h>
00035 #include <sys/stat.h>
00036 #include "bgzf.h"
00037 
00038 #include "khash.h"
/* One cached uncompressed block. Entries live in a khash map keyed by the
 * block's starting offset in the compressed file (see cache_block()). */
00039 typedef struct {
00040     int size;           /* number of valid uncompressed bytes (fp->block_length when cached) */
00041     uint8_t *block;     /* heap copy of the uncompressed block buffer, MAX_BLOCK_SIZE bytes */
00042     int64_t end_offset; /* file offset just past the compressed block; restored on a cache hit */
00043 } cache_t;
/* Instantiate the int64 -> cache_t hash map type named "cache". */
00044 KHASH_MAP_INIT_INT64(cache, cache_t)
00045 
/* Portability shim: on Windows/MSVC map ftello/fseeko onto ftell/fseek;
 * elsewhere declare the two functions explicitly. */
00046 #if defined(_WIN32) || defined(_MSC_VER)
00047 #define ftello(fp) ftell(fp)
00048 #define fseeko(fp, offset, whence) fseek(fp, offset, whence)
00049 #else
00050 extern off_t ftello(FILE *stream);
00051 extern int fseeko(FILE *stream, off_t offset, int whence);
00052 #endif
00053 
/* Byte type used for the block buffers and headers. NOTE: it is signed, so
 * header values above 127 (e.g. GZIP_ID2) must be compared through a cast. */
00054 typedef int8_t bgzf_byte_t;
00055 
/* Sizes of the uncompressed staging buffer (write side) and the largest block. */
00056 static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
00057 static const int MAX_BLOCK_SIZE = 64 * 1024;
00058 
/* Bytes before and after the deflate payload in every block:
 * 18-byte gzip header incl. the BGZF extra field, 8-byte CRC32 + input size. */
00059 static const int BLOCK_HEADER_LENGTH = 18;
00060 static const int BLOCK_FOOTER_LENGTH = 8;
00061 
/* Values written into / expected from the block header (see deflate_block
 * and check_header). */
00062 static const int GZIP_ID1 = 31;
00063 static const int GZIP_ID2 = 139;
00064 static const int CM_DEFLATE = 8;
00065 static const int FLG_FEXTRA = 4;
00066 static const int OS_UNKNOWN = 255;
00067 static const int BGZF_ID1 = 66; // 'B'
00068 static const int BGZF_ID2 = 67; // 'C'
00069 static const int BGZF_LEN = 2;
00070 static const int BGZF_XLEN = 6; // BGZF_LEN+4
00071 
/* Parameters handed to zlib's deflateInit2()/inflateInit2(). */
00072 static const int GZIP_WINDOW_BITS = -15; // no zlib header
00073 static const int Z_DEFAULT_MEM_LEVEL = 8;
00074 
00075 
/*
 * Store a 16-bit value into buffer[0..1] in little-endian byte order.
 *
 * Declared `static inline`: a plain C99 `inline` definition does not emit an
 * external symbol, so a call the compiler chooses not to inline (e.g. at -O0)
 * would otherwise fail at link time.
 */
static inline
void
packInt16(uint8_t* buffer, uint16_t value)
{
    buffer[0] = (uint8_t)(value & 0xff);
    buffer[1] = (uint8_t)(value >> 8);
}
00083 
/*
 * Read an unsigned 16-bit little-endian value from buffer[0..1].
 * Returns the value as an int in the range 0..65535.
 *
 * Declared `static inline`: a plain C99 `inline` definition does not emit an
 * external symbol, so a non-inlined call would otherwise fail at link time.
 */
static inline
int
unpackInt16(const uint8_t* buffer)
{
    return (buffer[0] | (buffer[1] << 8));
}
00090 
/*
 * Store a 32-bit value into buffer[0..3] in little-endian byte order.
 *
 * Declared `static inline`: a plain C99 `inline` definition does not emit an
 * external symbol, so a non-inlined call would otherwise fail at link time.
 */
static inline
void
packInt32(uint8_t* buffer, uint32_t value)
{
    buffer[0] = (uint8_t)(value & 0xff);
    buffer[1] = (uint8_t)((value >> 8) & 0xff);
    buffer[2] = (uint8_t)((value >> 16) & 0xff);
    buffer[3] = (uint8_t)(value >> 24);
}
00100 
/* Return the smaller of two ints. */
static inline
int
bgzf_min(int x, int y)
{
    if (y <= x) {
        return y;
    }
    return x;
}
00107 
00108 static
00109 void
00110 report_error(BGZF* fp, const char* message) {
00111     fp->error = message;
00112 }
00113 
00114 int bgzf_check_bgzf(const char *fn)
00115 {
00116     BGZF *fp;
00117     uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377";
00118     int n;
00119 
00120     if ((fp = bgzf_open(fn, "r")) == 0) 
00121     {
00122         fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
00123         return -1;
00124     }
00125 
00126 #ifdef _USE_KNETFILE
00127     n = knet_read(fp->x.fpr, buf, 10);
00128 #else
00129     n = fread(buf, 1, 10, fp->file);
00130 #endif
00131     bgzf_close(fp);
00132 
00133     if ( n!=10 ) 
00134         return -1;
00135 
00136     if ( !memcmp(magic, buf, 10) ) return 1;
00137     return 0;
00138 }
00139 
00140 static BGZF *bgzf_read_init()
00141 {
00142     BGZF *fp;
00143     fp = calloc(1, sizeof(BGZF));
00144     fp->uncompressed_block_size = MAX_BLOCK_SIZE;
00145     fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
00146     fp->compressed_block_size = MAX_BLOCK_SIZE;
00147     fp->compressed_block = malloc(MAX_BLOCK_SIZE);
00148     fp->cache_size = 0;
00149     fp->cache = kh_init(cache);
00150     return fp;
00151 }
00152 
00153 static
00154 BGZF*
00155 open_read(int fd)
00156 {
00157 #ifdef _USE_KNETFILE
00158     knetFile *file = knet_dopen(fd, "r");
00159 #else
00160     FILE* file = fdopen(fd, "r");
00161 #endif
00162     BGZF* fp;
00163     if (file == 0) return 0;
00164     fp = bgzf_read_init();
00165     fp->file_descriptor = fd;
00166     fp->open_mode = 'r';
00167 #ifdef _USE_KNETFILE
00168     fp->x.fpr = file;
00169 #else
00170     fp->file = file;
00171 #endif
00172     return fp;
00173 }
00174 
00175 static
00176 BGZF*
00177 open_write(int fd, int compress_level) // compress_level==-1 for the default level
00178 {
00179     FILE* file = fdopen(fd, "w");
00180     BGZF* fp;
00181     if (file == 0) return 0;
00182     fp = malloc(sizeof(BGZF));
00183     fp->file_descriptor = fd;
00184     fp->open_mode = 'w';
00185     fp->owned_file = 0;
00186     fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
00187     if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
00188 #ifdef _USE_KNETFILE
00189     fp->x.fpw = file;
00190 #else
00191     fp->file = file;
00192 #endif
00193     fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
00194     fp->uncompressed_block = NULL;
00195     fp->compressed_block_size = MAX_BLOCK_SIZE;
00196     fp->compressed_block = malloc(MAX_BLOCK_SIZE);
00197     fp->block_address = 0;
00198     fp->block_offset = 0;
00199     fp->block_length = 0;
00200     fp->error = NULL;
00201     return fp;
00202 }
00203 
00204 BGZF*
00205 bgzf_open(const char* __restrict path, const char* __restrict mode)
00206 {
00207     BGZF* fp = NULL;
00208     if (strchr(mode, 'r') || strchr(mode, 'R')) { /* The reading mode is preferred. */
00209 #ifdef _USE_KNETFILE
00210         knetFile *file = knet_open(path, mode);
00211         if (file == 0) return 0;
00212         fp = bgzf_read_init();
00213         fp->file_descriptor = -1;
00214         fp->open_mode = 'r';
00215         fp->x.fpr = file;
00216 #else
00217         int fd, oflag = O_RDONLY;
00218 #ifdef _WIN32
00219         oflag |= O_BINARY;
00220 #endif
00221         fd = open(path, oflag);
00222         if (fd == -1) return 0;
00223         fp = open_read(fd);
00224 #endif
00225     } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
00226         int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC;
00227 #ifdef _WIN32
00228         oflag |= O_BINARY;
00229 #endif
00230         fd = open(path, oflag, 0666);
00231         if (fd == -1) return 0;
00232         { // set compress_level
00233             int i;
00234             for (i = 0; mode[i]; ++i)
00235                 if (mode[i] >= '0' && mode[i] <= '9') break;
00236             if (mode[i]) compress_level = (int)mode[i] - '0';
00237             if (strchr(mode, 'u')) compress_level = 0;
00238         }
00239         fp = open_write(fd, compress_level);
00240     }
00241     if (fp != NULL) fp->owned_file = 1;
00242     return fp;
00243 }
00244 
00245 BGZF*
00246 bgzf_fdopen(int fd, const char * __restrict mode)
00247 {
00248     if (fd == -1) return 0;
00249     if (mode[0] == 'r' || mode[0] == 'R') {
00250         return open_read(fd);
00251     } else if (mode[0] == 'w' || mode[0] == 'W') {
00252         int i, compress_level = -1;
00253         for (i = 0; mode[i]; ++i)
00254             if (mode[i] >= '0' && mode[i] <= '9') break;
00255         if (mode[i]) compress_level = (int)mode[i] - '0';
00256         if (strchr(mode, 'u')) compress_level = 0;
00257         return open_write(fd, compress_level);
00258     } else {
00259         return NULL;
00260     }
00261 }
00262 
00263 static
00264 int
00265 deflate_block(BGZF* fp, int block_length)
00266 {
00267     // Deflate the block in fp->uncompressed_block into fp->compressed_block.
00268     // Also adds an extra field that stores the compressed block length.
00269 
00270     bgzf_byte_t* buffer = fp->compressed_block;
00271     int buffer_size = fp->compressed_block_size;
00272 
00273     // Init gzip header
00274     buffer[0] = GZIP_ID1;
00275     buffer[1] = GZIP_ID2;
00276     buffer[2] = CM_DEFLATE;
00277     buffer[3] = FLG_FEXTRA;
00278     buffer[4] = 0; // mtime
00279     buffer[5] = 0;
00280     buffer[6] = 0;
00281     buffer[7] = 0;
00282     buffer[8] = 0;
00283     buffer[9] = OS_UNKNOWN;
00284     buffer[10] = BGZF_XLEN;
00285     buffer[11] = 0;
00286     buffer[12] = BGZF_ID1;
00287     buffer[13] = BGZF_ID2;
00288     buffer[14] = BGZF_LEN;
00289     buffer[15] = 0;
00290     buffer[16] = 0; // placeholder for block length
00291     buffer[17] = 0;
00292 
00293     // loop to retry for blocks that do not compress enough
00294     int input_length = block_length;
00295     int compressed_length = 0;
00296     while (1) {
00297         z_stream zs;
00298         zs.zalloc = NULL;
00299         zs.zfree = NULL;
00300         zs.next_in = fp->uncompressed_block;
00301         zs.avail_in = input_length;
00302         zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
00303         zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
00304 
00305         int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED,
00306                                   GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
00307         if (status != Z_OK) {
00308             report_error(fp, "deflate init failed");
00309             return -1;
00310         }
00311         status = deflate(&zs, Z_FINISH);
00312         if (status != Z_STREAM_END) {
00313             deflateEnd(&zs);
00314             if (status == Z_OK) {
00315                 // Not enough space in buffer.
00316                 // Can happen in the rare case the input doesn't compress enough.
00317                 // Reduce the amount of input until it fits.
00318                 input_length -= 1024;
00319                 if (input_length <= 0) {
00320                     // should never happen
00321                     report_error(fp, "input reduction failed");
00322                     return -1;
00323                 }
00324                 continue;
00325             }
00326             report_error(fp, "deflate failed");
00327             return -1;
00328         }
00329         status = deflateEnd(&zs);
00330         if (status != Z_OK) {
00331             report_error(fp, "deflate end failed");
00332             return -1;
00333         }
00334         compressed_length = zs.total_out;
00335         compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
00336         if (compressed_length > MAX_BLOCK_SIZE) {
00337             // should never happen
00338             report_error(fp, "deflate overflow");
00339             return -1;
00340         }
00341         break;
00342     }
00343 
00344     packInt16((uint8_t*)&buffer[16], compressed_length-1);
00345     uint32_t crc = crc32(0L, NULL, 0L);
00346     crc = crc32(crc, fp->uncompressed_block, input_length);
00347     packInt32((uint8_t*)&buffer[compressed_length-8], crc);
00348     packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
00349 
00350     int remaining = block_length - input_length;
00351     if (remaining > 0) {
00352         if (remaining > input_length) {
00353             // should never happen (check so we can use memcpy)
00354             report_error(fp, "remainder too large");
00355             return -1;
00356         }
00357         memcpy(fp->uncompressed_block,
00358                fp->uncompressed_block + input_length,
00359                remaining);
00360     }
00361     fp->block_offset = remaining;
00362     return compressed_length;
00363 }
00364 
00365 static
00366 int
00367 inflate_block(BGZF* fp, int block_length)
00368 {
00369     // Inflate the block in fp->compressed_block into fp->uncompressed_block
00370 
00371     z_stream zs;
00372     int status;
00373     zs.zalloc = NULL;
00374     zs.zfree = NULL;
00375     zs.next_in = fp->compressed_block + 18;
00376     zs.avail_in = block_length - 16;
00377     zs.next_out = fp->uncompressed_block;
00378     zs.avail_out = fp->uncompressed_block_size;
00379 
00380     status = inflateInit2(&zs, GZIP_WINDOW_BITS);
00381     if (status != Z_OK) {
00382         report_error(fp, "inflate init failed");
00383         return -1;
00384     }
00385     status = inflate(&zs, Z_FINISH);
00386     if (status != Z_STREAM_END) {
00387         inflateEnd(&zs);
00388         report_error(fp, "inflate failed");
00389         return -1;
00390     }
00391     status = inflateEnd(&zs);
00392     if (status != Z_OK) {
00393         report_error(fp, "inflate failed");
00394         return -1;
00395     }
00396     return zs.total_out;
00397 }
00398 
00399 static
00400 int
00401 check_header(const bgzf_byte_t* header)
00402 {
00403     return (header[0] == GZIP_ID1 &&
00404             header[1] == (bgzf_byte_t) GZIP_ID2 &&
00405             header[2] == Z_DEFLATED &&
00406             (header[3] & FLG_FEXTRA) != 0 &&
00407             unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN &&
00408             header[12] == BGZF_ID1 &&
00409             header[13] == BGZF_ID2 &&
00410             unpackInt16((uint8_t*)&header[14]) == BGZF_LEN);
00411 }
00412 
00413 static void free_cache(BGZF *fp)
00414 {
00415     khint_t k;
00416     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00417     if (fp->open_mode != 'r') return;
00418     for (k = kh_begin(h); k < kh_end(h); ++k)
00419         if (kh_exist(h, k)) free(kh_val(h, k).block);
00420     kh_destroy(cache, h);
00421 }
00422 
00423 static int load_block_from_cache(BGZF *fp, int64_t block_address)
00424 {
00425     khint_t k;
00426     cache_t *p;
00427     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00428     k = kh_get(cache, h, block_address);
00429     if (k == kh_end(h)) return 0;
00430     p = &kh_val(h, k);
00431     if (fp->block_length != 0) fp->block_offset = 0;
00432     fp->block_address = block_address;
00433     fp->block_length = p->size;
00434     memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE);
00435 #ifdef _USE_KNETFILE
00436     knet_seek(fp->x.fpr, p->end_offset, SEEK_SET);
00437 #else
00438     fseeko(fp->file, p->end_offset, SEEK_SET);
00439 #endif
00440     return p->size;
00441 }
00442 
00443 static void cache_block(BGZF *fp, int size)
00444 {
00445     int ret;
00446     khint_t k;
00447     cache_t *p;
00448     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00449     if (MAX_BLOCK_SIZE >= fp->cache_size) return;
00450     if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) {
00451         /* A better way would be to remove the oldest block in the
00452          * cache, but here we remove a random one for simplicity. This
00453          * should not have a big impact on performance. */
00454         for (k = kh_begin(h); k < kh_end(h); ++k)
00455             if (kh_exist(h, k)) break;
00456         if (k < kh_end(h)) {
00457             free(kh_val(h, k).block);
00458             kh_del(cache, h, k);
00459         }
00460     }
00461     k = kh_put(cache, h, fp->block_address, &ret);
00462     if (ret == 0) return; // if this happens, a bug!
00463     p = &kh_val(h, k);
00464     p->size = fp->block_length;
00465     p->end_offset = fp->block_address + size;
00466     p->block = malloc(MAX_BLOCK_SIZE);
00467     memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
00468 }
00469 
00470 int
00471 bgzf_read_block(BGZF* fp)
00472 {
00473     bgzf_byte_t header[BLOCK_HEADER_LENGTH];
00474     int count, size = 0, block_length, remaining;
00475 #ifdef _USE_KNETFILE
00476     int64_t block_address = knet_tell(fp->x.fpr);
00477     if (load_block_from_cache(fp, block_address)) return 0;
00478     count = knet_read(fp->x.fpr, header, sizeof(header));
00479 #else
00480     int64_t block_address = ftello(fp->file);
00481     if (load_block_from_cache(fp, block_address)) return 0;
00482     count = fread(header, 1, sizeof(header), fp->file);
00483 #endif
00484     if (count == 0) {
00485         fp->block_length = 0;
00486         return 0;
00487     }
00488     size = count;
00489     if (count != sizeof(header)) {
00490         report_error(fp, "read failed");
00491         return -1;
00492     }
00493     if (!check_header(header)) {
00494         report_error(fp, "invalid block header");
00495         return -1;
00496     }
00497     block_length = unpackInt16((uint8_t*)&header[16]) + 1;
00498     bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
00499     memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
00500     remaining = block_length - BLOCK_HEADER_LENGTH;
00501 #ifdef _USE_KNETFILE
00502     count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
00503 #else
00504     count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
00505 #endif
00506     if (count != remaining) {
00507         report_error(fp, "read failed");
00508         return -1;
00509     }
00510     size += count;
00511     count = inflate_block(fp, block_length);
00512     if (count < 0) return -1;
00513     if (fp->block_length != 0) {
00514         // Do not reset offset if this read follows a seek.
00515         fp->block_offset = 0;
00516     }
00517     fp->block_address = block_address;
00518     fp->block_length = count;
00519     cache_block(fp, size);
00520     return 0;
00521 }
00522 
00523 int
00524 bgzf_read(BGZF* fp, void* data, int length)
00525 {
00526     if (length <= 0) {
00527         return 0;
00528     }
00529     if (fp->open_mode != 'r') {
00530         report_error(fp, "file not open for reading");
00531         return -1;
00532     }
00533 
00534     int bytes_read = 0;
00535     bgzf_byte_t* output = data;
00536     while (bytes_read < length) {
00537         int copy_length, available = fp->block_length - fp->block_offset;
00538         bgzf_byte_t *buffer;
00539         if (available <= 0) {
00540             if (bgzf_read_block(fp) != 0) {
00541                 return -1;
00542             }
00543             available = fp->block_length - fp->block_offset;
00544             if (available <= 0) {
00545                 break;
00546             }
00547         }
00548         copy_length = bgzf_min(length-bytes_read, available);
00549         buffer = fp->uncompressed_block;
00550         memcpy(output, buffer + fp->block_offset, copy_length);
00551         fp->block_offset += copy_length;
00552         output += copy_length;
00553         bytes_read += copy_length;
00554     }
00555     if (fp->block_offset == fp->block_length) {
00556 #ifdef _USE_KNETFILE
00557         fp->block_address = knet_tell(fp->x.fpr);
00558 #else
00559         fp->block_address = ftello(fp->file);
00560 #endif
00561         fp->block_offset = 0;
00562         fp->block_length = 0;
00563     }
00564     return bytes_read;
00565 }
00566 
00567 int bgzf_flush(BGZF* fp)
00568 {
00569     while (fp->block_offset > 0) {
00570         int count, block_length;
00571         block_length = deflate_block(fp, fp->block_offset);
00572         if (block_length < 0) return -1;
00573 #ifdef _USE_KNETFILE
00574         count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
00575 #else
00576         count = fwrite(fp->compressed_block, 1, block_length, fp->file);
00577 #endif
00578         if (count != block_length) {
00579             report_error(fp, "write failed");
00580             return -1;
00581         }
00582         fp->block_address += block_length;
00583     }
00584     return 0;
00585 }
00586 
00587 int bgzf_flush_try(BGZF *fp, int size)
00588 {
00589     if (fp->block_offset + size > fp->uncompressed_block_size)
00590         return bgzf_flush(fp);
00591     return -1;
00592 }
00593 
00594 int bgzf_write(BGZF* fp, const void* data, int length)
00595 {
00596     const bgzf_byte_t *input = data;
00597     int block_length, bytes_written;
00598     if (fp->open_mode != 'w') {
00599         report_error(fp, "file not open for writing");
00600         return -1;
00601     }
00602 
00603     if (fp->uncompressed_block == NULL)
00604         fp->uncompressed_block = malloc(fp->uncompressed_block_size);
00605 
00606     input = data;
00607     block_length = fp->uncompressed_block_size;
00608     bytes_written = 0;
00609     while (bytes_written < length) {
00610         int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
00611         bgzf_byte_t* buffer = fp->uncompressed_block;
00612         memcpy(buffer + fp->block_offset, input, copy_length);
00613         fp->block_offset += copy_length;
00614         input += copy_length;
00615         bytes_written += copy_length;
00616         if (fp->block_offset == block_length) {
00617             if (bgzf_flush(fp) != 0) {
00618                 break;
00619             }
00620         }
00621     }
00622     return bytes_written;
00623 }
00624 
00625 int bgzf_close(BGZF* fp)
00626 {
00627     if (fp->open_mode == 'w') {
00628         if (bgzf_flush(fp) != 0) return -1;
00629         { // add an empty block
00630             int count, block_length = deflate_block(fp, 0);
00631 #ifdef _USE_KNETFILE
00632             count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
00633 #else
00634             count = fwrite(fp->compressed_block, 1, block_length, fp->file);
00635 #endif
00636         }
00637 #ifdef _USE_KNETFILE
00638         if (fflush(fp->x.fpw) != 0) {
00639 #else
00640         if (fflush(fp->file) != 0) {
00641 #endif
00642             report_error(fp, "flush failed");
00643             return -1;
00644         }
00645     }
00646     if (fp->owned_file) {
00647 #ifdef _USE_KNETFILE
00648         int ret;
00649         if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
00650         else ret = knet_close(fp->x.fpr);
00651         if (ret != 0) return -1;
00652 #else
00653         if (fclose(fp->file) != 0) return -1;
00654 #endif
00655     }
00656     free(fp->uncompressed_block);
00657     free(fp->compressed_block);
00658     free_cache(fp);
00659     free(fp);
00660     return 0;
00661 }
00662 
00663 void bgzf_set_cache_size(BGZF *fp, int cache_size)
00664 {
00665     if (fp) fp->cache_size = cache_size;
00666 }
00667 
00668 int bgzf_check_EOF(BGZF *fp)
00669 {
00670     static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
00671         // Last 28 bytes of an uncompressed bgzf file which are different
00672         // from the last 28 bytes of compressed bgzf files.
00673     static uint8_t magic2[28] = "\4\0\0\0\0\0\377\6\0\102\103\2\0\036\0\1\0\0\377\377\0\0\0\0\0\0\0\0";
00674     uint8_t buf[28];
00675     off_t offset;
00676 #ifdef _USE_KNETFILE
00677     offset = knet_tell(fp->x.fpr);
00678     if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;
00679     knet_read(fp->x.fpr, buf, 28);
00680     knet_seek(fp->x.fpr, offset, SEEK_SET);
00681 #else
00682     offset = ftello(fp->file);
00683     if (fseeko(fp->file, -28, SEEK_END) != 0) return -1;
00684     fread(buf, 1, 28, fp->file);
00685     fseeko(fp->file, offset, SEEK_SET);
00686 #endif
00687     if((memcmp(magic, buf, 28) == 0) || (memcmp(magic2, buf, 28) == 0))
00688         {
00689             return(1);
00690         }
00691         return(0);
00692 }
00693 
00694 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
00695 {
00696     int block_offset;
00697     int64_t block_address;
00698 
00699     if (fp->open_mode != 'r') {
00700         report_error(fp, "file not open for read");
00701         return -1;
00702     }
00703     if (where != SEEK_SET) {
00704         report_error(fp, "unimplemented seek option");
00705         return -1;
00706     }
00707     block_offset = pos & 0xFFFF;
00708     block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
00709 #ifdef _USE_KNETFILE
00710     if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
00711 #else
00712     if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
00713 #endif
00714         report_error(fp, "seek failed");
00715         return -1;
00716     }
00717     fp->block_length = 0;  // indicates current block is not loaded
00718     fp->block_address = block_address;
00719     fp->block_offset = block_offset;
00720     return 0;
00721 }
Generated on Tue Aug 23 18:19:06 2011 for libStatGen Software by  doxygen 1.6.3