bgzf.c

00001 /* The MIT License
00002 
00003    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
00004 
00005    Permission is hereby granted, free of charge, to any person obtaining a copy
00006    of this software and associated documentation files (the "Software"), to deal
00007    in the Software without restriction, including without limitation the rights
00008    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00009    copies of the Software, and to permit persons to whom the Software is
00010    furnished to do so, subject to the following conditions:
00011 
00012    The above copyright notice and this permission notice shall be included in
00013    all copies or substantial portions of the Software.
00014 
00015    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00016    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00017    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00018    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00019    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00020    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00021    THE SOFTWARE.
00022 */
00023 
00024 /*
00025   2009-06-29 by lh3: cache recent uncompressed blocks.
00026   2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.
00027   2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */
00028 
00029 #ifdef __ZLIB_AVAILABLE__
00030 #include <stdio.h>
00031 #include <stdlib.h>
00032 #include <string.h>
00033 #include <unistd.h>
00034 #include <fcntl.h>
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037 #include "bgzf.h"
00038 
00039 #include "khash.h"
/* One cached, already-inflated block. Entries are stored in a khash map
 * keyed by the 64-bit file offset at which the compressed block starts. */
typedef struct {
    int size;           /* number of valid uncompressed bytes in `block` */
    uint8_t *block;     /* heap copy of the uncompressed data (MAX_BLOCK_SIZE bytes are allocated) */
    int64_t end_offset; /* file offset just past the compressed block; the reader seeks here on a cache hit */
} cache_t;
/* Instantiate the int64 -> cache_t hash map type and its kh_*(cache, ...) functions. */
KHASH_MAP_INIT_INT64(cache, cache_t)
00046 
00047 #if defined(_WIN32) || defined(_MSC_VER)
00048 #define ftello(fp) ftell(fp)
00049 #define fseeko(fp, offset, whence) fseek(fp, offset, whence)
00050 #else
00051 extern off_t ftello(FILE *stream);
00052 extern int fseeko(FILE *stream, off_t offset, int whence);
00053 #endif
00054 
/* Byte type used when addressing the compressed/uncompressed buffers.
 * Note that it is signed, so values above 127 compare as negative. */
typedef int8_t bgzf_byte_t;

/* Sizes (in bytes) of the uncompressed staging buffer used for writing and
 * of the largest block handled by this file. */
static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
static const int MAX_BLOCK_SIZE = 64 * 1024;

/* Bytes written before and after the deflate data of every block
 * (footer = 32-bit CRC followed by 32-bit uncompressed length). */
static const int BLOCK_HEADER_LENGTH = 18;
static const int BLOCK_FOOTER_LENGTH = 8;

/* Values stored in the fixed block header by deflate_block() and verified
 * by check_header(). */
static const int GZIP_ID1 = 31;
static const int GZIP_ID2 = 139;
static const int CM_DEFLATE = 8;
static const int FLG_FEXTRA = 4;
static const int OS_UNKNOWN = 255;
static const int BGZF_ID1 = 66; // 'B'
static const int BGZF_ID2 = 67; // 'C'
static const int BGZF_LEN = 2;
static const int BGZF_XLEN = 6; // BGZF_LEN+4

/* Parameters passed to deflateInit2()/inflateInit2(). */
static const int GZIP_WINDOW_BITS = -15; // no zlib header
static const int Z_DEFAULT_MEM_LEVEL = 8;
00075 
00076 
/*
 * Store a 16-bit value at buffer[0..1], least-significant byte first.
 *
 * Declared `static inline`: a plain `inline` definition without a matching
 * `extern` declaration provides no external definition under C99/C11, so
 * any call the compiler chooses not to inline would fail to link.
 */
static inline
void
packInt16(uint8_t* buffer, uint16_t value)
{
    buffer[0] = (uint8_t)(value & 0xff);
    buffer[1] = (uint8_t)(value >> 8);
}
00084 
/*
 * Read an unsigned 16-bit value stored least-significant byte first at
 * buffer[0..1]. The result is in the range 0..65535.
 *
 * Declared `static inline`: a plain `inline` definition without a matching
 * `extern` declaration provides no external definition under C99/C11, so
 * any call the compiler chooses not to inline would fail to link.
 */
static inline
int
unpackInt16(const uint8_t* buffer)
{
    return (buffer[0] | (buffer[1] << 8));
}
00091 
/*
 * Store a 32-bit value at buffer[0..3], least-significant byte first.
 *
 * Declared `static inline`: a plain `inline` definition without a matching
 * `extern` declaration provides no external definition under C99/C11, so
 * any call the compiler chooses not to inline would fail to link.
 */
static inline
void
packInt32(uint8_t* buffer, uint32_t value)
{
    buffer[0] = (uint8_t)(value & 0xff);
    buffer[1] = (uint8_t)((value >> 8) & 0xff);
    buffer[2] = (uint8_t)((value >> 16) & 0xff);
    buffer[3] = (uint8_t)(value >> 24);
}
00101 
/* Return the smaller of two ints. */
static inline
int
bgzf_min(int x, int y)
{
    if (y <= x) {
        return y;
    }
    return x;
}
00108 
00109 static
00110 void
00111 report_error(BGZF* fp, const char* message) {
00112     fp->error = message;
00113 }
00114 
00115 int bgzf_check_bgzf(const char *fn)
00116 {
00117     BGZF *fp;
00118     uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377";
00119     int n;
00120 
00121     if ((fp = bgzf_open(fn, "r")) == 0) 
00122     {
00123         fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
00124         return -1;
00125     }
00126 
00127 #ifdef _USE_KNETFILE
00128     n = knet_read(fp->x.fpr, buf, 10);
00129 #else
00130     n = fread(buf, 1, 10, fp->file);
00131 #endif
00132     bgzf_close(fp);
00133 
00134     if ( n!=10 ) 
00135         return -1;
00136 
00137     if ( !memcmp(magic, buf, 10) ) return 1;
00138     return 0;
00139 }
00140 
00141 static BGZF *bgzf_read_init()
00142 {
00143     BGZF *fp;
00144     fp = calloc(1, sizeof(BGZF));
00145     fp->uncompressed_block_size = MAX_BLOCK_SIZE;
00146     fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
00147     fp->compressed_block_size = MAX_BLOCK_SIZE;
00148     fp->compressed_block = malloc(MAX_BLOCK_SIZE);
00149     fp->cache_size = 0;
00150     fp->cache = kh_init(cache);
00151     return fp;
00152 }
00153 
00154 static
00155 BGZF*
00156 open_read(int fd)
00157 {
00158 #ifdef _USE_KNETFILE
00159     knetFile *file = knet_dopen(fd, "r");
00160 #else
00161     FILE* file = fdopen(fd, "r");
00162 #endif
00163     BGZF* fp;
00164     if (file == 0) return 0;
00165     fp = bgzf_read_init();
00166     fp->file_descriptor = fd;
00167     fp->open_mode = 'r';
00168 #ifdef _USE_KNETFILE
00169     fp->x.fpr = file;
00170 #else
00171     fp->file = file;
00172 #endif
00173     return fp;
00174 }
00175 
00176 static
00177 BGZF*
00178 open_write(int fd, int compress_level) // compress_level==-1 for the default level
00179 {
00180     FILE* file = fdopen(fd, "w");
00181     BGZF* fp;
00182     if (file == 0) return 0;
00183     fp = malloc(sizeof(BGZF));
00184     fp->file_descriptor = fd;
00185     fp->open_mode = 'w';
00186     fp->owned_file = 0;
00187     fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
00188     if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
00189 #ifdef _USE_KNETFILE
00190     fp->x.fpw = file;
00191 #else
00192     fp->file = file;
00193 #endif
00194     fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
00195     fp->uncompressed_block = NULL;
00196     fp->compressed_block_size = MAX_BLOCK_SIZE;
00197     fp->compressed_block = malloc(MAX_BLOCK_SIZE);
00198     fp->block_address = 0;
00199     fp->block_offset = 0;
00200     fp->block_length = 0;
00201     fp->error = NULL;
00202     return fp;
00203 }
00204 
00205 BGZF*
00206 bgzf_open(const char* __restrict path, const char* __restrict mode)
00207 {
00208     BGZF* fp = NULL;
00209     if (strchr(mode, 'r') || strchr(mode, 'R')) { /* The reading mode is preferred. */
00210 #ifdef _USE_KNETFILE
00211         knetFile *file = knet_open(path, mode);
00212         if (file == 0) return 0;
00213         fp = bgzf_read_init();
00214         fp->file_descriptor = -1;
00215         fp->open_mode = 'r';
00216         fp->x.fpr = file;
00217 #else
00218         int fd, oflag = O_RDONLY;
00219 #ifdef _WIN32
00220         oflag |= O_BINARY;
00221 #endif
00222         fd = open(path, oflag);
00223         if (fd == -1) return 0;
00224         fp = open_read(fd);
00225 #endif
00226     } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
00227         int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC;
00228 #ifdef _WIN32
00229         oflag |= O_BINARY;
00230 #endif
00231         fd = open(path, oflag, 0666);
00232         if (fd == -1) return 0;
00233         { // set compress_level
00234             int i;
00235             for (i = 0; mode[i]; ++i)
00236                 if (mode[i] >= '0' && mode[i] <= '9') break;
00237             if (mode[i]) compress_level = (int)mode[i] - '0';
00238             if (strchr(mode, 'u')) compress_level = 0;
00239         }
00240         fp = open_write(fd, compress_level);
00241     }
00242     if (fp != NULL) fp->owned_file = 1;
00243     return fp;
00244 }
00245 
00246 BGZF*
00247 bgzf_fdopen(int fd, const char * __restrict mode)
00248 {
00249     if (fd == -1) return 0;
00250     if (mode[0] == 'r' || mode[0] == 'R') {
00251         return open_read(fd);
00252     } else if (mode[0] == 'w' || mode[0] == 'W') {
00253         int i, compress_level = -1;
00254         for (i = 0; mode[i]; ++i)
00255             if (mode[i] >= '0' && mode[i] <= '9') break;
00256         if (mode[i]) compress_level = (int)mode[i] - '0';
00257         if (strchr(mode, 'u')) compress_level = 0;
00258         return open_write(fd, compress_level);
00259     } else {
00260         return NULL;
00261     }
00262 }
00263 
00264 static
00265 int
00266 deflate_block(BGZF* fp, int block_length)
00267 {
00268     // Deflate the block in fp->uncompressed_block into fp->compressed_block.
00269     // Also adds an extra field that stores the compressed block length.
00270 
00271     bgzf_byte_t* buffer = fp->compressed_block;
00272     int buffer_size = fp->compressed_block_size;
00273 
00274     // Init gzip header
00275     buffer[0] = GZIP_ID1;
00276     buffer[1] = GZIP_ID2;
00277     buffer[2] = CM_DEFLATE;
00278     buffer[3] = FLG_FEXTRA;
00279     buffer[4] = 0; // mtime
00280     buffer[5] = 0;
00281     buffer[6] = 0;
00282     buffer[7] = 0;
00283     buffer[8] = 0;
00284     buffer[9] = OS_UNKNOWN;
00285     buffer[10] = BGZF_XLEN;
00286     buffer[11] = 0;
00287     buffer[12] = BGZF_ID1;
00288     buffer[13] = BGZF_ID2;
00289     buffer[14] = BGZF_LEN;
00290     buffer[15] = 0;
00291     buffer[16] = 0; // placeholder for block length
00292     buffer[17] = 0;
00293 
00294     // loop to retry for blocks that do not compress enough
00295     int input_length = block_length;
00296     int compressed_length = 0;
00297     while (1) {
00298         z_stream zs;
00299         zs.zalloc = NULL;
00300         zs.zfree = NULL;
00301         zs.next_in = fp->uncompressed_block;
00302         zs.avail_in = input_length;
00303         zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
00304         zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
00305 
00306         int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED,
00307                                   GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
00308         if (status != Z_OK) {
00309             report_error(fp, "deflate init failed");
00310             return -1;
00311         }
00312         status = deflate(&zs, Z_FINISH);
00313         if (status != Z_STREAM_END) {
00314             deflateEnd(&zs);
00315             if (status == Z_OK) {
00316                 // Not enough space in buffer.
00317                 // Can happen in the rare case the input doesn't compress enough.
00318                 // Reduce the amount of input until it fits.
00319                 input_length -= 1024;
00320                 if (input_length <= 0) {
00321                     // should never happen
00322                     report_error(fp, "input reduction failed");
00323                     return -1;
00324                 }
00325                 continue;
00326             }
00327             report_error(fp, "deflate failed");
00328             return -1;
00329         }
00330         status = deflateEnd(&zs);
00331         if (status != Z_OK) {
00332             report_error(fp, "deflate end failed");
00333             return -1;
00334         }
00335         compressed_length = zs.total_out;
00336         compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
00337         if (compressed_length > MAX_BLOCK_SIZE) {
00338             // should never happen
00339             report_error(fp, "deflate overflow");
00340             return -1;
00341         }
00342         break;
00343     }
00344 
00345     packInt16((uint8_t*)&buffer[16], compressed_length-1);
00346     uint32_t crc = crc32(0L, NULL, 0L);
00347     crc = crc32(crc, fp->uncompressed_block, input_length);
00348     packInt32((uint8_t*)&buffer[compressed_length-8], crc);
00349     packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
00350 
00351     int remaining = block_length - input_length;
00352     if (remaining > 0) {
00353         if (remaining > input_length) {
00354             // should never happen (check so we can use memcpy)
00355             report_error(fp, "remainder too large");
00356             return -1;
00357         }
00358         memcpy(fp->uncompressed_block,
00359                fp->uncompressed_block + input_length,
00360                remaining);
00361     }
00362     fp->block_offset = remaining;
00363     return compressed_length;
00364 }
00365 
00366 static
00367 int
00368 inflate_block(BGZF* fp, int block_length)
00369 {
00370     // Inflate the block in fp->compressed_block into fp->uncompressed_block
00371 
00372     z_stream zs;
00373     int status;
00374     zs.zalloc = NULL;
00375     zs.zfree = NULL;
00376     zs.next_in = fp->compressed_block + 18;
00377     zs.avail_in = block_length - 16;
00378     zs.next_out = fp->uncompressed_block;
00379     zs.avail_out = fp->uncompressed_block_size;
00380 
00381     status = inflateInit2(&zs, GZIP_WINDOW_BITS);
00382     if (status != Z_OK) {
00383         report_error(fp, "inflate init failed");
00384         return -1;
00385     }
00386     status = inflate(&zs, Z_FINISH);
00387     if (status != Z_STREAM_END) {
00388         inflateEnd(&zs);
00389         report_error(fp, "inflate failed");
00390         return -1;
00391     }
00392     status = inflateEnd(&zs);
00393     if (status != Z_OK) {
00394         report_error(fp, "inflate failed");
00395         return -1;
00396     }
00397     return zs.total_out;
00398 }
00399 
00400 static
00401 int
00402 check_header(const bgzf_byte_t* header)
00403 {
00404     return (header[0] == GZIP_ID1 &&
00405             header[1] == (bgzf_byte_t) GZIP_ID2 &&
00406             header[2] == Z_DEFLATED &&
00407             (header[3] & FLG_FEXTRA) != 0 &&
00408             unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN &&
00409             header[12] == BGZF_ID1 &&
00410             header[13] == BGZF_ID2 &&
00411             unpackInt16((uint8_t*)&header[14]) == BGZF_LEN);
00412 }
00413 
00414 static void free_cache(BGZF *fp)
00415 {
00416     khint_t k;
00417     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00418     if (fp->open_mode != 'r') return;
00419     for (k = kh_begin(h); k < kh_end(h); ++k)
00420         if (kh_exist(h, k)) free(kh_val(h, k).block);
00421     kh_destroy(cache, h);
00422 }
00423 
00424 static int load_block_from_cache(BGZF *fp, int64_t block_address)
00425 {
00426     khint_t k;
00427     cache_t *p;
00428     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00429     k = kh_get(cache, h, block_address);
00430     if (k == kh_end(h)) return 0;
00431     p = &kh_val(h, k);
00432     if (fp->block_length != 0) fp->block_offset = 0;
00433     fp->block_address = block_address;
00434     fp->block_length = p->size;
00435     memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE);
00436 #ifdef _USE_KNETFILE
00437     knet_seek(fp->x.fpr, p->end_offset, SEEK_SET);
00438 #else
00439     fseeko(fp->file, p->end_offset, SEEK_SET);
00440 #endif
00441     return p->size;
00442 }
00443 
00444 static void cache_block(BGZF *fp, int size)
00445 {
00446     int ret;
00447     khint_t k;
00448     cache_t *p;
00449     khash_t(cache) *h = (khash_t(cache)*)fp->cache;
00450     if (MAX_BLOCK_SIZE >= fp->cache_size) return;
00451     if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) {
00452         /* A better way would be to remove the oldest block in the
00453          * cache, but here we remove a random one for simplicity. This
00454          * should not have a big impact on performance. */
00455         for (k = kh_begin(h); k < kh_end(h); ++k)
00456             if (kh_exist(h, k)) break;
00457         if (k < kh_end(h)) {
00458             free(kh_val(h, k).block);
00459             kh_del(cache, h, k);
00460         }
00461     }
00462     k = kh_put(cache, h, fp->block_address, &ret);
00463     if (ret == 0) return; // if this happens, a bug!
00464     p = &kh_val(h, k);
00465     p->size = fp->block_length;
00466     p->end_offset = fp->block_address + size;
00467     p->block = malloc(MAX_BLOCK_SIZE);
00468     memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
00469 }
00470 
00471 int
00472 bgzf_read_block(BGZF* fp)
00473 {
00474     bgzf_byte_t header[BLOCK_HEADER_LENGTH];
00475     int count, size = 0, block_length, remaining;
00476 #ifdef _USE_KNETFILE
00477     int64_t block_address = knet_tell(fp->x.fpr);
00478     if (load_block_from_cache(fp, block_address)) return 0;
00479     count = knet_read(fp->x.fpr, header, sizeof(header));
00480 #else
00481     int64_t block_address = ftello(fp->file);
00482     if (load_block_from_cache(fp, block_address)) return 0;
00483     count = fread(header, 1, sizeof(header), fp->file);
00484 #endif
00485     if (count == 0) {
00486         fp->block_length = 0;
00487         return 0;
00488     }
00489     size = count;
00490     if (count != sizeof(header)) {
00491         report_error(fp, "read failed");
00492         return -1;
00493     }
00494     if (!check_header(header)) {
00495         report_error(fp, "invalid block header");
00496         return -1;
00497     }
00498     block_length = unpackInt16((uint8_t*)&header[16]) + 1;
00499     bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
00500     memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
00501     remaining = block_length - BLOCK_HEADER_LENGTH;
00502 #ifdef _USE_KNETFILE
00503     count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
00504 #else
00505     count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
00506 #endif
00507     if (count != remaining) {
00508         report_error(fp, "read failed");
00509         return -1;
00510     }
00511     size += count;
00512     count = inflate_block(fp, block_length);
00513     if (count < 0) return -1;
00514     if (fp->block_length != 0) {
00515         // Do not reset offset if this read follows a seek.
00516         fp->block_offset = 0;
00517     }
00518     fp->block_address = block_address;
00519     fp->block_length = count;
00520     cache_block(fp, size);
00521     return 0;
00522 }
00523 
00524 int
00525 bgzf_read(BGZF* fp, void* data, int length)
00526 {
00527     if (length <= 0) {
00528         return 0;
00529     }
00530     if (fp->open_mode != 'r') {
00531         report_error(fp, "file not open for reading");
00532         return -1;
00533     }
00534 
00535     int bytes_read = 0;
00536     bgzf_byte_t* output = data;
00537     while (bytes_read < length) {
00538         int copy_length, available = fp->block_length - fp->block_offset;
00539         bgzf_byte_t *buffer;
00540         if (available <= 0) {
00541             if (bgzf_read_block(fp) != 0) {
00542                 return -1;
00543             }
00544             available = fp->block_length - fp->block_offset;
00545             if (available <= 0) {
00546                 break;
00547             }
00548         }
00549         copy_length = bgzf_min(length-bytes_read, available);
00550         buffer = fp->uncompressed_block;
00551         memcpy(output, buffer + fp->block_offset, copy_length);
00552         fp->block_offset += copy_length;
00553         output += copy_length;
00554         bytes_read += copy_length;
00555     }
00556     if (fp->block_offset == fp->block_length) {
00557 #ifdef _USE_KNETFILE
00558         fp->block_address = knet_tell(fp->x.fpr);
00559 #else
00560         fp->block_address = ftello(fp->file);
00561 #endif
00562         fp->block_offset = 0;
00563         fp->block_length = 0;
00564     }
00565     return bytes_read;
00566 }
00567 
00568 int bgzf_flush(BGZF* fp)
00569 {
00570     while (fp->block_offset > 0) {
00571         int count, block_length;
00572         block_length = deflate_block(fp, fp->block_offset);
00573         if (block_length < 0) return -1;
00574 #ifdef _USE_KNETFILE
00575         count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
00576 #else
00577         count = fwrite(fp->compressed_block, 1, block_length, fp->file);
00578 #endif
00579         if (count != block_length) {
00580             report_error(fp, "write failed");
00581             return -1;
00582         }
00583         fp->block_address += block_length;
00584     }
00585     return 0;
00586 }
00587 
00588 int bgzf_flush_try(BGZF *fp, int size)
00589 {
00590     if (fp->block_offset + size > fp->uncompressed_block_size)
00591         return bgzf_flush(fp);
00592     return -1;
00593 }
00594 
00595 int bgzf_write(BGZF* fp, const void* data, int length)
00596 {
00597     const bgzf_byte_t *input = data;
00598     int block_length, bytes_written;
00599     if (fp->open_mode != 'w') {
00600         report_error(fp, "file not open for writing");
00601         return -1;
00602     }
00603 
00604     if (fp->uncompressed_block == NULL)
00605         fp->uncompressed_block = malloc(fp->uncompressed_block_size);
00606 
00607     input = data;
00608     block_length = fp->uncompressed_block_size;
00609     bytes_written = 0;
00610     while (bytes_written < length) {
00611         int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
00612         bgzf_byte_t* buffer = fp->uncompressed_block;
00613         memcpy(buffer + fp->block_offset, input, copy_length);
00614         fp->block_offset += copy_length;
00615         input += copy_length;
00616         bytes_written += copy_length;
00617         if (fp->block_offset == block_length) {
00618             if (bgzf_flush(fp) != 0) {
00619                 break;
00620             }
00621         }
00622     }
00623     return bytes_written;
00624 }
00625 
00626 int bgzf_close(BGZF* fp)
00627 {
00628     if (fp->open_mode == 'w') {
00629         if (bgzf_flush(fp) != 0) return -1;
00630         { // add an empty block
00631                     int count, block_length = deflate_block(fp, 0);
00632 #ifdef _USE_KNETFILE
00633             count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
00634 #else
00635             count = fwrite(fp->compressed_block, 1, block_length, fp->file);
00636 #endif
00637                         if(count != 0)
00638                         {// something was written
00639                         }
00640         }
00641 #ifdef _USE_KNETFILE
00642         if (fflush(fp->x.fpw) != 0) {
00643 #else
00644         if (fflush(fp->file) != 0) {
00645 #endif
00646             report_error(fp, "flush failed");
00647             return -1;
00648         }
00649     }
00650     if (fp->owned_file) {
00651 #ifdef _USE_KNETFILE
00652         int ret;
00653         if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
00654         else ret = knet_close(fp->x.fpr);
00655         if (ret != 0) return -1;
00656 #else
00657         if (fclose(fp->file) != 0) return -1;
00658 #endif
00659     }
00660     free(fp->uncompressed_block);
00661     free(fp->compressed_block);
00662     free_cache(fp);
00663     free(fp);
00664     return 0;
00665 }
00666 
00667 void bgzf_set_cache_size(BGZF *fp, int cache_size)
00668 {
00669     if (fp) fp->cache_size = cache_size;
00670 }
00671 
00672 int bgzf_check_EOF(BGZF *fp)
00673 {
00674     static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
00675         // Last 28 bytes of an uncompressed bgzf file which are different
00676         // from the last 28 bytes of compressed bgzf files.
00677     static uint8_t magic2[28] = "\4\0\0\0\0\0\377\6\0\102\103\2\0\036\0\1\0\0\377\377\0\0\0\0\0\0\0\0";
00678     uint8_t buf[28];
00679     off_t offset;
00680 #ifdef _USE_KNETFILE
00681     offset = knet_tell(fp->x.fpr);
00682     if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;
00683     knet_read(fp->x.fpr, buf, 28);
00684     knet_seek(fp->x.fpr, offset, SEEK_SET);
00685 #else
00686     offset = ftello(fp->file);
00687     if (fseeko(fp->file, -28, SEEK_END) != 0) return -1;
00688     int count = fread(buf, 1, 28, fp->file);
00689         if(count != 28)
00690         {
00691             report_error(fp, "bgzf_check_EOF failed");
00692         }
00693     fseeko(fp->file, offset, SEEK_SET);
00694 #endif
00695     if((memcmp(magic, buf, 28) == 0) || (memcmp(magic2, buf, 28) == 0))
00696         {
00697             return(1);
00698         }
00699         return(0);
00700 }
00701 
00702 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
00703 {
00704     int block_offset;
00705     int64_t block_address;
00706 
00707     if (fp->open_mode != 'r') {
00708         report_error(fp, "file not open for read");
00709         return -1;
00710     }
00711     if (where != SEEK_SET) {
00712         report_error(fp, "unimplemented seek option");
00713         return -1;
00714     }
00715     block_offset = pos & 0xFFFF;
00716     block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
00717 #ifdef _USE_KNETFILE
00718     if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
00719 #else
00720     if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
00721 #endif
00722         report_error(fp, "seek failed");
00723         return -1;
00724     }
00725     fp->block_length = 0;  // indicates current block is not loaded
00726     fp->block_address = block_address;
00727     fp->block_offset = block_offset;
00728     return 0;
00729 }
00730 #endif
Generated on Mon Feb 11 13:45:19 2013 for libStatGen Software by  doxygen 1.6.3