libStatGen Software  1
Tabix Class Reference
Inheritance diagram for Tabix:
Collaboration diagram for Tabix:

List of all members.

Classes

struct  TabixFormat

Public Types

enum  Format { FORMAT_GENERIC = 0, FORMAT_SAM = 1, FORMAT_VCF = 2 }

Public Member Functions

void resetIndex ()
 Reset the member data for a new index file.
StatGenStatus::Status readIndex (const char *filename)
bool getStartPos (const char *refName, int32_t start, uint64_t &fileStartPos) const
 Get the starting file offset to look for the specified start position.
const char * getRefName (unsigned int indexNum) const
 Returns the reference name at the specified index, or throws an exception if the index is out of range.
int32_t getFormat () const

Detailed Description

Definition at line 31 of file Tabix.h.


Member Function Documentation

const char * Tabix::getRefName ( unsigned int  indexNum) const

Returns the reference name at the specified index, or throws an exception if the index is out of range.

Definition at line 247 of file Tabix.cpp.

{
    // Returns the chromosome/reference name stored at position indexNum of
    // myChromNamesVector.
    // Throws std::runtime_error when indexNum is not a valid position in
    // that vector.
    if(indexNum >= myChromNamesVector.size())
    {
        // Include the offending index in the message to ease debugging.
        String message = "ERROR: Out of range on Tabix::getRefName(";
        message += indexNum;
        message += ")";
        // The throw always leaves the function, so no return statement is
        // needed (or reachable) after it.
        throw(std::runtime_error(message.c_str()));
    }
    return(myChromNamesVector[indexNum]);
}
bool Tabix::getStartPos ( const char *  refName,
int32_t  start,
uint64_t &  fileStartPos 
) const

Get the starting file offset to look for the specified start position.

For an entire reference ID, set start to -1. To start at the beginning of the region, set start to 0/-1.

Definition at line 218 of file Tabix.cpp.

{
    // Walk the chromosome names until one matches refName or the list
    // of n_ref names is exhausted.
    int matchIndex = 0;
    while((matchIndex < n_ref) &&
          (strcmp(refName, myChromNamesVector[matchIndex]) != 0))
    {
        ++matchIndex;
    }

    // Running off the end of the list means refName is unknown.
    if(matchIndex >= n_ref)
    {
        return(false);
    }

    // A negative start is treated as position 0 for the linear index lookup.
    start = (start < 0) ? 0 : start;

    // The linear index lookup fills in fileStartPos and supplies the result.
    return(getMinOffsetFromLinearIndex(matchIndex, start, fileStartPos));
}
StatGenStatus::Status Tabix::readIndex ( const char *  filename) [virtual]
Parameters:
filename: the Tabix index file to be read.
Returns:
the status of the read.

Implements IndexBase.

Definition at line 52 of file Tabix.cpp.

References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_PARSE, ifopen(), ifread(), resetIndex(), and StatGenStatus::SUCCESS.

{
    // Reads the Tabix index stored in 'filename' into this object.
    //
    // Returns StatGenStatus::SUCCESS when the whole index was read,
    // StatGenStatus::FAIL_IO when the file cannot be opened or a read comes
    // up short, and StatGenStatus::FAIL_PARSE when the contents are not a
    // well-formed Tabix index (bad magic, negative counts, out-of-range bin
    // numbers, malformed chromosome name list).
    //
    // NOTE(review): on a failure return the members populated so far are left
    // as they are; presumably callers discard the object or call resetIndex()
    // again - confirm against callers.

    // Reset the index from anything that may previously be set.
    resetIndex();

    IFILE indexFile = ifopen(filename, "rb");

    // Failed to open the index file.
    if(indexFile == NULL)
    {
        return(StatGenStatus::FAIL_IO);
    }

    // Close the index file on every way out of this function (all of the
    // early returns below, the success return, and any exception thrown by
    // the allocations).  Previously the file was never closed.
    struct IndexFileCloser
    {
        explicit IndexFileCloser(IFILE& file) : myFile(file) {}
        ~IndexFileCloser() { ifclose(myFile); }
        IFILE& myFile;
    };
    IndexFileCloser indexFileCloser(indexFile);

    // read the tabix index structure.

    // Read the magic string.
    char magic[4];
    if(ifread(indexFile, magic, 4) != 4)
    {
        // Failed to read the magic
        return(StatGenStatus::FAIL_IO);
    }

    // The magic must be the bytes 'T', 'B', 'I', 1.
    if (magic[0] != 'T' || magic[1] != 'B' || magic[2] != 'I' || magic[3] != 1)
    {
        // Not a Tabix Index file.
        return(StatGenStatus::FAIL_PARSE);
    }

    // It is a tabix index file.
    // Read the number of reference sequences.
    if(ifread(indexFile, &n_ref, 4) != 4)
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // A negative count can only come from a corrupt file; it must not be
    // used to size the containers below.
    if(n_ref < 0)
    {
        n_ref = 0;
        return(StatGenStatus::FAIL_PARSE);
    }

    // Size the references.
    myRefs.resize(n_ref);

    // Read the Format configuration.
    if(ifread(indexFile, &myFormat, sizeof(myFormat)) != sizeof(myFormat))
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // Read the length of the chromosome names.
    uint32_t l_nm;

    if(ifread(indexFile, &l_nm, sizeof(l_nm)) != sizeof(l_nm))
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // Read the chromosome names.
    myChromNamesBuffer = new char[l_nm];
    if(ifread(indexFile, myChromNamesBuffer, l_nm) != l_nm)
    {
        return(StatGenStatus::FAIL_IO);
    }
    myChromNamesVector.resize(n_ref);

    // Parse out the chromosome names: each name starts right after the
    // previous name's terminating '\0' (or at the start of the buffer).
    // Scanning continues through the last recorded name so that its
    // terminator is confirmed to lie inside the buffer.
    bool prevNull = true;
    int chromIndex = 0;
    for(uint32_t i = 0; i < l_nm; i++)
    {
        if(prevNull == true)
        {
            if(chromIndex >= n_ref)
            {
                // Every name has been recorded and terminated,
                // so stop looping.
                break;
            }
            myChromNamesVector[chromIndex++] = myChromNamesBuffer + i;
            prevNull = false;
        }
        if(myChromNamesBuffer[i] == '\0')
        {
            prevNull = true;
        }
    }

    // Every reference needs a name, and the last name must be terminated
    // within the buffer; otherwise later string comparisons on the names
    // would dereference NULL or read past the end of the buffer.
    if((chromIndex != n_ref) || (prevNull == false))
    {
        return(StatGenStatus::FAIL_PARSE);
    }

    for(int refIndex = 0; refIndex < n_ref; refIndex++)
    {
        // Read each reference.
        Reference* ref = &(myRefs[refIndex]);
        
        // Resize the bins so they can be indexed by bin number.
        ref->bins.resize(MAX_NUM_BINS + 1);
        
        // Read the number of bins.
        if(ifread(indexFile, &(ref->n_bin), 4) != 4)
        {
            // Failed to read the number of bins.
            // Return failure.
            // NOTE(review): every other short read in this function reports
            // FAIL_IO; FAIL_PARSE is kept here in case callers depend on it.
            return(StatGenStatus::FAIL_PARSE);
        }

        // A negative bin count indicates a corrupt index.
        if(ref->n_bin < 0)
        {
            return(StatGenStatus::FAIL_PARSE);
        }

        // Read each bin.
        for(int binIndex = 0; binIndex < ref->n_bin; binIndex++)
        {
            uint32_t binNumber;

            // Read in the bin number.
            if(ifread(indexFile, &(binNumber), 4) != 4)
            {
                // Failed to read the bin number.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }

            // The bin number comes straight from the file and is used as
            // an index, so it must fall inside the bins vector.
            if(binNumber >= ref->bins.size())
            {
                return(StatGenStatus::FAIL_PARSE);
            }

            // Look up the bin for this bin number so its values can be set.
            Bin* binPtr = &(ref->bins[binNumber]);
            binPtr->bin = binNumber;
         
            // Read in the number of chunks.
            if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4)
            {
                // Failed to read number of chunks.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }

            // A negative chunk count indicates a corrupt index.
            if(binPtr->n_chunk < 0)
            {
                return(StatGenStatus::FAIL_PARSE);
            }

            // Compute the chunk list size in 64 bits so an oversized count
            // cannot silently wrap the 32-bit size used for the read.
            const uint64_t chunkBytes =
                static_cast<uint64_t>(binPtr->n_chunk) * sizeof(Chunk);
            if(chunkBytes > 0xFFFFFFFFu)
            {
                return(StatGenStatus::FAIL_PARSE);
            }

            // Read in the chunks.
            // Allocate space for the chunks.
            uint32_t sizeOfChunkList = static_cast<uint32_t>(chunkBytes);
            binPtr->chunks = (Chunk*)malloc(sizeOfChunkList);
            if((binPtr->chunks == NULL) && (sizeOfChunkList != 0))
            {
                // No buffer to read into, so the chunks cannot be read.
                return(StatGenStatus::FAIL_IO);
            }
            if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList)
            {
                // Failed to read the chunks.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }
        }

        // Read the number of intervals.
        if(ifread(indexFile, &(ref->n_intv), 4) != 4)
        {
            // Failed to read, set to 0.
            ref->n_intv = 0;
            // Return failure.
            return(StatGenStatus::FAIL_IO);
        }

        // A negative interval count indicates a corrupt index.
        if(ref->n_intv < 0)
        {
            ref->n_intv = 0;
            return(StatGenStatus::FAIL_PARSE);
        }

        // Compute the linear index size in 64 bits for the same reason as
        // the chunk list size above.
        const uint64_t linearIndexBytes =
            static_cast<uint64_t>(ref->n_intv) * sizeof(uint64_t);
        if(linearIndexBytes > 0xFFFFFFFFu)
        {
            return(StatGenStatus::FAIL_PARSE);
        }

        // Allocate space for the intervals and read them.
        uint32_t linearIndexSize = static_cast<uint32_t>(linearIndexBytes);
        ref->ioffsets = (uint64_t*)malloc(linearIndexSize);
        if((ref->ioffsets == NULL) && (linearIndexSize != 0))
        {
            // No buffer to read into, so the linear index cannot be read.
            return(StatGenStatus::FAIL_IO);
        }
        if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize)
        {
            // Failed to read the linear index.
            // Return failure.
            return(StatGenStatus::FAIL_IO);
        }
    }

    // Successfully read the tabix index file.
    return(StatGenStatus::SUCCESS);
}

The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends