libStatGen Software
1
|
Classes | |
struct | TabixFormat |
Public Types | |
enum | Format { FORMAT_GENERIC = 0, FORMAT_SAM = 1, FORMAT_VCF = 2 } |
Public Member Functions | |
void | resetIndex () |
Reset the member data for a new index file. | |
StatGenStatus::Status | readIndex (const char *filename) |
bool | getStartPos (const char *refName, int32_t start, uint64_t &fileStartPos) const |
Get the starting file offset to look for the specified start position. | |
const char * | getRefName (unsigned int indexNum) const |
Return the reference name at the specified index, or throw an exception if the index is out of range. | |
int32_t | getFormat () const |
const char * Tabix::getRefName | ( | unsigned int | indexNum | ) | const |
Return the reference name at the specified index, or throw an exception if the index is out of range.
Definition at line 247 of file Tabix.cpp.
{
    // Looks up the reference (chromosome) name stored at position indexNum
    // in myChromNamesVector.
    //   indexNum: zero-based position in the chromosome name list.
    //   returns:  the name pointer stored at that position.
    //   throws:   std::runtime_error if indexNum is past the end of the list.
    if(indexNum >= myChromNamesVector.size())
    {
        String message = "ERROR: Out of range on Tabix::getRefName(";
        message += indexNum;
        message += ")";
        // The throw always leaves the function, so nothing may follow it
        // in this branch.
        throw(std::runtime_error(message.c_str()));
    }
    return(myChromNamesVector[indexNum]);
}
bool Tabix::getStartPos | ( | const char * | refName, |
int32_t | start, | ||
uint64_t & | fileStartPos | ||
) | const |
Get the starting file offset to look for the specified start position.
To search an entire reference, set start to -1. Any negative start is treated as 0, so passing either 0 or -1 begins at the start of the reference.
Definition at line 218 of file Tabix.cpp.
{
    // Finds the file offset at which to start looking for records of
    // reference refName at or after position start.
    //   refName:      reference (chromosome) name to look up.
    //   start:        start position; a negative value is treated as 0.
    //   fileStartPos: receives the offset from the linear index lookup.
    //   returns:      false if refName is NULL or is not in the index,
    //                 otherwise the result of the linear index lookup.

    // A NULL name cannot match any reference (and must not reach strcmp).
    if(refName == NULL)
    {
        return(false);
    }

    // Look for the reference name in the list.
    // n_ref and the name vector are filled in separately, so bound the scan
    // by both and skip entries that were never pointed at a name.
    bool found = false;
    int refID = 0;
    for(refID = 0;
        (refID < n_ref) &&
            (static_cast<size_t>(refID) < myChromNamesVector.size());
        refID++)
    {
        const char* candidate = myChromNamesVector[refID];
        if((candidate != NULL) && (strcmp(refName, candidate) == 0))
        {
            // found the reference
            found = true;
            break;
        }
    }

    if(!found)
    {
        // Didn't find the refName, so return false.
        return(false);
    }

    // Look up in the linear index.
    if(start < 0)
    {
        // Negative index, so start at 0.
        start = 0;
    }
    return(getMinOffsetFromLinearIndex(refID, start, fileStartPos));
}
StatGenStatus::Status Tabix::readIndex | ( | const char * | filename | ) | [virtual] |
filename | the tabix index file to be read. |
Implements IndexBase.
Definition at line 52 of file Tabix.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_PARSE, ifopen(), ifread(), resetIndex(), and StatGenStatus::SUCCESS.
{
    // Reads a tabix index file into this object.
    //   filename: path of the tabix index file to read.
    //   returns:  StatGenStatus::SUCCESS when the whole index was read,
    //             StatGenStatus::FAIL_IO when the file cannot be opened or a
    //             read comes up short, and StatGenStatus::FAIL_PARSE when the
    //             contents are not a usable tabix index (bad magic, negative
    //             counts, out-of-range bin number, malformed name list, or a
    //             list too large to allocate).
    // All counts and bin numbers come straight from the file, so each one is
    // validated before it is used as a size or an array index.

    // Reset the index from anything that may previously be set.
    resetIndex();

    IFILE indexFile = ifopen(filename, "rb");

    // Failed to open the index file.
    if(indexFile == NULL)
    {
        return(StatGenStatus::FAIL_IO);
    }

    // Close the index file on every exit path (all returns below, and any
    // exception thrown by an allocation).
    struct IndexFileCloser
    {
        explicit IndexFileCloser(IFILE& file) : myFile(file) {}
        ~IndexFileCloser() { ifclose(myFile); }
        IFILE& myFile;
    } indexFileCloser(indexFile);

    // read the tabix index structure.

    // Read the magic string.
    char magic[4];
    if(ifread(indexFile, magic, 4) != 4)
    {
        // Failed to read the magic
        return(StatGenStatus::FAIL_IO);
    }

    // The file must start with the bytes 'T', 'B', 'I', 1.
    if (magic[0] != 'T' || magic[1] != 'B' || magic[2] != 'I' || magic[3] != 1)
    {
        // Not a Tabix Index file.
        return(StatGenStatus::FAIL_PARSE);
    }

    // It is a tabix index file.
    // Read the number of reference sequences.
    if(ifread(indexFile, &n_ref, 4) != 4)
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }
    if(n_ref < 0)
    {
        // A negative count would turn into an enormous size in the resizes
        // below; do not leave it behind for later lookups either.
        n_ref = 0;
        return(StatGenStatus::FAIL_PARSE);
    }

    // Size the references.
    myRefs.resize(n_ref);

    // Read the Format configuration.
    if(ifread(indexFile, &myFormat, sizeof(myFormat)) != sizeof(myFormat))
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // Read the length of the chromosome names.
    uint32_t l_nm;
    if(ifread(indexFile, &l_nm, sizeof(l_nm)) != sizeof(l_nm))
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // Read the chromosome names.
    myChromNamesBuffer = new char[l_nm];
    if(ifread(indexFile, myChromNamesBuffer, l_nm) != l_nm)
    {
        return(StatGenStatus::FAIL_IO);
    }
    if((l_nm > 0) && (myChromNamesBuffer[l_nm - 1] != '\0'))
    {
        // The last name is not terminated, so using it as a C string would
        // read past the end of the buffer.
        return(StatGenStatus::FAIL_PARSE);
    }
    myChromNamesVector.resize(n_ref);

    // Parse out the chromosome names: each name starts at the beginning of
    // the buffer or right after a terminating null character.
    bool prevNull = true;
    int chromIndex = 0;
    for(uint32_t i = 0; i < l_nm; i++)
    {
        if(chromIndex >= n_ref)
        {
            // already set the pointer for the last chromosome name,
            // so stop looping.
            break;
        }
        if(prevNull == true)
        {
            myChromNamesVector[chromIndex++] = myChromNamesBuffer + i;
            prevNull = false;
        }
        if(myChromNamesBuffer[i] == '\0')
        {
            prevNull = true;
        }
    }
    if(chromIndex != n_ref)
    {
        // Fewer names than references: some name pointers were never set.
        return(StatGenStatus::FAIL_PARSE);
    }

    for(int refIndex = 0; refIndex < n_ref; refIndex++)
    {
        // Read each reference.
        Reference* ref = &(myRefs[refIndex]);

        // Resize the bins so they can be indexed by bin number.
        ref->bins.resize(MAX_NUM_BINS + 1);

        // Read the number of bins.
        if(ifread(indexFile, &(ref->n_bin), 4) != 4)
        {
            // Failed to read the number of bins.
            // Return failure.
            // NOTE(review): this is a short read like the others, which
            // return FAIL_IO; FAIL_PARSE is kept in case callers rely on it.
            return(StatGenStatus::FAIL_PARSE);
        }

        // Read each bin.
        for(int binIndex = 0; binIndex < ref->n_bin; binIndex++)
        {
            uint32_t binNumber;

            // Read in the bin number.
            if(ifread(indexFile, &(binNumber), 4) != 4)
            {
                // Failed to read the bin number.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }
            if(binNumber > MAX_NUM_BINS)
            {
                // bins holds MAX_NUM_BINS + 1 entries, so a larger bin
                // number would index past the end of the vector.
                return(StatGenStatus::FAIL_PARSE);
            }

            // Add the bin to the reference and get the
            // pointer back so the values can be set in it.
            Bin* binPtr = &(ref->bins[binNumber]);
            binPtr->bin = binNumber;

            // Read in the number of chunks into a local so that only a
            // validated count is ever stored in the bin.
            int32_t numChunks = 0;
            if(ifread(indexFile, &numChunks, 4) != 4)
            {
                // Failed to read number of chunks.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }
            if(numChunks < 0)
            {
                return(StatGenStatus::FAIL_PARSE);
            }

            // Read in the chunks.
            // Compute the byte size in 64 bits and make sure it survives the
            // narrowing to the 32-bit size that is allocated and read.
            const uint64_t chunkBytes =
                static_cast<uint64_t>(numChunks) * sizeof(Chunk);
            const uint32_t sizeOfChunkList = static_cast<uint32_t>(chunkBytes);
            if(sizeOfChunkList != chunkBytes)
            {
                return(StatGenStatus::FAIL_PARSE);
            }

            // Allocate space for the chunks.
            binPtr->chunks = (Chunk*)malloc(sizeOfChunkList);
            if((sizeOfChunkList != 0) && (binPtr->chunks == NULL))
            {
                // Could not allocate the chunk list (no dedicated
                // out-of-memory status is visible here).
                return(StatGenStatus::FAIL_PARSE);
            }
            binPtr->n_chunk = numChunks;

            if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList)
            {
                // Failed to read the chunks.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }
        }

        // Read the number of intervals, again through a validated local.
        int32_t numIntervals = 0;
        if(ifread(indexFile, &numIntervals, 4) != 4)
        {
            // Failed to read, set to 0.
            ref->n_intv = 0;
            // Return failure.
            return(StatGenStatus::FAIL_IO);
        }
        if(numIntervals < 0)
        {
            ref->n_intv = 0;
            return(StatGenStatus::FAIL_PARSE);
        }

        // Allocate space for the intervals and read them.
        const uint64_t linearIndexBytes =
            static_cast<uint64_t>(numIntervals) * sizeof(uint64_t);
        const uint32_t linearIndexSize = static_cast<uint32_t>(linearIndexBytes);
        if(linearIndexSize != linearIndexBytes)
        {
            ref->n_intv = 0;
            return(StatGenStatus::FAIL_PARSE);
        }
        ref->ioffsets = (uint64_t*)malloc(linearIndexSize);
        if((linearIndexSize != 0) && (ref->ioffsets == NULL))
        {
            // Could not allocate the linear index.
            ref->n_intv = 0;
            return(StatGenStatus::FAIL_PARSE);
        }
        ref->n_intv = numIntervals;

        if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize)
        {
            // Failed to read the linear index.
            // Return failure.
            return(StatGenStatus::FAIL_IO);
        }
    }

    // Successfully read the tabix index file.
    return(StatGenStatus::SUCCESS);
}