libStatGen Software
1
|
Public Member Functions | |
virtual void | resetIndex () |
Reset the member data for a new index file. | |
SamStatus::Status | readIndex (const char *filename) |
bool | getChunksForRegion (int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList) |
Get the list of chunks associated with this region. | |
uint64_t | getMaxOffset () const |
bool | getReferenceMinMax (int32_t refID, uint64_t &minOffset, uint64_t &maxOffset) const |
Get the minimum and maximum file offsets for the specfied reference ID. | |
int32_t | getNumMappedReads (int32_t refID) |
Get the number of mapped reads for this reference id. | |
int32_t | getNumUnMappedReads (int32_t refID) |
Get the number of unmapped reads for this reference id. | |
void | printIndex (int32_t refID, bool summary=false) |
Print the index information. | |
Static Public Attributes | |
static const int32_t | UNKNOWN_NUM_READS = -1 |
The number used for an unknown number of reads. | |
static const int32_t | REF_ID_UNMAPPED = -1 |
The number used for the reference id of unmapped reads. | |
static const int32_t | REF_ID_ALL = -2 |
The number used to indicate that all reference ids should be used. |
Definition at line 31 of file BamIndex.h.
bool BamIndex::getChunksForRegion | ( | int32_t | refID, |
int32_t | start, | ||
int32_t | end, | ||
SortedChunkList & | chunkList | ||
) |
Get the list of chunks associated with this region.
For an entire reference ID, set start and end to -1. To start at the beginning of the region, set start to 0/-1. To go to the end of the region, set end to -1.
Definition at line 218 of file BamIndex.cpp.
References REF_ID_UNMAPPED.
{ chunkList.clear(); // If start is >= to end, there will be no sections, return no // regions. if((start >= end) && (end != -1)) { std::cerr << "Warning, requesting region where start <= end, so " << "no values will be returned.\n"; return(false); } // Handle REF_ID_UNMAPPED. This uses a default chunk which covers // from the max offset to the end of the file. if(refID == REF_ID_UNMAPPED) { Chunk refChunk; // The start of the unmapped region is the max offset found // in the index file. refChunk.chunk_beg = getMaxOffset(); // The end of the unmapped region is the end of the file, so // set chunk end to the max value. refChunk.chunk_end = Chunk::MAX_CHUNK_VALUE; return(chunkList.insert(refChunk)); } if((refID < 0) || (refID >= n_ref)) { // The specified refID is out of range, return false. std::cerr << "Warning, requesting refID is out of range, so " << "no values will be returned.\n"; return(false); } const Reference* ref = &(myRefs[refID]); // Handle where start/end are defaults. if(start == -1) { if(end == -1) { // This is whole chromosome, so take a shortcut. if(ref->maxChunkOffset == 0) { // No chunks for this region, but this is not an error. return(true); } Chunk refChunk; refChunk.chunk_beg = ref->minChunkOffset; refChunk.chunk_end = ref->maxChunkOffset; return(chunkList.insert(refChunk)); } else { start = 0; } } if(end == -1) { // MAX_POSITION is inclusive, but end is exclusive, so add 1. end = MAX_POSITION + 1; } // Determine the minimum offset for the given start position. This // is done by using the linear index for the specified start position. uint64_t minOffset = 0; getMinOffsetFromLinearIndex(refID, start, minOffset); uint16_t binInRangeList[MAX_NUM_BINS + 1]; int numBins = getBinsForRegion(start, end, binInRangeList); // loop through the bins in the range and get the chunks. for(int i = 0; i < numBins; ++i) { int binNum = binInRangeList[i]; const Bin* bin = &(ref->bins[binNum]); // Add each chunk in the bin to the map. for(int chunkIndex = 0; chunkIndex < bin->n_chunk; chunkIndex++) { // If the end of the chunk is less than the minimum offset // for the 16K block that starts our region, then no // records in this chunk will cross our region, so do // not add it to the chunks we need to use. if(bin->chunks[chunkIndex].chunk_end < minOffset) { continue; } // Add the chunk to the map. if(!chunkList.insert(bin->chunks[chunkIndex])) { // Failed to add to the map, return false. std::cerr << "Warning, Failed to add a chunk, so " << "no values will be returned.\n"; return(false); } } } // Now that all chunks have been added to the list, // handle overlapping chunks. return(chunkList.mergeOverlapping()); }
int32_t BamIndex::getNumMappedReads | ( | int32_t | refID | ) |
Get the number of mapped reads for this reference id.
Returns -1 for out of range refIDs.
refID | reference ID for which to extract the number of mapped reads. |
Definition at line 351 of file BamIndex.cpp.
References REF_ID_UNMAPPED.
Referenced by SamFile::getNumMappedReadsFromIndex().
{ // If it is the reference id of unmapped reads, return // that there are no mapped reads. if(refID == REF_ID_UNMAPPED) { // These are by definition all unmapped reads. return(0); } if((refID < 0) || (refID >= (int32_t)myRefs.size())) { // Reference ID is out of range for this index file. return(-1); } // Get this reference. return(myRefs[refID].n_mapped); }
int32_t BamIndex::getNumUnMappedReads | ( | int32_t | refID | ) |
Get the number of unmapped reads for this reference id.
Returns -1 for out of range refIDs.
refID | reference ID for which to extract the number of unmapped reads. |
Definition at line 373 of file BamIndex.cpp.
References REF_ID_UNMAPPED.
Referenced by SamFile::getNumUnMappedReadsFromIndex().
{ // If it is the reference id of unmapped reads, return // that value. if(refID == REF_ID_UNMAPPED) { return(myUnMappedNumReads); } if((refID < 0) || (refID >= (int32_t)myRefs.size())) { // Reference ID is out of range for this index file. return(-1); } // Get this reference. return(myRefs[refID].n_unmapped); }
bool BamIndex::getReferenceMinMax | ( | int32_t | refID, |
uint64_t & | minOffset, | ||
uint64_t & | maxOffset | ||
) | const |
Get the minimum and maximum file offsets for the specfied reference ID.
refID | the reference ID to locate in the file. |
minOffset | returns the min file offset for the specified reference |
maxOffset | returns the max file offset for the specified reference |
Definition at line 333 of file BamIndex.cpp.
{ if((refID < 0) || (refID >= (int32_t)myRefs.size())) { // Reference ID is out of range for this index file. return(false); } // Get this reference. minOffset = myRefs[refID].minChunkOffset; maxOffset = myRefs[refID].maxChunkOffset; return(true); }
void BamIndex::printIndex | ( | int32_t | refID, |
bool | summary = false |
||
) |
Print the index information.
refID | reference ID for which to print info for. -1 means print for all references. |
summary | whether or not to just print a summary (defaults to false). The summary just contains summary info for each reference and not every bin/chunk. |
Definition at line 394 of file BamIndex.cpp.
{ std::cout << "BAM Index: " << std::endl; std::cout << "# Reference Sequences: " << n_ref << std::endl; unsigned int startRef = 0; unsigned int endRef = myRefs.size() - 1; std::vector<Reference> refsToProcess; if(refID != -1) { // Set start and end ref to the specified reference id. startRef = refID; endRef = refID; } // Print out the information for each bin. for(unsigned int i = startRef; i <= endRef; ++i) { std::cout << std::dec << "\tReference ID: " << std::setw(4) << i << "; #Bins: "<< std::setw(6) << myRefs[i].n_bin << "; #Linear Index Entries: " << std::setw(6) << myRefs[i].n_intv << "; Min Chunk Offset: " << std::setw(18) << std::hex << std::showbase << myRefs[i].minChunkOffset << "; Max Chunk Offset: " << std::setw(18) << myRefs[i].maxChunkOffset << std::dec; // Print the mapped/unmapped if set. if(myRefs[i].n_mapped != Reference::UNKNOWN_MAP_INFO) { std::cout << "; " << myRefs[i].n_mapped << " Mapped Reads"; } if(myRefs[i].n_mapped != Reference::UNKNOWN_MAP_INFO) { std::cout << "; " << myRefs[i].n_unmapped << " Unmapped Reads"; } std::cout << std::endl; // Only print more details if not summary. if(!summary) { std::vector<Bin>::iterator binIter; for(binIter = myRefs[i].bins.begin(); binIter != myRefs[i].bins.end(); ++binIter) { Bin* binPtr = &(*binIter); if(binPtr->bin == Bin::NOT_USED_BIN) { // This bin is not used, continue. continue; } // Print the bin info. std::cout << "\t\t\tBin Name: " << binPtr->bin << std::endl; std::cout << "\t\t\t# Chunks: " << binPtr->n_chunk << std::endl; std::cout << std::hex << std::showbase; for(int chunkIndex = 0; chunkIndex < binPtr->n_chunk; ++chunkIndex) { // If this is the last chunk of the MAX_NUM_BINS - it // contains a mapped/unmapped count rather than the regular // chunk addresses. if((binPtr->bin != MAX_NUM_BINS) || (chunkIndex != (binPtr->n_chunk - 1))) { std::cout << "\t\t\t\tchunk_beg: " << binPtr->chunks[chunkIndex].chunk_beg << std::endl; std::cout << "\t\t\t\tchunk_end: " << binPtr->chunks[chunkIndex].chunk_end << std::endl; } } } std::cout << std::dec; // Print the linear index. for(int linearIndex = 0; linearIndex < myRefs[i].n_intv; ++linearIndex) { if(myRefs[i].ioffsets[linearIndex] != 0) { std::cout << "\t\t\tLinearIndex[" << std::dec << linearIndex << "] Offset: " << std::hex << myRefs[i].ioffsets[linearIndex] << std::endl; } } } } }
SamStatus::Status BamIndex::readIndex | ( | const char * | filename | ) | [virtual] |
filename | the bam index file to be read. |
Implements IndexBase.
Definition at line 45 of file BamIndex.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_PARSE, ifclose(), ifopen(), ifread(), resetIndex(), and StatGenStatus::SUCCESS.
Referenced by SamFile::ReadBamIndex().
{ // Reset the index from anything that may previously be set. resetIndex(); IFILE indexFile = ifopen(filename, "rb"); // Failed to open the index file. if(indexFile == NULL) { return(SamStatus::FAIL_IO); } // generate the bam index structure. // Read the magic string. char magic[4]; if(ifread(indexFile, magic, 4) != 4) { // Failed to read the magic ifclose(indexFile); return(SamStatus::FAIL_IO); } // If this is not an index file, set num references to 0. if (magic[0] != 'B' || magic[1] != 'A' || magic[2] != 'I' || magic[3] != 1) { // Not a BAM Index file. ifclose(indexFile); return(SamStatus::FAIL_PARSE); } // It is a bam index file. // Read the number of reference sequences. if(ifread(indexFile, &n_ref, 4) != 4) { // Failed to read. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Size the references. myRefs.resize(n_ref); for(int refIndex = 0; refIndex < n_ref; refIndex++) { // Read each reference. Reference* ref = &(myRefs[refIndex]); // Resize the bins so they can be indexed by bin number. ref->bins.resize(MAX_NUM_BINS + 1); // Read the number of bins. if(ifread(indexFile, &(ref->n_bin), 4) != 4) { // Failed to read the number of bins. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_PARSE); } // If there are no bins, then there are no // mapped/unmapped reads. if(ref->n_bin == 0) { ref->n_mapped = 0; ref->n_unmapped = 0; } // Read each bin. for(int binIndex = 0; binIndex < ref->n_bin; binIndex++) { uint32_t binNumber; // Read in the bin number. if(ifread(indexFile, &(binNumber), 4) != 4) { // Failed to read the bin number. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Add the bin to the reference and get the // pointer back so the values can be set in it. Bin* binPtr = &(ref->bins[binNumber]); binPtr->bin = binNumber; // Read in the number of chunks. if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4) { // Failed to read number of chunks. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Read in the chunks. // Allocate space for the chunks. uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk); binPtr->chunks = (Chunk*)malloc(sizeOfChunkList); if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList) { // Failed to read the chunks. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Determine the min/max for this bin if it is not the max bin. if(binPtr->bin != MAX_NUM_BINS) { for(int i = 0; i < binPtr->n_chunk; i++) { if(binPtr->chunks[i].chunk_beg < ref->minChunkOffset) { ref->minChunkOffset = binPtr->chunks[i].chunk_beg; } if(binPtr->chunks[i].chunk_end > ref->maxChunkOffset) { ref->maxChunkOffset = binPtr->chunks[i].chunk_end; } if(binPtr->chunks[i].chunk_end > maxOverallOffset) { maxOverallOffset = binPtr->chunks[i].chunk_end; } } } else { // Mapped/unmapped are the last chunk of the // MAX BIN ref->n_mapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_beg; ref->n_unmapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_end; } } // Read the number of intervals. if(ifread(indexFile, &(ref->n_intv), 4) != 4) { // Failed to read, set to 0. ref->n_intv = 0; // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } // Allocate space for the intervals and read them. uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t); ref->ioffsets = (uint64_t*)malloc(linearIndexSize); if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize) { // Failed to read the linear index. // Return failure. ifclose(indexFile); return(SamStatus::FAIL_IO); } } int32_t numUnmapped = 0; if(ifread(indexFile, &numUnmapped, sizeof(int32_t)) == sizeof(int32_t)) { myUnMappedNumReads = numUnmapped; } // Successfully read the bam index file. ifclose(indexFile); return(SamStatus::SUCCESS); }