|
libStatGen Software
1
|


Public Member Functions | |
| virtual void | resetIndex () |
| Reset the member data for a new index file. | |
| SamStatus::Status | readIndex (const char *filename) |
| bool | getChunksForRegion (int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList) |
| Get the list of chunks associated with this region. | |
| uint64_t | getMaxOffset () const |
| bool | getReferenceMinMax (int32_t refID, uint64_t &minOffset, uint64_t &maxOffset) const |
| Get the minimum and maximum file offsets for the specfied reference ID. | |
| int32_t | getNumMappedReads (int32_t refID) |
| Get the number of mapped reads for this reference id. | |
| int32_t | getNumUnMappedReads (int32_t refID) |
| Get the number of unmapped reads for this reference id. | |
| void | printIndex (int32_t refID, bool summary=false) |
| Print the index information. | |
Static Public Attributes | |
| static const int32_t | UNKNOWN_NUM_READS = -1 |
| The number used for an unknown number of reads. | |
| static const int32_t | REF_ID_UNMAPPED = -1 |
| The number used for the reference id of unmapped reads. | |
| static const int32_t | REF_ID_ALL = -2 |
| The number used to indicate that all reference ids should be used. | |
Definition at line 31 of file BamIndex.h.
| bool BamIndex::getChunksForRegion | ( | int32_t | refID, |
| int32_t | start, | ||
| int32_t | end, | ||
| SortedChunkList & | chunkList | ||
| ) |
Get the list of chunks associated with this region.
For an entire reference ID, set start and end to -1. To start at the beginning of the region, set start to 0/-1. To go to the end of the region, set end to -1.
Definition at line 218 of file BamIndex.cpp.
References REF_ID_UNMAPPED.
{
chunkList.clear();
// If start is >= to end, there will be no sections, return no
// regions.
if((start >= end) && (end != -1))
{
std::cerr << "Warning, requesting region where start <= end, so "
<< "no values will be returned.\n";
return(false);
}
// Handle REF_ID_UNMAPPED. This uses a default chunk which covers
// from the max offset to the end of the file.
if(refID == REF_ID_UNMAPPED)
{
Chunk refChunk;
// The start of the unmapped region is the max offset found
// in the index file.
refChunk.chunk_beg = getMaxOffset();
// The end of the unmapped region is the end of the file, so
// set chunk end to the max value.
refChunk.chunk_end = Chunk::MAX_CHUNK_VALUE;
return(chunkList.insert(refChunk));
}
if((refID < 0) || (refID >= n_ref))
{
// The specified refID is out of range, return false.
std::cerr << "Warning, requesting refID is out of range, so "
<< "no values will be returned.\n";
return(false);
}
const Reference* ref = &(myRefs[refID]);
// Handle where start/end are defaults.
if(start == -1)
{
if(end == -1)
{
// This is whole chromosome, so take a shortcut.
if(ref->maxChunkOffset == 0)
{
// No chunks for this region, but this is not an error.
return(true);
}
Chunk refChunk;
refChunk.chunk_beg = ref->minChunkOffset;
refChunk.chunk_end = ref->maxChunkOffset;
return(chunkList.insert(refChunk));
}
else
{
start = 0;
}
}
if(end == -1)
{
// MAX_POSITION is inclusive, but end is exclusive, so add 1.
end = MAX_POSITION + 1;
}
// Determine the minimum offset for the given start position. This
// is done by using the linear index for the specified start position.
uint64_t minOffset = 0;
getMinOffsetFromLinearIndex(refID, start, minOffset);
uint16_t binInRangeList[MAX_NUM_BINS + 1];
int numBins = getBinsForRegion(start, end, binInRangeList);
// loop through the bins in the range and get the chunks.
for(int i = 0; i < numBins; ++i)
{
int binNum = binInRangeList[i];
const Bin* bin = &(ref->bins[binNum]);
// Add each chunk in the bin to the map.
for(int chunkIndex = 0; chunkIndex < bin->n_chunk; chunkIndex++)
{
// If the end of the chunk is less than the minimum offset
// for the 16K block that starts our region, then no
// records in this chunk will cross our region, so do
// not add it to the chunks we need to use.
if(bin->chunks[chunkIndex].chunk_end < minOffset)
{
continue;
}
// Add the chunk to the map.
if(!chunkList.insert(bin->chunks[chunkIndex]))
{
// Failed to add to the map, return false.
std::cerr << "Warning, Failed to add a chunk, so "
<< "no values will be returned.\n";
return(false);
}
}
}
// Now that all chunks have been added to the list,
// handle overlapping chunks.
return(chunkList.mergeOverlapping());
}
| int32_t BamIndex::getNumMappedReads | ( | int32_t | refID | ) |
Get the number of mapped reads for this reference id.
Returns -1 for out of range refIDs.
| refID | reference ID for which to extract the number of mapped reads. |
Definition at line 351 of file BamIndex.cpp.
References REF_ID_UNMAPPED.
Referenced by SamFile::getNumMappedReadsFromIndex().
{
// If it is the reference id of unmapped reads, return
// that there are no mapped reads.
if(refID == REF_ID_UNMAPPED)
{
// These are by definition all unmapped reads.
return(0);
}
if((refID < 0) || (refID >= (int32_t)myRefs.size()))
{
// Reference ID is out of range for this index file.
return(-1);
}
// Get this reference.
return(myRefs[refID].n_mapped);
}
| int32_t BamIndex::getNumUnMappedReads | ( | int32_t | refID | ) |
Get the number of unmapped reads for this reference id.
Returns -1 for out of range refIDs.
| refID | reference ID for which to extract the number of unmapped reads. |
Definition at line 373 of file BamIndex.cpp.
References REF_ID_UNMAPPED.
Referenced by SamFile::getNumUnMappedReadsFromIndex().
{
// If it is the reference id of unmapped reads, return
// that value.
if(refID == REF_ID_UNMAPPED)
{
return(myUnMappedNumReads);
}
if((refID < 0) || (refID >= (int32_t)myRefs.size()))
{
// Reference ID is out of range for this index file.
return(-1);
}
// Get this reference.
return(myRefs[refID].n_unmapped);
}
| bool BamIndex::getReferenceMinMax | ( | int32_t | refID, |
| uint64_t & | minOffset, | ||
| uint64_t & | maxOffset | ||
| ) | const |
Get the minimum and maximum file offsets for the specfied reference ID.
| refID | the reference ID to locate in the file. |
| minOffset | returns the min file offset for the specified reference |
| maxOffset | returns the max file offset for the specified reference |
Definition at line 333 of file BamIndex.cpp.
{
if((refID < 0) || (refID >= (int32_t)myRefs.size()))
{
// Reference ID is out of range for this index file.
return(false);
}
// Get this reference.
minOffset = myRefs[refID].minChunkOffset;
maxOffset = myRefs[refID].maxChunkOffset;
return(true);
}
| void BamIndex::printIndex | ( | int32_t | refID, |
| bool | summary = false |
||
| ) |
Print the index information.
| refID | reference ID for which to print info for. -1 means print for all references. |
| summary | whether or not to just print a summary (defaults to false). The summary just contains summary info for each reference and not every bin/chunk. |
Definition at line 394 of file BamIndex.cpp.
{
std::cout << "BAM Index: " << std::endl;
std::cout << "# Reference Sequences: " << n_ref << std::endl;
unsigned int startRef = 0;
unsigned int endRef = myRefs.size() - 1;
std::vector<Reference> refsToProcess;
if(refID != -1)
{
// Set start and end ref to the specified reference id.
startRef = refID;
endRef = refID;
}
// Print out the information for each bin.
for(unsigned int i = startRef; i <= endRef; ++i)
{
std::cout << std::dec
<< "\tReference ID: " << std::setw(4) << i
<< "; #Bins: "<< std::setw(6) << myRefs[i].n_bin
<< "; #Linear Index Entries: "
<< std::setw(6) << myRefs[i].n_intv
<< "; Min Chunk Offset: "
<< std::setw(18) << std::hex << std::showbase << myRefs[i].minChunkOffset
<< "; Max Chunk Offset: "
<< std::setw(18) << myRefs[i].maxChunkOffset
<< std::dec;
// Print the mapped/unmapped if set.
if(myRefs[i].n_mapped != Reference::UNKNOWN_MAP_INFO)
{
std::cout << "; " << myRefs[i].n_mapped << " Mapped Reads";
}
if(myRefs[i].n_mapped != Reference::UNKNOWN_MAP_INFO)
{
std::cout << "; " << myRefs[i].n_unmapped << " Unmapped Reads";
}
std::cout << std::endl;
// Only print more details if not summary.
if(!summary)
{
std::vector<Bin>::iterator binIter;
for(binIter = myRefs[i].bins.begin();
binIter != myRefs[i].bins.end();
++binIter)
{
Bin* binPtr = &(*binIter);
if(binPtr->bin == Bin::NOT_USED_BIN)
{
// This bin is not used, continue.
continue;
}
// Print the bin info.
std::cout << "\t\t\tBin Name: " << binPtr->bin << std::endl;
std::cout << "\t\t\t# Chunks: " << binPtr->n_chunk << std::endl;
std::cout << std::hex << std::showbase;
for(int chunkIndex = 0; chunkIndex < binPtr->n_chunk;
++chunkIndex)
{
// If this is the last chunk of the MAX_NUM_BINS - it
// contains a mapped/unmapped count rather than the regular
// chunk addresses.
if((binPtr->bin != MAX_NUM_BINS) ||
(chunkIndex != (binPtr->n_chunk - 1)))
{
std::cout << "\t\t\t\tchunk_beg: "
<< binPtr->chunks[chunkIndex].chunk_beg
<< std::endl;
std::cout << "\t\t\t\tchunk_end: "
<< binPtr->chunks[chunkIndex].chunk_end
<< std::endl;
}
}
}
std::cout << std::dec;
// Print the linear index.
for(int linearIndex = 0; linearIndex < myRefs[i].n_intv;
++linearIndex)
{
if(myRefs[i].ioffsets[linearIndex] != 0)
{
std::cout << "\t\t\tLinearIndex["
<< std::dec << linearIndex << "] Offset: "
<< std::hex << myRefs[i].ioffsets[linearIndex]
<< std::endl;
}
}
}
}
}
| SamStatus::Status BamIndex::readIndex | ( | const char * | filename | ) | [virtual] |
| filename | the bam index file to be read. |
Implements IndexBase.
Definition at line 45 of file BamIndex.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_PARSE, ifclose(), ifopen(), ifread(), resetIndex(), and StatGenStatus::SUCCESS.
Referenced by SamFile::ReadBamIndex().
{
// Reset the index from anything that may previously be set.
resetIndex();
IFILE indexFile = ifopen(filename, "rb");
// Failed to open the index file.
if(indexFile == NULL)
{
return(SamStatus::FAIL_IO);
}
// generate the bam index structure.
// Read the magic string.
char magic[4];
if(ifread(indexFile, magic, 4) != 4)
{
// Failed to read the magic
ifclose(indexFile);
return(SamStatus::FAIL_IO);
}
// If this is not an index file, set num references to 0.
if (magic[0] != 'B' || magic[1] != 'A' || magic[2] != 'I' || magic[3] != 1)
{
// Not a BAM Index file.
ifclose(indexFile);
return(SamStatus::FAIL_PARSE);
}
// It is a bam index file.
// Read the number of reference sequences.
if(ifread(indexFile, &n_ref, 4) != 4)
{
// Failed to read.
ifclose(indexFile);
return(SamStatus::FAIL_IO);
}
// Size the references.
myRefs.resize(n_ref);
for(int refIndex = 0; refIndex < n_ref; refIndex++)
{
// Read each reference.
Reference* ref = &(myRefs[refIndex]);
// Resize the bins so they can be indexed by bin number.
ref->bins.resize(MAX_NUM_BINS + 1);
// Read the number of bins.
if(ifread(indexFile, &(ref->n_bin), 4) != 4)
{
// Failed to read the number of bins.
// Return failure.
ifclose(indexFile);
return(SamStatus::FAIL_PARSE);
}
// If there are no bins, then there are no
// mapped/unmapped reads.
if(ref->n_bin == 0)
{
ref->n_mapped = 0;
ref->n_unmapped = 0;
}
// Read each bin.
for(int binIndex = 0; binIndex < ref->n_bin; binIndex++)
{
uint32_t binNumber;
// Read in the bin number.
if(ifread(indexFile, &(binNumber), 4) != 4)
{
// Failed to read the bin number.
// Return failure.
ifclose(indexFile);
return(SamStatus::FAIL_IO);
}
// Add the bin to the reference and get the
// pointer back so the values can be set in it.
Bin* binPtr = &(ref->bins[binNumber]);
binPtr->bin = binNumber;
// Read in the number of chunks.
if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4)
{
// Failed to read number of chunks.
// Return failure.
ifclose(indexFile);
return(SamStatus::FAIL_IO);
}
// Read in the chunks.
// Allocate space for the chunks.
uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk);
binPtr->chunks = (Chunk*)malloc(sizeOfChunkList);
if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList)
{
// Failed to read the chunks.
// Return failure.
ifclose(indexFile);
return(SamStatus::FAIL_IO);
}
// Determine the min/max for this bin if it is not the max bin.
if(binPtr->bin != MAX_NUM_BINS)
{
for(int i = 0; i < binPtr->n_chunk; i++)
{
if(binPtr->chunks[i].chunk_beg < ref->minChunkOffset)
{
ref->minChunkOffset = binPtr->chunks[i].chunk_beg;
}
if(binPtr->chunks[i].chunk_end > ref->maxChunkOffset)
{
ref->maxChunkOffset = binPtr->chunks[i].chunk_end;
}
if(binPtr->chunks[i].chunk_end > maxOverallOffset)
{
maxOverallOffset = binPtr->chunks[i].chunk_end;
}
}
}
else
{
// Mapped/unmapped are the last chunk of the
// MAX BIN
ref->n_mapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_beg;
ref->n_unmapped = binPtr->chunks[binPtr->n_chunk - 1].chunk_end;
}
}
// Read the number of intervals.
if(ifread(indexFile, &(ref->n_intv), 4) != 4)
{
// Failed to read, set to 0.
ref->n_intv = 0;
// Return failure.
ifclose(indexFile);
return(SamStatus::FAIL_IO);
}
// Allocate space for the intervals and read them.
uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t);
ref->ioffsets = (uint64_t*)malloc(linearIndexSize);
if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize)
{
// Failed to read the linear index.
// Return failure.
ifclose(indexFile);
return(SamStatus::FAIL_IO);
}
}
int32_t numUnmapped = 0;
if(ifread(indexFile, &numUnmapped, sizeof(int32_t)) == sizeof(int32_t))
{
myUnMappedNumReads = numUnmapped;
}
// Successfully read the bam index file.
ifclose(indexFile);
return(SamStatus::SUCCESS);
}