|
libStatGen Software
1
|


Classes | |
| struct | TabixFormat |
Public Types | |
| enum | Format { FORMAT_GENERIC = 0, FORMAT_SAM = 1, FORMAT_VCF = 2 } |
Public Member Functions | |
| void | resetIndex () |
| Reset the member data for a new index file. | |
| StatGenStatus::Status | readIndex (const char *filename) |
| bool | getStartPos (const char *refName, int32_t start, uint64_t &fileStartPos) const |
| Get the starting file offset to look for the specified start position. | |
| const char * | getRefName (unsigned int indexNum) const |
| Return the reference name at the specified index, or throw an exception if the index is out of range. | |
| int32_t | getFormat () const |
| const char * Tabix::getRefName | ( | unsigned int | indexNum | ) | const |
Return the reference name at the specified index, or throw an exception if the index is out of range.
Definition at line 247 of file Tabix.cpp.
{
    // Returns the chromosome/reference name stored at position indexNum
    // of the name table.
    // Throws std::runtime_error when indexNum is not a valid position in
    // myChromNamesVector.
    if(indexNum >= myChromNamesVector.size())
    {
        String message = "ERROR: Out of range on Tabix::getRefName(";
        message += indexNum;
        message += ")";
        // The throw leaves the function, so no return is needed after it.
        throw(std::runtime_error(message.c_str()));
    }
    return(myChromNamesVector[indexNum]);
}
| bool Tabix::getStartPos | ( | const char * | refName, |
| int32_t | start, | ||
| uint64_t & | fileStartPos | ||
| ) | const |
Get the starting file offset to look for the specified start position.
For an entire reference ID, set start to -1. To start at the beginning of the region, set start to 0/-1.
Definition at line 218 of file Tabix.cpp.
{
    // Looks up refName in the chromosome-name table and, when found, asks
    // getMinOffsetFromLinearIndex for the file offset matching 'start'.
    // Returns false when the name is NULL or is not in the table; otherwise
    // returns whatever getMinOffsetFromLinearIndex returns.
    // A negative 'start' is treated as 0.

    // A NULL name can never match and must not be handed to strcmp.
    if(refName == NULL)
    {
        return(false);
    }

    // Look for the reference name in the list.
    // readIndex sets n_ref before it sizes and fills the name table and can
    // return early in between, so the table is checked as well as n_ref.
    int refID = 0;
    for(refID = 0; refID < n_ref; refID++)
    {
        if(static_cast<size_t>(refID) >= myChromNamesVector.size())
        {
            // Ran past the end of the name table without a match.
            return(false);
        }
        const char* chromName = myChromNamesVector[refID];
        // Skip entries that were never pointed at a name.
        if((chromName != NULL) && (strcmp(refName, chromName) == 0))
        {
            // found the reference
            break;
        }
    }
    if(refID >= n_ref)
    {
        // Didn't find the refName, so return false.
        return(false);
    }

    // Look up in the linear index.
    if(start < 0)
    {
        // Negative index, so start at 0.
        start = 0;
    }
    return(getMinOffsetFromLinearIndex(refID, start, fileStartPos));
}
| StatGenStatus::Status Tabix::readIndex | ( | const char * | filename | ) | [virtual] |
| filename | the tabix index file to be read. |
Implements IndexBase.
Definition at line 52 of file Tabix.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_PARSE, ifopen(), ifread(), resetIndex(), and StatGenStatus::SUCCESS.
{
    // Reads the tabix index stored in 'filename' into this object.
    // Returns:
    //   StatGenStatus::SUCCESS    - the whole index was read.
    //   StatGenStatus::FAIL_IO    - the file could not be opened or a read
    //                               returned fewer bytes than requested.
    //   StatGenStatus::FAIL_PARSE - the contents are not a usable tabix index
    //                               (bad magic, negative/oversized counts,
    //                               missing names, out-of-range bin number).
    //                               A short read of a bin count is also
    //                               reported as FAIL_PARSE, as it was before.
    // The values are read as raw bytes straight into the member variables,
    // so no byte-order conversion is performed here.

    // Reset the index from anything that may previously be set.
    resetIndex();

    IFILE indexFile = ifopen(filename, "rb");
    // Failed to open the index file.
    if(indexFile == NULL)
    {
        return(StatGenStatus::FAIL_IO);
    }

    // Close the index file on every exit path below, success or failure.
    struct IndexFileCloser
    {
        IFILE& myFile;
        explicit IndexFileCloser(IFILE& file) : myFile(file) {}
        ~IndexFileCloser()
        {
            if(myFile != NULL)
            {
                ifclose(myFile);
            }
        }
    } indexFileCloser(indexFile);

    // Read the tabix index structure.
    // Read the magic string.
    char magic[4];
    if(ifread(indexFile, magic, 4) != 4)
    {
        // Failed to read the magic
        return(StatGenStatus::FAIL_IO);
    }

    // The file must start with the bytes 'T', 'B', 'I', 1.
    if (magic[0] != 'T' || magic[1] != 'B' || magic[2] != 'I' || magic[3] != 1)
    {
        // Not a Tabix Index file.
        return(StatGenStatus::FAIL_PARSE);
    }

    // It is a tabix index file.
    // Read the number of reference sequences.
    if(ifread(indexFile, &n_ref, 4) != 4)
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }
    if(n_ref < 0)
    {
        // A negative count cannot be used to size the tables; leave the
        // index empty rather than holding a bogus count.
        n_ref = 0;
        return(StatGenStatus::FAIL_PARSE);
    }

    // Size the references.
    myRefs.resize(n_ref);

    // Read the Format configuration.
    if(ifread(indexFile, &myFormat, sizeof(myFormat)) != sizeof(myFormat))
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // Read the length of the chromosome names.
    uint32_t l_nm;
    if(ifread(indexFile, &l_nm, sizeof(l_nm)) != sizeof(l_nm))
    {
        // Failed to read.
        return(StatGenStatus::FAIL_IO);
    }

    // Read the chromosome names.
    // One extra byte is allocated and set to '\0' so the last name is
    // always terminated, even if the file omits the final terminator.
    myChromNamesBuffer = new char[static_cast<size_t>(l_nm) + 1];
    myChromNamesBuffer[l_nm] = '\0';
    if(ifread(indexFile, myChromNamesBuffer, l_nm) != l_nm)
    {
        return(StatGenStatus::FAIL_IO);
    }
    myChromNamesVector.resize(n_ref);

    // Parse out the chromosome names: each name starts at the first byte of
    // the buffer or at the byte following a '\0'.
    bool prevNull = true;
    int chromIndex = 0;
    for(uint32_t i = 0; i < l_nm; i++)
    {
        if(chromIndex >= n_ref)
        {
            // already set the pointer for the last chromosome name,
            // so stop looping.
            break;
        }
        if(prevNull == true)
        {
            myChromNamesVector[chromIndex++] = myChromNamesBuffer + i;
            prevNull = false;
        }
        if(myChromNamesBuffer[i] == '\0')
        {
            prevNull = true;
        }
    }
    if(chromIndex < n_ref)
    {
        // The names block holds fewer names than the reference count, so
        // some name entries were never set.
        return(StatGenStatus::FAIL_PARSE);
    }

    for(int refIndex = 0; refIndex < n_ref; refIndex++)
    {
        // Read each reference.
        Reference* ref = &(myRefs[refIndex]);

        // Resize the bins so they can be indexed by bin number.
        ref->bins.resize(MAX_NUM_BINS + 1);

        // Read the number of bins.
        if(ifread(indexFile, &(ref->n_bin), 4) != 4)
        {
            // Failed to read the number of bins.
            // Return failure.
            return(StatGenStatus::FAIL_PARSE);
        }
        if(ref->n_bin < 0)
        {
            // A negative bin count is not a valid index.
            return(StatGenStatus::FAIL_PARSE);
        }

        // Read each bin.
        for(int binIndex = 0; binIndex < ref->n_bin; binIndex++)
        {
            uint32_t binNumber;

            // Read in the bin number.
            if(ifread(indexFile, &(binNumber), 4) != 4)
            {
                // Failed to read the bin number.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }
            if(binNumber >= ref->bins.size())
            {
                // The bin number comes from the file; refuse one that would
                // index past the end of the bin table.
                return(StatGenStatus::FAIL_PARSE);
            }

            // Get the bin for this bin number so the values can be set
            // in it.
            Bin* binPtr = &(ref->bins[binNumber]);
            binPtr->bin = binNumber;

            // Read in the number of chunks.
            if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4)
            {
                // Failed to read number of chunks.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }
            if((binPtr->n_chunk < 0) ||
               (static_cast<uint32_t>(binPtr->n_chunk) >
                (0xFFFFFFFFu / sizeof(Chunk))))
            {
                // Negative, or so large that the byte count below would
                // not fit in 32 bits.
                binPtr->n_chunk = 0;
                return(StatGenStatus::FAIL_PARSE);
            }

            // Read in the chunks.
            // Allocate space for the chunks.
            uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk);
            binPtr->chunks = (Chunk*)malloc(sizeOfChunkList);
            if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList)
            {
                // Failed to read the chunks.
                // Return failure.
                return(StatGenStatus::FAIL_IO);
            }
        }

        // Read the number of intervals.
        if(ifread(indexFile, &(ref->n_intv), 4) != 4)
        {
            // Failed to read, set to 0.
            ref->n_intv = 0;
            // Return failure.
            return(StatGenStatus::FAIL_IO);
        }
        if((ref->n_intv < 0) ||
           (static_cast<uint32_t>(ref->n_intv) >
            (0xFFFFFFFFu / sizeof(uint64_t))))
        {
            // Negative, or so large that the byte count below would not
            // fit in 32 bits.
            ref->n_intv = 0;
            return(StatGenStatus::FAIL_PARSE);
        }

        // Allocate space for the intervals and read them.
        uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t);
        ref->ioffsets = (uint64_t*)malloc(linearIndexSize);
        if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize)
        {
            // Failed to read the linear index.
            // Return failure.
            return(StatGenStatus::FAIL_IO);
        }
    }

    // Successfully read the tabix index file.
    return(StatGenStatus::SUCCESS);
}