Classes | |
struct | TabixFormat |
Public Member Functions | |
void | resetIndex () |
Reset the member data for a new index file. | |
StatGenStatus::Status | readIndex (const char *filename) |
bool | getStartPos (const char *refName, int32_t start, uint64_t &fileStartPos) |
Get the starting file offset to look for the specified start position. |
Definition at line 31 of file Tabix.h.
bool Tabix::getStartPos | ( | const char * | refName, | |
int32_t | start, | |||
uint64_t & | fileStartPos | |||
) |
Get the starting file offset to look for the specified start position.
For an entire reference ID, set start to -1. To start at the beginning of the region, set start to 0/-1.
Definition at line 216 of file Tabix.cpp.
00218 { 00219 // Look for the reference name in the list. 00220 int refID = 0; 00221 for(refID = 0; refID < n_ref; refID++) 00222 { 00223 if(strcmp(refName, myChromNamesVector[refID]) == 0) 00224 { 00225 // found the reference 00226 break; 00227 } 00228 } 00229 if(refID >= n_ref) 00230 { 00231 // Didn't find the refName, so return false. 00232 return(false); 00233 } 00234 00235 // Look up in the linear index. 00236 if(start < 0) 00237 { 00238 // Negative index, so start at 0. 00239 start = 0; 00240 } 00241 return(getMinOffsetFromLinearIndex(refID, start, fileStartPos)); 00242 }
StatGenStatus::Status Tabix::readIndex | ( | const char * | filename | ) | [virtual] |
filename | the bam index file to be read. |
Implements IndexBase.
Definition at line 50 of file Tabix.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_PARSE, ifopen(), ifread(), resetIndex(), and StatGenStatus::SUCCESS.
00051 { 00052 // Reset the index from anything that may previously be set. 00053 resetIndex(); 00054 00055 IFILE indexFile = ifopen(filename, "rb"); 00056 00057 // Failed to open the index file. 00058 if(indexFile == NULL) 00059 { 00060 return(StatGenStatus::FAIL_IO); 00061 } 00062 00063 // read the tabix index structure. 00064 00065 // Read the magic string. 00066 char magic[4]; 00067 if(ifread(indexFile, magic, 4) != 4) 00068 { 00069 // Failed to read the magic 00070 return(StatGenStatus::FAIL_IO); 00071 } 00072 00073 // If this is not an index file, set num references to 0. 00074 if (magic[0] != 'T' || magic[1] != 'B' || magic[2] != 'I' || magic[3] != 1) 00075 { 00076 // Not a Tabix Index file. 00077 return(StatGenStatus::FAIL_PARSE); 00078 } 00079 00080 // It is a tabix index file. 00081 // Read the number of reference sequences. 00082 if(ifread(indexFile, &n_ref, 4) != 4) 00083 { 00084 // Failed to read. 00085 return(StatGenStatus::FAIL_IO); 00086 } 00087 00088 // Size the references. 00089 myRefs.resize(n_ref); 00090 00091 // Read the Format configuration. 00092 if(ifread(indexFile, &myFormat, sizeof(myFormat)) != sizeof(myFormat)) 00093 { 00094 // Failed to read. 00095 return(StatGenStatus::FAIL_IO); 00096 } 00097 00098 // Read the length of the chromosome names. 00099 uint32_t l_nm; 00100 00101 if(ifread(indexFile, &l_nm, sizeof(l_nm)) != sizeof(l_nm)) 00102 { 00103 // Failed to read. 00104 return(StatGenStatus::FAIL_IO); 00105 } 00106 00107 // Read the chromosome names. 00108 myChromNamesBuffer = new char[l_nm]; 00109 if(ifread(indexFile, myChromNamesBuffer, l_nm) != l_nm) 00110 { 00111 return(StatGenStatus::FAIL_IO); 00112 } 00113 myChromNamesVector.resize(n_ref); 00114 00115 // Parse out the chromosome names. 00116 bool prevNull = true; 00117 int chromIndex = 0; 00118 for(uint32_t i = 0; i < l_nm; i++) 00119 { 00120 if(chromIndex >= n_ref) 00121 { 00122 // already set the pointer for the last chromosome name, 00123 // so stop looping. 00124 break; 00125 } 00126 if(prevNull == true) 00127 { 00128 myChromNamesVector[chromIndex++] = myChromNamesBuffer + i; 00129 prevNull = false; 00130 } 00131 if(myChromNamesBuffer[i] == '\0') 00132 { 00133 prevNull = true; 00134 } 00135 } 00136 00137 for(int refIndex = 0; refIndex < n_ref; refIndex++) 00138 { 00139 // Read each reference. 00140 Reference* ref = &(myRefs[refIndex]); 00141 00142 // Resize the bins so they can be indexed by bin number. 00143 ref->bins.resize(MAX_NUM_BINS + 1); 00144 00145 // Read the number of bins. 00146 if(ifread(indexFile, &(ref->n_bin), 4) != 4) 00147 { 00148 // Failed to read the number of bins. 00149 // Return failure. 00150 return(StatGenStatus::FAIL_PARSE); 00151 } 00152 00153 // Read each bin. 00154 for(int binIndex = 0; binIndex < ref->n_bin; binIndex++) 00155 { 00156 uint32_t binNumber; 00157 00158 // Read in the bin number. 00159 if(ifread(indexFile, &(binNumber), 4) != 4) 00160 { 00161 // Failed to read the bin number. 00162 // Return failure. 00163 return(StatGenStatus::FAIL_IO); 00164 } 00165 00166 // Add the bin to the reference and get the 00167 // pointer back so the values can be set in it. 00168 Bin* binPtr = &(ref->bins[binNumber]); 00169 binPtr->bin = binNumber; 00170 00171 // Read in the number of chunks. 00172 if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4) 00173 { 00174 // Failed to read number of chunks. 00175 // Return failure. 00176 return(StatGenStatus::FAIL_IO); 00177 } 00178 00179 // Read in the chunks. 00180 // Allocate space for the chunks. 00181 uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk); 00182 binPtr->chunks = (Chunk*)malloc(sizeOfChunkList); 00183 if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList) 00184 { 00185 // Failed to read the chunks. 00186 // Return failure. 00187 return(StatGenStatus::FAIL_IO); 00188 } 00189 } 00190 00191 // Read the number of intervals. 00192 if(ifread(indexFile, &(ref->n_intv), 4) != 4) 00193 { 00194 // Failed to read, set to 0. 00195 ref->n_intv = 0; 00196 // Return failure. 00197 return(StatGenStatus::FAIL_IO); 00198 } 00199 00200 // Allocate space for the intervals and read them. 00201 uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t); 00202 ref->ioffsets = (uint64_t*)malloc(linearIndexSize); 00203 if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize) 00204 { 00205 // Failed to read the linear index. 00206 // Return failure. 00207 return(StatGenStatus::FAIL_IO); 00208 } 00209 } 00210 00211 // Successfully read teh bam index file. 00212 return(StatGenStatus::SUCCESS); 00213 }