Allows the user to easily read/write a SAM/BAM file. More...
#include <SamFile.h>


Public Types | |
| enum | OpenType { READ, WRITE } |
Enum for indicating whether to open the file for read or write. More... | |
| enum | SortedType { UNSORTED = 0, FLAG, COORDINATE, QUERY_NAME } |
Enum for indicating the type of sort for the file. More... | |
Public Member Functions | |
| SamFile () | |
| Default Constructor. | |
| SamFile (ErrorHandler::HandlingType errorHandlingType) | |
| Constructor that sets the error handling type. | |
| SamFile (const char *filename, OpenType mode) | |
| Constructor that opens the specified file based on the specified mode (READ/WRITE). | |
| SamFile (const char *filename, OpenType mode, ErrorHandler::HandlingType errorHandlingType) | |
| Constructor that opens the specified file based on the specified mode (READ/WRITE) and handles errors per the specified handleType. | |
| SamFile (const char *filename, OpenType mode, SamFileHeader *header) | |
| Constructor that opens the specified file based on the specified mode (READ/WRITE). | |
| SamFile (const char *filename, OpenType mode, ErrorHandler::HandlingType errorHandlingType, SamFileHeader *header) | |
| Constructor that opens the specified file based on the specified mode (READ/WRITE) and handles errors per the specified handleType. | |
| bool | OpenForRead (const char *filename, SamFileHeader *header=NULL) |
| Open a sam/bam file for reading with the specified filename. | |
| bool | OpenForWrite (const char *filename, SamFileHeader *header=NULL) |
| Open a sam/bam file for writing with the specified filename. | |
| bool | ReadBamIndex (const char *filename) |
| Read the specified bam index file. | |
| bool | ReadBamIndex () |
| Read the bam index file using the BAM filename as a base. | |
| void | SetReference (GenomeSequence *reference) |
| Sets the reference to the specified genome sequence object. | |
| void | SetReadSequenceTranslation (SamRecord::SequenceTranslation translation) |
| Set the type of sequence translation to use when reading the sequence. | |
| void | SetWriteSequenceTranslation (SamRecord::SequenceTranslation translation) |
| Set the type of sequence translation to use when writing the sequence. | |
| void | Close () |
| Close the file if there is one open. | |
| bool | IsEOF () |
| Returns whether or not the end of the file has been reached. | |
| bool | ReadHeader (SamFileHeader &header) |
| Reads the header section from the file and stores it in the passed in header. | |
| bool | WriteHeader (SamFileHeader &header) |
| Writes the specified header into the file. | |
| bool | ReadRecord (SamFileHeader &header, SamRecord &record) |
| Reads the next record from the file & stores it in the passed in record. | |
| bool | WriteRecord (SamFileHeader &header, SamRecord &record) |
| Writes the specified record into the file. | |
| void | setSortedValidation (SortedType sortType) |
| Set the flag to validate that the file is sorted as it is read/written. | |
| uint32_t | GetCurrentRecordCount () |
| Return the number of records that have been read/written so far. | |
| SamStatus::Status | GetFailure () |
| Get the Status of the last call that sets status. | |
| SamStatus::Status | GetStatus () |
| Get the Status of the last call that sets status. | |
| const char * | GetStatusMessage () |
| Get the Status Message of the last call that sets status. | |
| bool | SetReadSection (int32_t refID) |
| Sets what part of the BAM file should be read. | |
| bool | SetReadSection (const char *refName) |
| Sets what part of the BAM file should be read. | |
| bool | SetReadSection (int32_t refID, int32_t start, int32_t end) |
| Sets what part of the BAM file should be read. | |
| bool | SetReadSection (const char *refName, int32_t start, int32_t end) |
| Sets what part of the BAM file should be read. | |
| int32_t | getNumMappedReadsFromIndex (int32_t refID) |
| Get the number of mapped reads in the specified reference id. | |
| int32_t | getNumUnMappedReadsFromIndex (int32_t refID) |
| Get the number of unmapped reads in the specified reference id. | |
| int32_t | getNumMappedReadsFromIndex (const char *refName, SamFileHeader &header) |
| Get the number of mapped reads in the specified reference name. | |
| int32_t | getNumUnMappedReadsFromIndex (const char *refName, SamFileHeader &header) |
| Get the number of unmapped reads in the specified reference name. | |
| uint32_t | GetNumOverlaps (SamRecord &samRecord) |
| Returns the number of bases in the passed in read that overlap the region that is currently set. | |
| void | GenerateStatistics (bool genStats) |
| Whether or not statistics should be generated for this file. | |
| void | PrintStatistics () |
Protected Member Functions | |
| void | init (const char *filename, OpenType mode, SamFileHeader *header) |
| void | resetFile () |
| Resets the file prepping for a new file. | |
| bool | validateSortOrder (SamRecord &record, SamFileHeader &header) |
| Validate that the record is sorted compared to the previously read record if there is one, according to the specified sort order. | |
| SortedType | getSortOrderFromHeader (SamFileHeader &header) |
| bool | readIndexedRecord (SamFileHeader &header, SamRecord &record) |
| Overwrites read record to read from the specific reference only. | |
| bool | processNewSection (SamFileHeader &header) |
Protected Attributes | |
| IFILE | myFilePtr |
| GenericSamInterface * | myInterfacePtr |
| bool | myIsOpenForRead |
| Flag to indicate if a file is open for reading. | |
| bool | myIsOpenForWrite |
| Flag to indicate if a file is open for writing. | |
| bool | myHasHeader |
| Flag to indicate if a header has been read/written - required before being able to read/write a record. | |
| SortedType | mySortedType |
| int32_t | myPrevCoord |
| Previous values used for checking if the file is sorted. | |
| int32_t | myPrevRefID |
| std::string | myPrevReadName |
| uint32_t | myRecordCount |
| Keep a count of the number of records that have been read/written so far. | |
| SamStatistics * | myStatistics |
| Pointer to the statistics for this file. | |
| SamStatus | myStatus |
| The status of the last SamFile command. | |
| bool | myIsBamOpenForRead |
| Values for reading Sorted BAM files via the index. | |
| bool | myNewSection |
| int32_t | myRefID |
| int32_t | myStartPos |
| int32_t | myEndPos |
| uint64_t | myCurrentChunkEnd |
| SortedChunkList | myChunksToRead |
| BamIndex * | myBamIndex |
| GenomeSequence * | myRefPtr |
| SamRecord::SequenceTranslation | myReadTranslation |
| SamRecord::SequenceTranslation | myWriteTranslation |
| std::string | myRefName |
Allows the user to easily read/write a SAM/BAM file.
Definition at line 30 of file SamFile.h.
| enum SamFile::OpenType |
| enum SamFile::SortedType |
Enum for indicating the type of sort for the file.
| UNSORTED |
file is not sorted. |
| FLAG |
SO flag from the header indicates the sort type. |
| COORDINATE |
file is sorted by coordinate. |
| QUERY_NAME |
file is sorted by queryname. |
Definition at line 41 of file SamFile.h.
00041 { 00042 UNSORTED = 0, ///< file is not sorted. 00043 FLAG, ///< SO flag from the header indicates the sort type. 00044 COORDINATE, ///< file is sorted by coordinate. 00045 QUERY_NAME ///< file is sorted by queryname. 00046 };
| SamFile::SamFile | ( | ErrorHandler::HandlingType | errorHandlingType | ) |
Constructor that sets the error handling type.
| errorHandlingType | how to handle errors. |
Definition at line 40 of file SamFile.cpp.
References resetFile().
00041 : myFilePtr(NULL), 00042 myInterfacePtr(NULL), 00043 myStatistics(NULL), 00044 myStatus(errorHandlingType), 00045 myBamIndex(NULL), 00046 myRefPtr(NULL), 00047 myReadTranslation(SamRecord::NONE), 00048 myWriteTranslation(SamRecord::NONE) 00049 { 00050 resetFile(); 00051 }
| SamFile::SamFile | ( | const char * | filename, | |
| OpenType | mode | |||
| ) |
Constructor that opens the specified file based on the specified mode (READ/WRITE).
| filename | name of the file to open. | |
| mode | mode to use for opening the file. |
Definition at line 56 of file SamFile.cpp.
00057 : myStatus() 00058 { 00059 init(filename, mode, NULL); 00060 }
| SamFile::SamFile | ( | const char * | filename, | |
| OpenType | mode, | |||
| ErrorHandler::HandlingType | errorHandlingType | |||
| ) |
Constructor that opens the specified file based on the specified mode (READ/WRITE) and handles errors per the specified handleType.
| filename | name of the file to open. | |
| mode | mode to use for opening the file. | |
| errorHandlingType | how to handle errors. |
Definition at line 65 of file SamFile.cpp.
00067 : myStatus(errorHandlingType) 00068 { 00069 init(filename, mode, NULL); 00070 }
| SamFile::SamFile | ( | const char * | filename, | |
| OpenType | mode, | |||
| SamFileHeader * | header | |||
| ) |
Constructor that opens the specified file based on the specified mode (READ/WRITE).
| filename | name of the file to open. | |
| mode | mode to use for opening the file. | |
| header | to read into or write from |
Definition at line 75 of file SamFile.cpp.
00076 : myStatus() 00077 { 00078 init(filename, mode, header); 00079 }
| SamFile::SamFile | ( | const char * | filename, | |
| OpenType | mode, | |||
| ErrorHandler::HandlingType | errorHandlingType, | |||
| SamFileHeader * | header | |||
| ) |
Constructor that opens the specified file based on the specified mode (READ/WRITE) and handles errors per the specified handleType.
| filename | name of the file to open. | |
| mode | mode to use for opening the file. | |
| errorHandlingType | how to handle errors. | |
| header | to read into or write from |
Definition at line 84 of file SamFile.cpp.
00087 : myStatus(errorHandlingType) 00088 { 00089 init(filename, mode, header); 00090 }
| void SamFile::GenerateStatistics | ( | bool | genStats | ) |
Whether or not statistics should be generated for this file.
The value is carried over between files and is not reset, but the statistics themselves are reset between files.
| genStats | set to true if statistics should be generated, false if not. |
Definition at line 798 of file SamFile.cpp.
References myStatistics.
00799 { 00800 if(genStats) 00801 { 00802 if(myStatistics == NULL) 00803 { 00804 // Want to generate statistics, but do not yet have the 00805 // structure for them, so create one. 00806 myStatistics = new SamStatistics(); 00807 } 00808 } 00809 else 00810 { 00811 // Do not generate statistics, so if myStatistics is not NULL, 00812 // delete it. 00813 if(myStatistics != NULL) 00814 { 00815 delete myStatistics; 00816 myStatistics = NULL; 00817 } 00818 } 00819 00820 }
| SamStatus::Status SamFile::GetFailure | ( | ) | [inline] |
Get the Status of the last call that sets status.
To remain backwards compatible - will be removed later.
Definition at line 171 of file SamFile.h.
References GetStatus().
00172 { 00173 return(GetStatus()); 00174 }
| int32_t SamFile::getNumMappedReadsFromIndex | ( | const char * | refName, | |
| SamFileHeader & | header | |||
| ) |
Get the number of mapped reads in the specified reference name.
Returns -1 for unknown reference names.
| refName | reference name for which to extract the number of mapped reads. | |
| header | header object containing the map from refName to refID |
Definition at line 740 of file SamFile.cpp.
References BamIndex::getNumMappedReads(), myStatus, and BamIndex::REF_ID_UNMAPPED.
00742 { 00743 // The bam index must have already been read. 00744 if(myBamIndex == NULL) 00745 { 00746 myStatus.setStatus(SamStatus::FAIL_ORDER, 00747 "Canot get num mapped reads from the index until it has been read."); 00748 return(false); 00749 } 00750 int32_t refID = BamIndex::REF_ID_UNMAPPED; 00751 if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0)) 00752 { 00753 // Reference name specified, so read just the "-1" entries. 00754 refID = header.getReferenceID(refName); 00755 } 00756 return(myBamIndex->getNumMappedReads(refID)); 00757 }
| int32_t SamFile::getNumMappedReadsFromIndex | ( | int32_t | refID | ) |
Get the number of mapped reads in the specified reference id.
Returns -1 for out of range refIDs.
| refID | reference ID for which to extract the number of mapped reads. |
Definition at line 710 of file SamFile.cpp.
References BamIndex::getNumMappedReads(), and myStatus.
00711 { 00712 // The bam index must have already been read. 00713 if(myBamIndex == NULL) 00714 { 00715 myStatus.setStatus(SamStatus::FAIL_ORDER, 00716 "Canot get num mapped reads from the index until it has been read."); 00717 return(false); 00718 } 00719 return(myBamIndex->getNumMappedReads(refID)); 00720 }
| uint32_t SamFile::GetNumOverlaps | ( | SamRecord & | samRecord | ) |
Returns the number of bases in the passed in read that overlap the region that is currently set.
| samRecord | to check for overlapping bases. |
Definition at line 784 of file SamFile.cpp.
References SamRecord::getNumOverlaps(), SamRecord::setReference(), and SamRecord::setSequenceTranslation().
00785 { 00786 if(myRefPtr != NULL) 00787 { 00788 samRecord.setReference(myRefPtr); 00789 } 00790 samRecord.setSequenceTranslation(myReadTranslation); 00791 00792 // Get the overlaps in the sam record for the region currently set 00793 // for this file. 00794 return(samRecord.getNumOverlaps(myStartPos, myEndPos)); 00795 }
| int32_t SamFile::getNumUnMappedReadsFromIndex | ( | const char * | refName, | |
| SamFileHeader & | header | |||
| ) |
Get the number of unmapped reads in the specified reference name.
Returns -1 for unknown reference names.
| refName | reference name for which to extract the number of unmapped reads. | |
| header | header object containing the map from refName to refID |
Definition at line 762 of file SamFile.cpp.
References BamIndex::getNumUnMappedReads(), myStatus, and BamIndex::REF_ID_UNMAPPED.
00764 { 00765 // The bam index must have already been read. 00766 if(myBamIndex == NULL) 00767 { 00768 myStatus.setStatus(SamStatus::FAIL_ORDER, 00769 "Canot get num unmapped reads from the index until it has been read."); 00770 return(false); 00771 } 00772 int32_t refID = BamIndex::REF_ID_UNMAPPED; 00773 if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0)) 00774 { 00775 // Reference name specified, so read just the "-1" entries. 00776 refID = header.getReferenceID(refName); 00777 } 00778 return(myBamIndex->getNumUnMappedReads(refID)); 00779 }
| int32_t SamFile::getNumUnMappedReadsFromIndex | ( | int32_t | refID | ) |
Get the number of unmapped reads in the specified reference id.
Returns -1 for out of range refIDs.
| refID | reference ID for which to extract the number of unmapped reads. |
Definition at line 725 of file SamFile.cpp.
References BamIndex::getNumUnMappedReads(), and myStatus.
00726 { 00727 // The bam index must have already been read. 00728 if(myBamIndex == NULL) 00729 { 00730 myStatus.setStatus(SamStatus::FAIL_ORDER, 00731 "Canot get num unmapped reads from the index until it has been read."); 00732 return(false); 00733 } 00734 return(myBamIndex->getNumUnMappedReads(refID)); 00735 }
| bool SamFile::IsEOF | ( | ) |
Returns whether or not the end of the file has been reached.
Definition at line 389 of file SamFile.cpp.
References ifeof().
00390 { 00391 if (myFilePtr != NULL) 00392 { 00393 // File Pointer is set, so return if eof. 00394 return(ifeof(myFilePtr)); 00395 } 00396 // File pointer is not set, so return true, eof. 00397 return true; 00398 }
| bool SamFile::OpenForRead | ( | const char * | filename, | |
| SamFileHeader * | header = NULL | |||
| ) |
Open a sam/bam file for reading with the specified filename.
| filename | the sam/bam file to open for reading. | |
| header | to read into or write from (optional) |
Definition at line 104 of file SamFile.cpp.
References InputFile::BGZF, InputFile::disableBuffering(), ifopen(), ifread(), ifrewind(), myIsBamOpenForRead, myIsOpenForRead, myStatus, ReadHeader(), resetFile(), and InputFile::UNCOMPRESSED.
00105 { 00106 // Reset for any previously operated on files. 00107 resetFile(); 00108 00109 int lastchar = 0; 00110 00111 while (filename[lastchar] != 0) lastchar++; 00112 00113 // If at least one character, check for '-'. 00114 if((lastchar >= 1) && (filename[0] == '-')) 00115 { 00116 // Read from stdin - determine type of file to read. 00117 // Determine if compressed bam. 00118 if(strcmp(filename, "-.bam") == 0) 00119 { 00120 // Compressed bam - open as bgzf. 00121 // -.bam is the filename, read compressed bam from stdin 00122 filename = "-"; 00123 myFilePtr = ifopen(filename, "rb", InputFile::BGZF); 00124 myFilePtr->disableBuffering(); 00125 myInterfacePtr = new BamInterface; 00126 00127 // Read the magic string. 00128 char magic[4]; 00129 ifread(myFilePtr, magic, 4); 00130 } 00131 else if(strcmp(filename, "-.ubam") == 0) 00132 { 00133 // uncompressed BAM File. 00134 // -.ubam is the filename, read uncompressed bam from stdin 00135 filename = "-"; 00136 myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED); 00137 00138 myInterfacePtr = new BamInterface; 00139 00140 // Read the magic string. 00141 char magic[4]; 00142 ifread(myFilePtr, magic, 4); 00143 } 00144 else 00145 { 00146 // SAM File. 00147 // read sam from stdin 00148 filename = "-"; 00149 myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED); 00150 myInterfacePtr = new SamInterface; 00151 } 00152 } 00153 else 00154 { 00155 // Not from stdin. Read the file to determine the type. 
00156 myFilePtr = ifopen(filename, "rb"); 00157 00158 if (myFilePtr == NULL) 00159 { 00160 std::string errorMessage = "Failed to Open "; 00161 errorMessage += filename; 00162 errorMessage += " for reading"; 00163 myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str()); 00164 return(false); 00165 } 00166 00167 char magic[4]; 00168 ifread(myFilePtr, magic, 4); 00169 00170 if (magic[0] == 'B' && magic[1] == 'A' && magic[2] == 'M' && 00171 magic[3] == 1) 00172 { 00173 myInterfacePtr = new BamInterface; 00174 // Set that it is a bam file open for reading. This is needed to 00175 // determine if an index file can be used. 00176 myIsBamOpenForRead = true; 00177 } 00178 else 00179 { 00180 // Not a bam, so rewind to the beginning of the file so it 00181 // can be read. 00182 ifrewind(myFilePtr); 00183 myInterfacePtr = new SamInterface; 00184 } 00185 } 00186 00187 // File is open for reading. 00188 myIsOpenForRead = true; 00189 00190 // Read the header if one was passed in. 00191 if(header != NULL) 00192 { 00193 return(ReadHeader(*header)); 00194 } 00195 00196 // Successfully opened the file. 00197 myStatus = SamStatus::SUCCESS; 00198 return(true); 00199 }
| bool SamFile::OpenForWrite | ( | const char * | filename, | |
| SamFileHeader * | header = NULL | |||
| ) |
Open a sam/bam file for writing with the specified filename.
| filename | the sam/bam file to open for writing. | |
| header | to read into or write from (optional) |
Definition at line 203 of file SamFile.cpp.
References InputFile::BGZF, ifopen(), myIsOpenForWrite, myStatus, resetFile(), InputFile::UNCOMPRESSED, and WriteHeader().
00204 { 00205 // Reset for any previously operated on files. 00206 resetFile(); 00207 00208 int lastchar = 0; 00209 while (filename[lastchar] != 0) lastchar++; 00210 if (lastchar >= 4 && 00211 filename[lastchar - 4] == 'u' && 00212 filename[lastchar - 3] == 'b' && 00213 filename[lastchar - 2] == 'a' && 00214 filename[lastchar - 1] == 'm') 00215 { 00216 // BAM File. 00217 // if -.ubam is the filename, write uncompressed bam to stdout 00218 if((lastchar == 6) && (filename[0] == '-') && (filename[1] == '.')) 00219 { 00220 filename = "-"; 00221 } 00222 myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED); 00223 00224 myInterfacePtr = new BamInterface; 00225 } 00226 else if (lastchar >= 3 && 00227 filename[lastchar - 3] == 'b' && 00228 filename[lastchar - 2] == 'a' && 00229 filename[lastchar - 1] == 'm') 00230 { 00231 // BAM File. 00232 // if -.bam is the filename, write compressed bam to stdout 00233 if((lastchar == 5) && (filename[0] == '-') && (filename[1] == '.')) 00234 { 00235 filename = "-"; 00236 } 00237 myFilePtr = ifopen(filename, "wb", InputFile::BGZF); 00238 00239 myInterfacePtr = new BamInterface; 00240 } 00241 else 00242 { 00243 // SAM File 00244 // if - (followed by anything is the filename, 00245 // write uncompressed sam to stdout 00246 if((lastchar >= 1) && (filename[0] == '-')) 00247 { 00248 filename = "-"; 00249 } 00250 myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED); 00251 00252 myInterfacePtr = new SamInterface; 00253 } 00254 00255 if (myFilePtr == NULL) 00256 { 00257 std::string errorMessage = "Failed to Open "; 00258 errorMessage += filename; 00259 errorMessage += " for writing"; 00260 myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str()); 00261 return(false); 00262 } 00263 00264 myIsOpenForWrite = true; 00265 00266 // Write the header if one was passed in. 00267 if(header != NULL) 00268 { 00269 return(WriteHeader(*header)); 00270 } 00271 00272 // Successfully opened the file. 
00273 myStatus = SamStatus::SUCCESS; 00274 return(true); 00275 }
| bool SamFile::ReadBamIndex | ( | ) |
Read the bam index file using the BAM filename as a base.
It must be read prior to setting a read section, for seeking and reading portions of a bam file. Must be read after opening the BAM file since it uses the BAM filename as a base name for the index file. First it tries filename.bam.bai. If that fails, it tries it without the .bam extension, filename.bai.
Definition at line 307 of file SamFile.cpp.
References InputFile::getFileName(), and myStatus.
00308 { 00309 if(myFilePtr == NULL) 00310 { 00311 // Can't read the bam index file because the BAM file has not yet been 00312 // opened, so we don't know the base filename for the index file. 00313 std::string errorMessage = "Failed to read the bam Index file -" 00314 " the BAM file needs to be read first in order to determine" 00315 " the index filename."; 00316 myStatus.setStatus(SamStatus::FAIL_ORDER, errorMessage.c_str()); 00317 return(false); 00318 } 00319 00320 const char* bamBaseName = myFilePtr->getFileName(); 00321 00322 std::string indexName = bamBaseName; 00323 indexName += ".bai"; 00324 00325 bool foundFile = true; 00326 try 00327 { 00328 if(ReadBamIndex(indexName.c_str()) == false) 00329 { 00330 foundFile = false; 00331 } 00332 } 00333 catch (std::exception& e) 00334 { 00335 foundFile = false; 00336 } 00337 00338 // Check to see if the index file was found. 00339 if(!foundFile) 00340 { 00341 // Not found - try without the bam extension. 00342 // Locate the start of the bam extension 00343 size_t startExt = indexName.find(".bam"); 00344 if(startExt == std::string::npos) 00345 { 00346 // Could not find the .bam extension, so just return false since the 00347 // call to ReadBamIndex set the status. 00348 return(false); 00349 } 00350 // Remove ".bam" and try reading the index again. 00351 indexName.erase(startExt, 4); 00352 return(ReadBamIndex(indexName.c_str())); 00353 } 00354 return(true); 00355 }
| bool SamFile::ReadBamIndex | ( | const char * | filename | ) |
Read the specified bam index file.
It must be read prior to setting a read section, for seeking and reading portions of a bam file.
| filename | the name of the bam index file to be read. |
Definition at line 279 of file SamFile.cpp.
References myStatus, and BamIndex::readIndex().
00280 { 00281 // Cleanup a previously setup index. 00282 if(myBamIndex != NULL) 00283 { 00284 delete myBamIndex; 00285 myBamIndex = NULL; 00286 } 00287 00288 // Create a new bam index. 00289 myBamIndex = new BamIndex(); 00290 SamStatus::Status indexStat = myBamIndex->readIndex(bamIndexFilename); 00291 00292 if(indexStat != SamStatus::SUCCESS) 00293 { 00294 std::string errorMessage = "Failed to read the bam Index file: "; 00295 errorMessage += bamIndexFilename; 00296 myStatus.setStatus(indexStat, errorMessage.c_str()); 00297 delete myBamIndex; 00298 myBamIndex = NULL; 00299 return(false); 00300 } 00301 myStatus = SamStatus::SUCCESS; 00302 return(true); 00303 }
| bool SamFile::ReadHeader | ( | SamFileHeader & | header | ) |
Reads the header section from the file and stores it in the passed in header.
Definition at line 402 of file SamFile.cpp.
References myHasHeader, myIsOpenForRead, and myStatus.
Referenced by OpenForRead().
00403 { 00404 if(myIsOpenForRead == false) 00405 { 00406 // File is not open for read 00407 myStatus.setStatus(SamStatus::FAIL_ORDER, 00408 "Cannot read header since the file is not open for reading"); 00409 return(false); 00410 } 00411 00412 if(myHasHeader == true) 00413 { 00414 // The header has already been read. 00415 myStatus.setStatus(SamStatus::FAIL_ORDER, 00416 "Cannot read header since it has already been read."); 00417 return(false); 00418 } 00419 00420 myStatus = myInterfacePtr->readHeader(myFilePtr, header); 00421 if(myStatus == SamStatus::SUCCESS) 00422 { 00423 // The header has now been successfully read. 00424 myHasHeader = true; 00425 return(true); 00426 } 00427 return(false); 00428 }
| bool SamFile::ReadRecord | ( | SamFileHeader & | header, | |
| SamRecord & | record | |||
| ) |
Reads the next record from the file & stores it in the passed in record.
Definition at line 466 of file SamFile.cpp.
References myHasHeader, myIsOpenForRead, myRecordCount, myStatistics, myStatus, readIndexedRecord(), BamIndex::REF_ID_ALL, SamRecord::setReference(), SamRecord::setSequenceTranslation(), and validateSortOrder().
00468 { 00469 myStatus = SamStatus::SUCCESS; 00470 00471 if(myIsOpenForRead == false) 00472 { 00473 // File is not open for read 00474 myStatus.setStatus(SamStatus::FAIL_ORDER, 00475 "Cannot read record since the file is not open for reading"); 00476 throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to opening the file.")); 00477 return(false); 00478 } 00479 00480 if(myHasHeader == false) 00481 { 00482 // The header has not yet been read. 00483 // TODO - maybe just read the header. 00484 myStatus.setStatus(SamStatus::FAIL_ORDER, 00485 "Cannot read record since the header has not been read."); 00486 throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to reading the header.")); 00487 return(false); 00488 } 00489 00490 // Check to see if a new region has been set. If so, determine the 00491 // chunks for that region. 00492 if(myNewSection) 00493 { 00494 if(!processNewSection(header)) 00495 { 00496 // Failed processing a new section. Could be an 00497 // order issue like the file not being open or the 00498 // indexed file not having been read. 00499 // processNewSection sets myStatus with the failure reason. 00500 return(false); 00501 } 00502 } 00503 00504 // Check to see if the file should be read by index. 00505 if(myRefID != BamIndex::REF_ID_ALL) 00506 { 00507 // Reference ID is set, so read by index. 00508 return(readIndexedRecord(header, record)); 00509 } 00510 00511 record.setReference(myRefPtr); 00512 record.setSequenceTranslation(myReadTranslation); 00513 00514 // File is open for reading and the header has been read, so read the next 00515 // record. 00516 myInterfacePtr->readRecord(myFilePtr, header, record, myStatus); 00517 if(myStatus == SamStatus::SUCCESS) 00518 { 00519 // A record was successfully read, so increment the record count. 00520 myRecordCount++; 00521 00522 if(myStatistics != NULL) 00523 { 00524 // Statistics should be updated. 
00525 myStatistics->updateStatistics(record); 00526 } 00527 00528 // Successfully read the record, so check the sort order. 00529 if(!validateSortOrder(record, header)) 00530 { 00531 // ValidateSortOrder sets the status on a failure. 00532 return(false); 00533 } 00534 return(true); 00535 } 00536 // Failed to read the record. 00537 return(false); 00538 }
| bool SamFile::SetReadSection | ( | const char * | refName, | |
| int32_t | start, | |||
| int32_t | end | |||
| ) |
Sets what part of the BAM file should be read.
This version will set it to only read a specific reference name and start/end position. The records for this section will be retrieved on each ReadRecord call. When all records have been retrieved for the specified section, ReadRecord will return failure until a new read section is set. Must be called only after the file has been opened for reading. Sorting validation is reset every time SetReadSection is called since it can jump around in the file.
| refName | the reference name of the records to read from the file. | |
| start | inclusive 0-based start position of records that should be read for this refID. | |
| end | exclusive 0-based end position of records that should be read for this refID. |
Definition at line 661 of file SamFile.cpp.
References InputFile::disableBuffering(), myIsBamOpenForRead, myPrevCoord, myStatus, BamIndex::REF_ID_ALL, and BamIndex::REF_ID_UNMAPPED.
00662 { 00663 // If there is not a BAM file open for reading, return failure. 00664 // Opening a new file clears the read section, so it must be 00665 // set after the file is opened. 00666 if(!myIsBamOpenForRead) 00667 { 00668 // There is not a BAM file open for reading. 00669 myStatus.setStatus(SamStatus::FAIL_ORDER, 00670 "Canot set section since there is no bam file open"); 00671 return(false); 00672 } 00673 00674 // Indexed Bam open for read, so disable read buffering because iftell will be used. 00675 myFilePtr->disableBuffering(); 00676 00677 myNewSection = true; 00678 myStartPos = start; 00679 myEndPos = end; 00680 if((strcmp(refName, "") == 0) || (strcmp(refName, "*") == 0)) 00681 { 00682 // No Reference name specified, so read just the "-1" entries. 00683 myRefID = BamIndex::REF_ID_UNMAPPED; 00684 } 00685 else 00686 { 00687 // save the reference name and revert the reference ID to unknown 00688 // so it will be calculated later. 00689 myRefName = refName; 00690 myRefID = BamIndex::REF_ID_ALL; 00691 } 00692 myChunksToRead.clear(); 00693 // Reset the end of the current chunk. We are resetting our read, so 00694 // we no longer have a "current chunk" that we are reading. 00695 myCurrentChunkEnd = 0; 00696 myStatus = SamStatus::SUCCESS; 00697 00698 // Reset the sort order criteria since we moved around in the file. 00699 myPrevCoord = -1; 00700 myPrevRefID = 0; 00701 myPrevReadName.clear(); 00702 00703 return(true); 00704 }
| bool SamFile::SetReadSection | ( | int32_t | refID, | |
| int32_t | start, | |||
| int32_t | end | |||
| ) |
Sets what part of the BAM file should be read.
This version will set it to only read a specific reference id and start/end position. The records for this section will be retrieved on each ReadRecord call. When all records have been retrieved for the specified section, ReadRecord will return failure until a new read section is set. Must be called only after the file has been opened for reading. Sorting validation is reset every time SetReadSection is called since it can jump around in the file.
| refID | the reference ID of the records to read from the file. | |
| start | inclusive 0-based start position of records that should be read for this refID. | |
| end | exclusive 0-based end position of records that should be read for this refID. |
Definition at line 624 of file SamFile.cpp.
References InputFile::disableBuffering(), myIsBamOpenForRead, myPrevCoord, and myStatus.
00625 { 00626 // If there is not a BAM file open for reading, return failure. 00627 // Opening a new file clears the read section, so it must be 00628 // set after the file is opened. 00629 if(!myIsBamOpenForRead) 00630 { 00631 // There is not a BAM file open for reading. 00632 myStatus.setStatus(SamStatus::FAIL_ORDER, 00633 "Canot set section since there is no bam file open"); 00634 return(false); 00635 } 00636 00637 // Indexed Bam open for read, so disable read buffering because iftell will be used. 00638 myFilePtr->disableBuffering(); 00639 00640 myNewSection = true; 00641 myStartPos = start; 00642 myEndPos = end; 00643 myRefID = refID; 00644 myRefName.clear(); 00645 myChunksToRead.clear(); 00646 // Reset the end of the current chunk. We are resetting our read, so 00647 // we no longer have a "current chunk" that we are reading. 00648 myCurrentChunkEnd = 0; 00649 myStatus = SamStatus::SUCCESS; 00650 00651 // Reset the sort order criteria since we moved around in the file. 00652 myPrevCoord = -1; 00653 myPrevRefID = 0; 00654 myPrevReadName.clear(); 00655 00656 return(true); 00657 }
| bool SamFile::SetReadSection | ( | const char * | refName | ) |
Sets what part of the BAM file should be read.
This version will set it to only read a specific reference name. The records for that reference name will be retrieved on each ReadRecord call. When all records have been retrieved for the specified reference name, ReadRecord will return failure until a new read section is set. Must be called only after the file has been opened for reading. Sorting validation is reset every time SetReadSection is called since it can jump around in the file.
| refName | the reference name of the records to read from the file. |
Definition at line 616 of file SamFile.cpp.
References SetReadSection().
00617 { 00618 // No start/end specified, so set back to default -1. 00619 return(SetReadSection(refName, -1, -1)); 00620 }
| bool SamFile::SetReadSection | ( | int32_t | refID | ) |
Sets what part of the BAM file should be read.
This version will set it to only read a specific reference id. The records for that reference id will be retrieved on each ReadRecord call. When all records have been retrieved for the specified reference id, ReadRecord will return failure until a new read section is set. Must be called only after the file has been opened for reading. Sorting validation is reset every time SetReadSection is called since it can jump around in the file.
| refID | the reference ID of the records to read from the file. |
Definition at line 607 of file SamFile.cpp.
Referenced by SetReadSection().
00608 { 00609 // No start/end specified, so set back to default -1. 00610 return(SetReadSection(refID, -1, -1)); 00611 }
| void SamFile::SetReadSequenceTranslation | ( | SamRecord::SequenceTranslation | translation | ) |
Set the type of sequence translation to use when reading the sequence.
Passed down to the SamRecord when it is read. The default type (if this method is never called) is NONE (the sequence is left as-is).
| translation | type of sequence translation to use. |
Definition at line 366 of file SamFile.cpp.
| void SamFile::SetReference | ( | GenomeSequence * | reference | ) |
Sets the reference to the specified genome sequence object.
| reference | pointer to the GenomeSequence object. |
Definition at line 359 of file SamFile.cpp.
| void SamFile::setSortedValidation | ( | SortedType | sortType | ) |
Set the flag to validate that the file is sorted as it is read/written.
Must be called after the file has been opened. Sorting validation is reset every time SetReadSection is called since it can jump around in the file.
Definition at line 593 of file SamFile.cpp.
| void SamFile::SetWriteSequenceTranslation | ( | SamRecord::SequenceTranslation | translation | ) |
Set the type of sequence translation to use when writing the sequence.
Passed down to the SamRecord when it is written. The default type (if this method is never called) is NONE (the sequence is left as-is).
| translation | type of sequence translation to use. |
Definition at line 373 of file SamFile.cpp.
| bool SamFile::validateSortOrder | ( | SamRecord & | record, | |
| SamFileHeader & | header | |||
| ) | [protected] |
Validate that the record is sorted compared to the previously read record if there is one, according to the specified sort order.
If the sort order is UNSORTED, true is returned. Sorting validation is reset every time SetReadSection is called since it can jump around in the file.
Definition at line 910 of file SamFile.cpp.
References FLAG, SamRecord::get0BasedPosition(), SamRecord::getReadName(), SamRecord::getReferenceID(), myPrevCoord, myRecordCount, myStatus, QUERY_NAME, BamIndex::REF_ID_UNMAPPED, SamRecord::setReference(), SamRecord::setSequenceTranslation(), and UNSORTED.
Referenced by readIndexedRecord(), ReadRecord(), and WriteRecord().
00911 { 00912 if(myRefPtr != NULL) 00913 { 00914 record.setReference(myRefPtr); 00915 } 00916 record.setSequenceTranslation(myReadTranslation); 00917 00918 bool status = false; 00919 if(mySortedType == UNSORTED) 00920 { 00921 // Unsorted, so nothing to validate, just return true. 00922 status = true; 00923 } 00924 else 00925 { 00926 // Check to see if mySortedType is based on the header. 00927 if(mySortedType == FLAG) 00928 { 00929 // Determine the sorted type from what was read out of the header. 00930 mySortedType = getSortOrderFromHeader(header); 00931 } 00932 00933 if(mySortedType == QUERY_NAME) 00934 { 00935 // Validate that it is sorted by query name. 00936 // Get the query name from the record. 00937 const char* readName = record.getReadName(); 00938 if(myPrevReadName.compare(readName) > 0) 00939 { 00940 // The previous name is greater than the new record's name, so 00941 // return false. 00942 String errorMessage = "ERROR: File is not sorted at record "; 00943 errorMessage += myRecordCount; 00944 myStatus.setStatus(SamStatus::INVALID_SORT, 00945 errorMessage.c_str()); 00946 status = false; 00947 } 00948 else 00949 { 00950 myPrevReadName = readName; 00951 status = true; 00952 } 00953 } 00954 else 00955 { 00956 // Validate that it is sorted by COORDINATES. 00957 // Get the leftmost coordinate and the reference index. 00958 int32_t refID = record.getReferenceID(); 00959 int32_t coord = record.get0BasedPosition(); 00960 // The unmapped reference id is at the end of a sorted file. 00961 if(refID == BamIndex::REF_ID_UNMAPPED) 00962 { 00963 // A new reference ID that is for the unmapped reads 00964 // is always valid. 00965 status = true; 00966 myPrevRefID = refID; 00967 myPrevCoord = coord; 00968 } 00969 else if(myPrevRefID == BamIndex::REF_ID_UNMAPPED) 00970 { 00971 // Previous reference ID was for unmapped reads, but the 00972 // current one is not, so this is not sorted. 
00973 String errorMessage = "ERROR: File is not sorted at record "; 00974 errorMessage += myRecordCount; 00975 myStatus.setStatus(SamStatus::INVALID_SORT, 00976 errorMessage.c_str()); 00977 status = false; 00978 } 00979 else if(refID < myPrevRefID) 00980 { 00981 // Current reference id is less than the previous one, 00982 //meaning that it is not sorted. 00983 String errorMessage = "ERROR: File is not sorted at record "; 00984 errorMessage += myRecordCount; 00985 myStatus.setStatus(SamStatus::INVALID_SORT, 00986 errorMessage.c_str()); 00987 status = false; 00988 } 00989 else 00990 { 00991 // The reference IDs are in the correct order. 00992 if(refID > myPrevRefID) 00993 { 00994 // New reference id, so set the previous coordinate to -1 00995 myPrevCoord = -1; 00996 } 00997 00998 // Check the coordinates. 00999 if(coord < myPrevCoord) 01000 { 01001 // New Coord is less than the previous position. 01002 String errorMessage = "ERROR: File is not sorted at record "; 01003 errorMessage += myRecordCount; 01004 myStatus.setStatus(SamStatus::INVALID_SORT, 01005 errorMessage.c_str()); 01006 status = false; 01007 } 01008 else 01009 { 01010 myPrevRefID = refID; 01011 myPrevCoord = coord; 01012 status = true; 01013 } 01014 } 01015 } 01016 } 01017 01018 return(status); 01019 }
| bool SamFile::WriteHeader | ( | SamFileHeader & | header | ) |
Writes the specified header into the file.
Definition at line 432 of file SamFile.cpp.
References myHasHeader, myIsOpenForWrite, and myStatus.
Referenced by OpenForWrite().
00433 { 00434 if(myIsOpenForWrite == false) 00435 { 00436 // File is not open for write 00437 // -OR- 00438 // The header has already been written. 00439 myStatus.setStatus(SamStatus::FAIL_ORDER, 00440 "Cannot write header since the file is not open for writing"); 00441 return(false); 00442 } 00443 00444 if(myHasHeader == true) 00445 { 00446 // The header has already been written. 00447 myStatus.setStatus(SamStatus::FAIL_ORDER, 00448 "Cannot write header since it has already been written"); 00449 return(false); 00450 } 00451 00452 myStatus = myInterfacePtr->writeHeader(myFilePtr, header); 00453 if(myStatus == SamStatus::SUCCESS) 00454 { 00455 // The header has now been successfully written. 00456 myHasHeader = true; 00457 return(true); 00458 } 00459 00460 // return the status. 00461 return(false); 00462 }
| bool SamFile::WriteRecord | ( | SamFileHeader & | header, | |
| SamRecord & | record | |||
| ) |
Writes the specified record into the file.
Definition at line 543 of file SamFile.cpp.
References myHasHeader, myIsOpenForWrite, myRecordCount, myStatus, SamRecord::setReference(), and validateSortOrder().
00545 { 00546 if(myIsOpenForWrite == false) 00547 { 00548 // File is not open for writing 00549 myStatus.setStatus(SamStatus::FAIL_ORDER, 00550 "Cannot write record since the file is not open for writing"); 00551 return(false); 00552 } 00553 00554 if(myHasHeader == false) 00555 { 00556 // The header has not yet been written. 00557 myStatus.setStatus(SamStatus::FAIL_ORDER, 00558 "Cannot write record since the header has not been written"); 00559 return(false); 00560 } 00561 00562 // Before trying to write the record, validate the sort order. 00563 if(!validateSortOrder(record, header)) 00564 { 00565 // Not sorted like it is supposed to be, do not write the record 00566 myStatus.setStatus(SamStatus::INVALID_SORT, 00567 "Cannot write the record since the file is not properly sorted."); 00568 return(false); 00569 } 00570 00571 if(myRefPtr != NULL) 00572 { 00573 record.setReference(myRefPtr); 00574 } 00575 00576 // File is open for writing and the header has been written, so write the 00577 // record. 00578 myStatus = myInterfacePtr->writeRecord(myFilePtr, header, record, 00579 myWriteTranslation); 00580 00581 if(myStatus == SamStatus::SUCCESS) 00582 { 00583 // A record was successfully written, so increment the record count. 00584 myRecordCount++; 00585 return(true); 00586 } 00587 return(false); 00588 }
bool SamFile::myHasHeader [protected] |
Flag to indicate if a header has been read/written - required before being able to read/write a record.
Definition at line 313 of file SamFile.h.
Referenced by ReadHeader(), ReadRecord(), resetFile(), WriteHeader(), and WriteRecord().
1.6.3