libStatGen Software
1
|
Class for reading/validating a fastq file. More...
#include <FastQFile.h>
Public Member Functions | |
FastQFile (int minReadLength=10, int numPrintableErrors=20) | |
Constructor. | |
void | disableMessages () |
Disable messages - do not write to cout. | |
void | enableMessages () |
Enable messages - write to cout. | |
void | disableSeqIDCheck () |
Disable Unique Sequence ID checking (Unique Sequence ID checking is enabled by default). | |
void | enableSeqIDCheck () |
Enable Unique Sequence ID checking. | |
void | setMaxErrors (int maxErrors) |
Set the number of errors after which to quit reading/validating a file, defaults to -1. | |
FastQStatus::Status | openFile (const char *fileName, BaseAsciiMap::SPACE_TYPE spaceType=BaseAsciiMap::UNKNOWN) |
Open a FastQFile. | |
FastQStatus::Status | closeFile () |
Close a FastQFile. | |
bool | isOpen () |
Check to see if the file is open. | |
bool | isEof () |
Check to see if the file is at the end of the file. | |
bool | keepReadingFile () |
Returns whether or not to keep reading the file; it returns false (stop reading) at eof or if there is a problem reading the file. | |
FastQStatus::Status | validateFastQFile (const String &filename, bool printBaseComp, BaseAsciiMap::SPACE_TYPE spaceType, bool printQualAvg=false) |
Validate the specified fastq file. | |
FastQStatus::Status | readFastQSequence () |
Read 1 FastQSequence, validating it. | |
BaseAsciiMap::SPACE_TYPE | getSpaceType () |
Get the space type used for this file. | |
Public Attributes | |
Public Sequence Line variables. | |
Keep public variables for a sequence's lines so they can be accessed without having to do string copies. | |
String | myRawSequence |
String | mySequenceIdLine |
String | mySequenceIdentifier |
String | myPlusLine |
String | myQualityString |
Class for reading/validating a fastq file.
Definition at line 29 of file FastQFile.h.
FastQFile::FastQFile | ( | int | minReadLength = 10 , |
int | numPrintableErrors = 20 |
||
) |
Constructor.
minReadLength | The minimum length that a base sequence must be for it to be valid. |
numPrintableErrors | The maximum number of errors that should be reported in detail before suppressing the errors. |
Definition at line 30 of file FastQFile.cpp.
: myFile(NULL), myBaseComposition(), myQualPerCycle(), myCountPerCycle(), myCheckSeqID(true), myMinReadLength(minReadLength), myNumPrintableErrors(numPrintableErrors), myMaxErrors(-1), myDisableMessages(false), myFileProblem(false) { // Reset the member data. reset(); }
void FastQFile::disableSeqIDCheck | ( | ) |
Disable Unique Sequence ID checking (Unique Sequence ID checking is enabled by default).
Definition at line 61 of file FastQFile.cpp.
{
myCheckSeqID = false;
}
void FastQFile::enableSeqIDCheck | ( | ) |
Enable Unique Sequence ID checking.
(Unique Sequence ID checking is enabled by default).
Definition at line 69 of file FastQFile.cpp.
{
myCheckSeqID = true;
}
bool FastQFile::keepReadingFile | ( | ) |
Returns whether or not to keep reading the file; it returns false (stop reading) at eof or if there is a problem reading the file.
Definition at line 184 of file FastQFile.cpp.
References isEof().
Referenced by validateFastQFile().
{ if(isEof() || myFileProblem) { return(false); } return(true); }
FastQStatus::Status FastQFile::openFile | ( | const char * | fileName, |
BaseAsciiMap::SPACE_TYPE | spaceType = BaseAsciiMap::UNKNOWN |
||
) |
Open a FastQFile.
The specified SPACE_TYPE (BASE_SPACE, COLOR_SPACE, or UNKNOWN) is used as the space type for the file's base composition.
Definition at line 83 of file FastQFile.cpp.
References closeFile(), FastQStatus::FASTQ_OPEN_ERROR, FastQStatus::FASTQ_SUCCESS, ifopen(), BaseComposition::resetBaseMapType(), and BaseComposition::setBaseMapType().
Referenced by validateFastQFile().
{ // reset the member data. reset(); myBaseComposition.resetBaseMapType(); myBaseComposition.setBaseMapType(spaceType); myQualPerCycle.clear(); myCountPerCycle.clear(); FastQStatus::Status status = FastQStatus::FASTQ_SUCCESS; // Close the file if there is already one open - checked by close. status = closeFile(); if(status == FastQStatus::FASTQ_SUCCESS) { // Successfully closed a previously opened file if there was one. // Open the file myFile = ifopen(fileName, "rt"); myFileName = fileName; if(myFile == NULL) { // Failed to open the file. status = FastQStatus::FASTQ_OPEN_ERROR; } } if(status != FastQStatus::FASTQ_SUCCESS) { // Failed to open the file. std::string errorMessage = "ERROR: Failed to open file: "; errorMessage += fileName; logMessage(errorMessage.c_str()); } return(status); }
void FastQFile::setMaxErrors | ( | int | maxErrors | ) |
Set the number of errors after which to quit reading/validating a file (defaults to -1).
maxErrors | Number of errors before quitting: -1 (the default) means do not quit until the entire file has been read/validated; 0 means quit without reading/validating anything. |
Definition at line 76 of file FastQFile.cpp.
{ myMaxErrors = maxErrors; }
FastQStatus::Status FastQFile::validateFastQFile | ( | const String & | filename, |
bool | printBaseComp, | ||
BaseAsciiMap::SPACE_TYPE | spaceType, | ||
bool | printQualAvg = false |
||
) |
Validate the specified fastq file.
filename | fastq file to be validated. |
printBaseComp | whether or not to print the base composition for the file. true means print it, false means do not. |
spaceType | the spaceType to use for validation - BASE_SPACE, COLOR_SPACE, or UNKNOWN (UNKNOWN means to determine the spaceType to validate against from the first character of the first sequence). |
printQualAvg | whether or not to print the quality averages for the file. true means to print it, false (default) means do not. |
Definition at line 195 of file FastQFile.cpp.
References closeFile(), FastQStatus::FASTQ_INVALID, FastQStatus::FASTQ_NO_SEQUENCE_ERROR, FastQStatus::FASTQ_OPEN_ERROR, FastQStatus::FASTQ_SUCCESS, keepReadingFile(), openFile(), BaseComposition::print(), and readFastQSequence().
{ // Open the fastqfile. if(openFile(filename, spaceType) != FastQStatus::FASTQ_SUCCESS) { // Failed to open the specified file. return(FastQStatus::FASTQ_OPEN_ERROR); } // Track the total number of sequences that were validated. int numSequences = 0; // Keep reading the file until there are no more fastq sequences to process // and not configured to quit after a certain number of errors or there // has not yet been that many errors. // Or exit if there is a problem reading the file. FastQStatus::Status status = FastQStatus::FASTQ_SUCCESS; while (keepReadingFile() && ((myMaxErrors == -1) || (myMaxErrors > myNumErrors))) { // Validate one sequence. This call will read all the lines for // one sequence. status = readFastQSequence(); if((status == FastQStatus::FASTQ_SUCCESS) || (status == FastQStatus::FASTQ_INVALID)) { // Read a sequence and it is either valid or invalid, but // either way, a sequence was read, so increment the sequence count. ++numSequences; } else { // Other error, so break out of processing. break; } } // Report Base Composition Statistics. if(printBaseComp) { myBaseComposition.print(); } if(printQualAvg) { printAvgQual(); } std::string finishMessage = "Finished processing "; finishMessage += myFileName.c_str(); char buffer[100]; if(sprintf(buffer, " with %u lines containing %d sequences.", myLineNum, numSequences) > 0) { finishMessage += buffer; logMessage(finishMessage.c_str()); } if(sprintf(buffer, "There were a total of %d errors.", myNumErrors) > 0) { logMessage(buffer); } // Close the input file. FastQStatus::Status closeStatus = closeFile(); if((status != FastQStatus::FASTQ_SUCCESS) && (status != FastQStatus::FASTQ_INVALID)) { // Stopped validating due to some error other than invalid, so // return that error. return(status); } else if(myNumErrors == 0) { // No errors, check to see if there were any sequences. // Finished processing all of the sequences in the file. // If there are no sequences, report an error. 
if(numSequences == 0) { // Empty file, return error. logMessage("ERROR: No FastQSequences in the file."); return(FastQStatus::FASTQ_NO_SEQUENCE_ERROR); } return(FastQStatus::FASTQ_SUCCESS); } else { // The file is invalid. But check the close status. If the close // failed, it means there is a problem with the file itself not just // with validation, so the close failure should be returned. if(closeStatus != FastQStatus::FASTQ_SUCCESS) { return(closeStatus); } return(FastQStatus::FASTQ_INVALID); } }