|
libStatGen Software
1
|
Class for reading/validating a fastq file. More...
#include <FastQFile.h>

Public Member Functions | |
| FastQFile (int minReadLength=10, int numPrintableErrors=20) | |
| Constructor. | |
| void | disableMessages () |
| Disable messages - do not write to cout. | |
| void | enableMessages () |
| Enable messages - write to cout. | |
| void | disableSeqIDCheck () |
| Disable Unique Sequence ID checking (Unique Sequence ID checking is enabled by default). | |
| void | enableSeqIDCheck () |
| Enable Unique Sequence ID checking. | |
| void | setMaxErrors (int maxErrors) |
| Set the number of errors after which to quit reading/validating a file, defaults to -1. | |
| FastQStatus::Status | openFile (const char *fileName, BaseAsciiMap::SPACE_TYPE spaceType=BaseAsciiMap::UNKNOWN) |
| Open a FastQFile. | |
| FastQStatus::Status | closeFile () |
| Close a FastQFile. | |
| bool | isOpen () |
| Check to see if the file is open. | |
| bool | isEof () |
| Check to see if the file is at the end of the file. | |
| bool | keepReadingFile () |
| Returns whether or not to keep reading the file; returns false (stop reading) at end of file or if there was a problem reading the file. | |
| FastQStatus::Status | validateFastQFile (const String &filename, bool printBaseComp, BaseAsciiMap::SPACE_TYPE spaceType, bool printQualAvg=false) |
| Validate the specified fastq file. | |
| FastQStatus::Status | readFastQSequence () |
| Read 1 FastQSequence, validating it. | |
| BaseAsciiMap::SPACE_TYPE | getSpaceType () |
| Get the space type used for this file. | |
Public Attributes | |
Public Sequence Line variables. | |
Keep public variables for a sequence's line so they can be accessed without having to do string copies. | |
| String | myRawSequence |
| String | mySequenceIdLine |
| String | mySequenceIdentifier |
| String | myPlusLine |
| String | myQualityString |
Class for reading/validating a fastq file.
Definition at line 29 of file FastQFile.h.
| FastQFile::FastQFile | ( | int | minReadLength = 10, |
| int | numPrintableErrors = 20 |
||
| ) |
Constructor.
| minReadLength | The minimum length that a base sequence must be for it to be valid. |
| numPrintableErrors | The maximum number of errors that should be reported in detail before suppressing the errors. |
Definition at line 30 of file FastQFile.cpp.
: myFile(NULL),                 // No file is open at construction.
myBaseComposition(),
myQualPerCycle(),
myCountPerCycle(),
myCheckSeqID(true),             // Unique Sequence ID checking is enabled by default.
myMinReadLength(minReadLength),
myNumPrintableErrors(numPrintableErrors),
myMaxErrors(-1),                // -1: do not quit until the entire file has been read/validated.
myDisableMessages(false),       // Messages are not disabled by default.
myFileProblem(false)            // No file problem has been flagged yet.
{
// Reset the member data.
reset();
}
| void FastQFile::disableSeqIDCheck | ( | ) |
Disable Unique Sequence ID checking (Unique Sequence ID checking is enabled by default).
Definition at line 61 of file FastQFile.cpp.
{
// Clear the flag that enables Unique Sequence ID checking.
myCheckSeqID = false;
}
| void FastQFile::enableSeqIDCheck | ( | ) |
Enable Unique Sequence ID checking.
(Unique Sequence ID checking is enabled by default).
Definition at line 69 of file FastQFile.cpp.
{
// Set the flag that enables Unique Sequence ID checking.
myCheckSeqID = true;
}
| bool FastQFile::keepReadingFile | ( | ) |
Returns whether or not to keep reading the file; returns false (stop reading) at end of file or if there was a problem reading the file.
Definition at line 184 of file FastQFile.cpp.
References isEof().
Referenced by validateFastQFile().
{
    // Reading must stop once the file is at eof or a problem reading the
    // file has been flagged; otherwise it may continue.
    const bool shouldStop = isEof() || myFileProblem;
    return(!shouldStop);
}
| FastQStatus::Status FastQFile::openFile | ( | const char * | fileName, |
| BaseAsciiMap::SPACE_TYPE | spaceType = BaseAsciiMap::UNKNOWN |
||
| ) |
Open a FastQFile.
Use the specified SPACE_TYPE to determine BASE, COLOR, or UNKNOWN.
Definition at line 83 of file FastQFile.cpp.
References closeFile(), FastQStatus::FASTQ_OPEN_ERROR, FastQStatus::FASTQ_SUCCESS, ifopen(), BaseComposition::resetBaseMapType(), and BaseComposition::setBaseMapType().
Referenced by validateFastQFile().
{
    // Open the named fastq file for reading, closing any file that is
    // already open first.
    //   fileName  - name of the file to open; NULL is reported as an open
    //               error instead of being passed on.
    //   spaceType - SPACE_TYPE handed to the base composition map.
    // Returns FASTQ_SUCCESS on success, the closeFile() status if closing a
    // previously opened file failed, or FASTQ_OPEN_ERROR if the file could
    // not be opened.

    // Reset the member data and the per-file statistics.
    reset();
    myBaseComposition.resetBaseMapType();
    myBaseComposition.setBaseMapType(spaceType);
    myQualPerCycle.clear();
    myCountPerCycle.clear();

    // Close the file if there is already one open - checked by close.
    FastQStatus::Status status = closeFile();

    if(status == FastQStatus::FASTQ_SUCCESS)
    {
        // Successfully closed a previously opened file if there was one.
        if(fileName == NULL)
        {
            // There is no name to open.  Do not hand a null pointer to
            // ifopen; just report the failure.
            myFileName = "";
            status = FastQStatus::FASTQ_OPEN_ERROR;
        }
        else
        {
            // Open the file.
            myFile = ifopen(fileName, "rt");
            myFileName = fileName;

            if(myFile == NULL)
            {
                // Failed to open the file.
                status = FastQStatus::FASTQ_OPEN_ERROR;
            }
        }
    }

    if(status != FastQStatus::FASTQ_SUCCESS)
    {
        // Failed to open the file.
        std::string errorMessage = "ERROR: Failed to open file: ";
        // Appending a null const char* to std::string is undefined
        // behavior, so only append the name when there is one.
        if(fileName != NULL)
        {
            errorMessage += fileName;
        }
        logMessage(errorMessage.c_str());
    }
    return(status);
}
| void FastQFile::setMaxErrors | ( | int | maxErrors | ) |
Set the number of errors after which to quit reading/validating a file, defaults to -1.
| maxErrors | # of errors before quitting, -1 indicates to not quit until the entire file has been read/validated (default), 0 indicates to quit without reading/validating anything. |
Definition at line 76 of file FastQFile.cpp.
{
// Store the error limit; validateFastQFile() treats -1 as "no limit".
myMaxErrors = maxErrors;
}
| FastQStatus::Status FastQFile::validateFastQFile | ( | const String & | filename, |
| bool | printBaseComp, | ||
| BaseAsciiMap::SPACE_TYPE | spaceType, | ||
| bool | printQualAvg = false |
||
| ) |
Validate the specified fastq file.
| filename | fastq file to be validated. |
| printBaseComp | whether or not to print the base composition for the file. true means print it, false means do not. |
| spaceType | the spaceType to use for validation - BASE_SPACE, COLOR_SPACE, or UNKNOWN (UNKNOWN means to determine the spaceType to validate against from the first character of the first sequence). |
| printQualAvg | whether or not to print the quality averages for the file. true means to print it, false (default) means do not. |
Definition at line 195 of file FastQFile.cpp.
References closeFile(), FastQStatus::FASTQ_INVALID, FastQStatus::FASTQ_NO_SEQUENCE_ERROR, FastQStatus::FASTQ_OPEN_ERROR, FastQStatus::FASTQ_SUCCESS, keepReadingFile(), openFile(), BaseComposition::print(), and readFastQSequence().
{
    // Validate every sequence in the given fastq file, log a summary, and
    // return the overall status:
    //   FASTQ_OPEN_ERROR        - the file could not be opened.
    //   <read status>           - reading stopped on a status other than
    //                             FASTQ_SUCCESS/FASTQ_INVALID.
    //   FASTQ_NO_SEQUENCE_ERROR - no errors, but no sequences either.
    //   FASTQ_SUCCESS           - no errors and at least one sequence.
    //   <close status>/FASTQ_INVALID - errors were found; a failed close
    //                             takes precedence over FASTQ_INVALID.

    // Nothing can be validated unless the file opens.
    if(openFile(filename, spaceType) != FastQStatus::FASTQ_SUCCESS)
    {
        return(FastQStatus::FASTQ_OPEN_ERROR);
    }

    // Count of sequences read so far, valid or not.
    int sequenceCount = 0;
    // Status of the most recent readFastQSequence() call.
    FastQStatus::Status readStatus = FastQStatus::FASTQ_SUCCESS;

    // Continue while the file can still be read and the error limit (when
    // one is configured, i.e. myMaxErrors is not -1) has not been reached.
    while(keepReadingFile() &&
          !((myMaxErrors != -1) && (myMaxErrors <= myNumErrors)))
    {
        // Each call reads all the lines for one sequence and validates it.
        readStatus = readFastQSequence();

        const bool sequenceWasRead =
            (readStatus == FastQStatus::FASTQ_SUCCESS) ||
            (readStatus == FastQStatus::FASTQ_INVALID);
        if(!sequenceWasRead)
        {
            // Some other error, so stop processing.
            break;
        }
        // Valid or invalid, a sequence was read, so count it.
        ++sequenceCount;
    }

    // Optional statistics reports.
    if(printBaseComp)
    {
        myBaseComposition.print();
    }
    if(printQualAvg)
    {
        printAvgQual();
    }

    // Log the processing summary.
    std::string summary = "Finished processing ";
    summary += myFileName.c_str();

    char text[100];
    const int summaryLength = sprintf(text,
                                      " with %u lines containing %d sequences.",
                                      myLineNum, sequenceCount);
    if(summaryLength > 0)
    {
        summary += text;
        logMessage(summary.c_str());
    }

    // Log the error total.
    const int errorTotalLength = sprintf(text,
                                         "There were a total of %d errors.",
                                         myNumErrors);
    if(errorTotalLength > 0)
    {
        logMessage(text);
    }

    // Close the input file, remembering the result for the invalid case.
    const FastQStatus::Status closeStatus = closeFile();

    // Reading stopped on something other than success/invalid, so that
    // status is what gets returned.
    const bool stoppedOnOtherError =
        (readStatus != FastQStatus::FASTQ_SUCCESS) &&
        (readStatus != FastQStatus::FASTQ_INVALID);
    if(stoppedOnOtherError)
    {
        return(readStatus);
    }

    if(myNumErrors != 0)
    {
        // The file is invalid.  A failed close means there is a problem
        // with the file itself, not just with validation, so the close
        // failure is returned in preference to FASTQ_INVALID.
        return((closeStatus != FastQStatus::FASTQ_SUCCESS) ?
               closeStatus : FastQStatus::FASTQ_INVALID);
    }

    // No errors were found; an empty file is still reported as an error.
    if(sequenceCount == 0)
    {
        logMessage("ERROR: No FastQSequences in the file.");
        return(FastQStatus::FASTQ_NO_SEQUENCE_ERROR);
    }
    return(FastQStatus::FASTQ_SUCCESS);
}