libStatGen Software
1
|
The SamValidator class contains static methods for validating the SAM/BAM Record and each of its fields. More...
#include <SamValidation.h>
Static Public Member Functions | |
static bool | isValid (SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors) |
Validates whether or not the specified SamRecord is valid, calling all of the other validations. | |
static bool | isValidQname (const char *qname, uint8_t qnameLen, SamValidationErrors &validationErrors) |
Determines whether or not the specified qname is valid. | |
static bool | isValidFlag (uint16_t flag, SamValidationErrors &validationErrors) |
Determines whether or not the flag is valid. | |
static bool | isValidRname (SamFileHeader &samHeader, const char *rname, SamValidationErrors &validationErrors) |
Validate the reference name including validating against the header. | |
static bool | isValidRname (const char *rname, SamValidationErrors &validationErrors) |
Validate the rname without validating against the header. | |
static bool | isValidRefID (int32_t refID, const SamReferenceInfo &refInfo, SamValidationErrors &validationErrors) |
Validate whether or not the specified reference id is valid. | |
static bool | isValid1BasedPos (int32_t pos, SamValidationErrors &validationErrors) |
Validate the reference position. | |
static bool | isValidMapQuality (uint8_t mapQuality, SamValidationErrors &validationErrors) |
Validate the mapping quality. | |
static bool | isValidSequence (SamRecord &samRecord, SamValidationErrors &validationErrors) |
Validate the sequence, but not against the cigar or quality string. | |
static bool | isValidCigar (SamRecord &samRecord, SamValidationErrors &validationErrors) |
Validate the cigar. | |
static bool | isValidCigar (const char *cigar, const char *sequence, SamValidationErrors &validationErrors) |
Validate the cigar. | |
static bool | isValidCigar (const char *cigar, int seqLen, SamValidationErrors &validationErrors) |
Validate the cigar. | |
static bool | isValidMrnm () |
TODO: validate the mate/next fragment's reference name. | |
static bool | isValidMpos () |
TODO: validate the mate/next fragment's position. | |
static bool | isValidIsize () |
TODO: validate the insertion size/observed template length. | |
static bool | isValidSeq () |
TODO, validate the sequence. | |
static bool | isValidQuality (SamRecord &samRecord, SamValidationErrors &validationErrors) |
Validate the base quality. | |
static bool | isValidQuality (const char *quality, const char *sequence, SamValidationErrors &validationErrors) |
Validate the base quality. | |
static bool | isValidQuality (const char *quality, int seqLength, SamValidationErrors &validationErrors) |
Validate the base quality. | |
static bool | isValidTags (SamRecord &samRecord, SamValidationErrors &validationErrors) |
Validate the tags. | |
static bool | isValidVtype () |
TODO validate the tag vtype. | |
static bool | isValidValue () |
TODO validate the tag value. |
The SamValidator class contains static methods for validating the SAM/BAM Record and each of its fields.
The generic isValid method performs all of the other validations. The SamValidator methods return whether or not what is being validated is valid. True means it is valid, false means it is not. The specifics of the invalid value(s) are contained in the SamValidationErrors object that is passed in (by reference) to the method. The specific errors can be pulled out of that object. TODO: VALIDATION METHODS STILL NEED TO BE ADDED, and isValid does not yet validate all fields!!!
Definition at line 173 of file SamValidation.h.
bool SamValidator::isValid | ( | SamFileHeader & | samHeader, |
SamRecord & | samRecord, | ||
SamValidationErrors & | validationErrors | ||
) | [static] |
Validates whether or not the specified SamRecord is valid, calling all of the other validations.
TODO: more validation needs to be added.
samHeader | header associated with the record to be validated. |
samRecord | record to be validated. |
validationErrors | status to append any errors to. |
Definition at line 195 of file SamValidation.cpp.
References SamRecord::get1BasedPosition(), SamRecord::getFlag(), SamRecord::getMapQuality(), SamRecord::getReadName(), SamRecord::getReadNameLength(), SamRecord::getReferenceID(), SamFileHeader::getReferenceInfo(), SamRecord::getReferenceName(), isValid1BasedPos(), isValidCigar(), isValidFlag(), isValidMapQuality(), isValidQname(), isValidQuality(), isValidRefID(), isValidRname(), isValidSequence(), and isValidTags().
Referenced by SamRecord::isValid().
{ bool status = true; status &= isValidQname(samRecord.getReadName(), samRecord.getReadNameLength(), validationErrors); status &= isValidFlag(samRecord.getFlag(), validationErrors); // Validate the RName including validating it against the header. status &= isValidRname(samHeader, samRecord.getReferenceName(), validationErrors); status &= isValidRefID(samRecord.getReferenceID(), samHeader.getReferenceInfo(), validationErrors); status &= isValid1BasedPos(samRecord.get1BasedPosition(), validationErrors); status &= isValidMapQuality(samRecord.getMapQuality(), validationErrors); status &= isValidSequence(samRecord, validationErrors); status &= isValidCigar(samRecord, validationErrors); status &= isValidQuality(samRecord, validationErrors); status &= isValidTags(samRecord, validationErrors); return(status); }
bool SamValidator::isValid1BasedPos | ( | int32_t | pos, |
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the reference position.
Validation for pos is: a) must be between 0 and (2^29)-1.
pos | position to be validated. |
validationErrors | status to append any errors to. |
Definition at line 504 of file SamValidation.cpp.
References SamValidationErrors::addError(), SamValidationError::INVALID_POS, and SamValidationError::WARNING.
Referenced by isValid().
{ // Validation for pos is: // a) must be between 0 and (2^29)-1. bool status = true; if((pos < 0) || (pos > 536870911)) { String message = "POS out of range ("; message += pos; message += ") must be between 0 and (2^29)-1."; validationErrors.addError(SamValidationError::INVALID_POS, SamValidationError::WARNING, message.c_str()); status = false; } return(status); }
bool SamValidator::isValidCigar | ( | SamRecord & | samRecord, |
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the cigar.
Cigar validation depends on sequence. Validation for CIGAR is: a) cannot be 0 length. If not "*", validate the following: b) TODO: must have an integer length for each operator (if not "*"). c) TODO: all operators must be valid (if not "*"). d) evaluates to the same read length as the sequence string.
samRecord | record whose cigar should be validated. |
validationErrors | status to append any errors to. |
Definition at line 543 of file SamValidation.cpp.
References SamRecord::getCigar(), and SamRecord::getReadLength().
Referenced by isValid(), and isValidCigar().
{ return(isValidCigar(samRecord.getCigar(), samRecord.getReadLength(), validationErrors)); }
bool SamValidator::isValidCigar | ( | const char * | cigar, |
const char * | sequence, | ||
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the cigar.
Cigar validation depends on sequence. Validation for CIGAR is: a) cannot be 0 length. If not "*", validate the following: b) TODO: must have an integer length for each operator (if not "*"). c) TODO: all operators must be valid (if not "*"). d) evaluates to the same read length as the sequence string.
cigar | cigar string to be validated. |
sequence | sequence to check the cigar against. |
validationErrors | status to append any errors to. |
Definition at line 551 of file SamValidation.cpp.
References isValidCigar().
{ return(isValidCigar(cigar, strlen(sequence), validationErrors)); }
bool SamValidator::isValidCigar | ( | const char * | cigar, |
int | seqLen, | ||
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the cigar.
Cigar validation depends on sequence. Validation for CIGAR is: a) cannot be 0 length. if not "*", validate the following: b) TODO: must have an integer length for each operator (if not "*"). c) TODO: all operators must be valid (if not "*"). d) evaluates to the same read length as the sequence string.
cigar | cigar string to be validated. |
seqLen | sequence length to check the cigar against. |
validationErrors | status to append any errors to. |
Definition at line 558 of file SamValidation.cpp.
References SamValidationErrors::addError(), Cigar::getExpectedQueryBaseCount(), SamValidationError::INVALID_CIGAR, and SamValidationError::WARNING.
{ // Validation for CIGAR is: // a) cannot be 0 length. // if not "*", validate the following: // b) must have an integer length for each operator (if not "*"). TODO // c) all operators must be valid (if not "*"). TODO // d) evaluates to the same read length as the sequence string. bool status = true; String message; int32_t cigarLen = strlen(cigar); // a) cannot be 0 length. if(cigarLen == 0) { validationErrors.addError(SamValidationError::INVALID_CIGAR, SamValidationError::WARNING, "Cigar must not be blank."); status = false; } if(strcmp(cigar, "*") != 0) { // The cigar is not "*", so validate it. CigarRoller cigarRoller(cigar); // b) must have an integer length for each operator. // TODO // c) all operators must be valid. // TODO // d) is the same length as the sequence string. int cigarSeqLen = cigarRoller.getExpectedQueryBaseCount(); if(cigarSeqLen != seqLen) { message = "CIGAR does not evaluate to the same length as SEQ, ("; message += cigarSeqLen; message += " != "; message += seqLen; message += ")."; validationErrors.addError(SamValidationError::INVALID_CIGAR, SamValidationError::WARNING, message.c_str()); status = false; } } return(status); }
bool SamValidator::isValidFlag | ( | uint16_t | flag, |
SamValidationErrors & | validationErrors | ||
) | [static] |
Determines whether or not the flag is valid.
TODO: currently no validation is done on the flag.
flag | flag to be validated. |
validationErrors | status to append any errors to. |
Definition at line 340 of file SamValidation.cpp.
Referenced by isValid().
{ // All values in a uint16_t are valid, so return true. return(true); }
static bool SamValidator::isValidIsize | ( | ) | [static] |
TODO: validate the insertion size/observed template length.
bool SamValidator::isValidMapQuality | ( | uint8_t | mapQuality, |
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the mapping quality.
TODO: currently no validation is done on the mapping quality.
mapQuality | mapping quality to be validated. |
validationErrors | status to append any errors to. |
Definition at line 528 of file SamValidation.cpp.
Referenced by isValid().
{ // All values in a uint8_t are valid, so return true. return(true); }
static bool SamValidator::isValidMpos | ( | ) | [static] |
TODO: validate the mate/next fragment's position.
static bool SamValidator::isValidMrnm | ( | ) | [static] |
TODO: validate the mate/next fragment's reference name.
bool SamValidator::isValidQname | ( | const char * | qname, |
uint8_t | qnameLen, | ||
SamValidationErrors & | validationErrors | ||
) | [static] |
Determines whether or not the specified qname is valid.
Validation for QNAME is: a) length of the qname string is the same as the read name length. b) length is between 1 and 254. c) [ \t\n\r] (space, tab, newline, carriage return) are not allowed in the name.
qname | the read/query name. |
qnameLen | length of the read name including the terminating null (the result of SamRecord::getReadNameLength()). |
validationErrors | status to append any errors to. |
Definition at line 238 of file SamValidation.cpp.
References SamValidationErrors::addError(), SamValidationError::ERROR, SamValidationError::INVALID_QNAME, and SamValidationError::WARNING.
Referenced by isValid().
{ // Validation for QNAME is: // a) length of the qname string is the same as the read name length // b) length is between 1 and 254. // c) [ \t\n\r] are not allowed in the name. bool status = true; // Get the length of the qname string. int32_t qnameLenNull = strlen(qname) + 1; //////////////////////////////////// // a) length of the qname string is the same as the read name length if(qnameLenNull != readNameLen) { // This results from a poorly formatted bam file, where the null // terminated read_name field is not the same length as specified by // read_name_len. String message = "Invalid Query Name - the string length ("; message += qnameLenNull; message += ") does not match the specified query name length ("; message += readNameLen; message += ")."; validationErrors.addError(SamValidationError::INVALID_QNAME, SamValidationError::ERROR, message.c_str()); status = false; } //////////////////////////////////// // b) length is between 1 and 254 // The length with the terminating null must be between 2 & 255, if((qnameLenNull < 2) || (qnameLenNull > 255)) { String message = "Invalid Query Name (QNAME) length: "; message += qnameLenNull; message += ". Length with the terminating null must be between 2 & 255."; validationErrors.addError(SamValidationError::INVALID_QNAME, SamValidationError::WARNING, message.c_str()); status = false; } //////////////////////////////////// // Loop through and validate they all characters are valid. // c) [ \t\n\r] are not allowed in the name. String message; for(int i = 0; i < qnameLenNull; ++i) { switch(qname[i]) { case ' ': // Invalid character. message = "Invalid character in the Query Name (QNAME): ' ' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_QNAME, SamValidationError::WARNING, message.c_str()); status = false; break; case '\t': // Invalid character. 
message = "Invalid character in the Query Name (QNAME): '\t' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_QNAME, SamValidationError::WARNING, message.c_str()); status = false; break; case '\n': // Invalid character. message = "Invalid character in the Query Name (QNAME): '\n' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_QNAME, SamValidationError::WARNING, message.c_str()); status = false; break; case '\r': // Invalid character. message = "Invalid character in the Query Name (QNAME): '\r' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_QNAME, SamValidationError::WARNING, message.c_str()); status = false; break; } } return(status); }
bool SamValidator::isValidQuality | ( | SamRecord & | samRecord, |
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the base quality.
Quality validation depends on sequence. Validation for quality is: a) quality & sequence are the same length if both are specified. TODO: more validation.
samRecord | record whose quality should be validated. |
validationErrors | status to append any errors to. |
Definition at line 611 of file SamValidation.cpp.
References SamRecord::getQuality(), and SamRecord::getReadLength().
Referenced by isValid(), and isValidQuality().
{ return(isValidQuality(samRecord.getQuality(), samRecord.getReadLength(), validationErrors)); }
bool SamValidator::isValidQuality | ( | const char * | quality, |
const char * | sequence, | ||
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the base quality.
Quality validation depends on sequence. Validation for quality is: a) quality & sequence are the same length if both are specified. TODO: more validation.
quality | quality string to be validated. |
sequence | sequence to check the quality against ("*" is treated as length 0). |
validationErrors | status to append any errors to. |
Definition at line 620 of file SamValidation.cpp.
References isValidQuality().
{ // Determine the length of the sequence. int seqLen = strlen(sequence); // Check if the sequence is '*' since then the seqLength is 0. if(strcmp(sequence, "*") == 0) { seqLen = 0; } return(isValidQuality(quality, seqLen, validationErrors)); }
bool SamValidator::isValidQuality | ( | const char * | quality, |
int | seqLength, | ||
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the base quality.
Quality validation depends on sequence. Validation for quality is: a) quality & sequence are the same length if both are specified. TODO: more validation.
quality | quality string to be validated. |
seqLength | sequence length to check the quality against. |
validationErrors | status to append any errors to. |
Definition at line 636 of file SamValidation.cpp.
References SamValidationErrors::addError(), SamValidationError::INVALID_QUAL, and SamValidationError::WARNING.
{ bool status = true; // If the quality or the sequence are non-"*", validate that the quality // and sequence have the same length. if((seqLength != 0) && (strcmp(quality, "*") != 0)) { int qualLen = strlen(quality); // Both the sequence and the quality are not "*", so validate // that they are the same length. if(seqLength != qualLen) { // Both fields are specified but are different lengths. String message = "QUAL is not the same length as SEQ, ("; message += qualLen; message += " != "; message += seqLength; message += ")."; validationErrors.addError(SamValidationError::INVALID_QUAL, SamValidationError::WARNING, message.c_str()); status = false; } } return(status); }
bool SamValidator::isValidRefID | ( | int32_t | refID, |
const SamReferenceInfo & | refInfo, | ||
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate whether or not the specified reference id is valid.
Validation for rID is: a) must be between -1 and the number of entries in refInfo minus 1. -1 is allowed; otherwise it must be a valid index into refInfo (less than its number of entries).
refID | reference id to be validated. |
refInfo | sam reference information containing the mapping from reference id to reference name for this refID. |
validationErrors | status to append any errors to. |
Definition at line 476 of file SamValidation.cpp.
References SamValidationErrors::addError(), SamReferenceInfo::getNumEntries(), SamValidationError::INVALID_REF_ID, and SamValidationError::WARNING.
Referenced by isValid().
{ // Validation for rID is: // a) must be between -1 and the number of refInfo. // -1 is allowed, and otherwise it must properly index into the array. bool status = true; if((refID < -1) || (refID >= refInfo.getNumEntries())) { // Reference ID is too large or too small. String message = "Invalid Reference ID, out of range ("; message += refID; message += ") must be between -1 and "; message += refInfo.getNumEntries() - 1; message += "."; validationErrors.addError(SamValidationError::INVALID_REF_ID, SamValidationError::WARNING, message.c_str()); status = false; } return(status); }
bool SamValidator::isValidRname | ( | SamFileHeader & | samHeader, |
const char * | rname, | ||
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the reference name including validating against the header.
1) Cross validate the rname and the header. 2) perform the validation in the method that doesn't take the header.
samHeader | header associated with the rname to be validated. |
rname | reference name to be validated. |
validationErrors | status to append any errors to. |
Definition at line 348 of file SamValidation.cpp.
References SamValidationErrors::addError(), SamFileHeader::getNumSQs(), SamFileHeader::getSQ(), SamValidationError::INVALID_RNAME, and SamValidationError::WARNING.
Referenced by isValid().
{ bool status = true; // Cross validate the rname and the header. // If the rname is not '*' // AND there are any SQ records in the header, // Then the rname must be in one of them. if((strcmp(rname, "*") != 0) && (samHeader.getNumSQs() != 0) && (samHeader.getSQ(rname) == NULL)) { // There are SQ fields, but the ref name is not in it. status = false; std::string message = "RNAME, "; message += rname; message += ", was not found in a SAM Header SQ record"; validationErrors.addError(SamValidationError::INVALID_RNAME, SamValidationError::WARNING, message.c_str()); } status &= isValidRname(rname, validationErrors); return(status); }
bool SamValidator::isValidRname | ( | const char * | rname, |
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the rname without validating against the header.
Validation for RNAME is: a) cannot be 0 length. b) [ \t\n\r@=] (space, tab, newline, carriage return, '@', '=') are not allowed in the name.
rname | reference name to be validated. |
validationErrors | status to append any errors to. |
Definition at line 376 of file SamValidation.cpp.
References SamValidationErrors::addError(), SamValidationError::INVALID_RNAME, and SamValidationError::WARNING.
{ // Validation for RNAME is: // a) cannot be 0 length. // b) [ \t\n\r@=] are not allowed in the name. bool status = true; // Get the length of the rname string. int32_t rnameLen = strlen(rname); String message; if(rnameLen == 0) { validationErrors.addError(SamValidationError::INVALID_RNAME, SamValidationError::WARNING, "Reference Sequence Name (RNAME) cannot have 0 length."); status = false; } //////////////////////////////////// //////////////////////////////////// // Loop through and validate they all characters are valid. // b) [ \t\n\r] are not allowed in the name. for(int i = 0; i < rnameLen; ++i) { switch(rname[i]) { case ' ': // Invalid character. message = "Invalid character in the Reference Sequence Name (RNAME): ' ' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_RNAME, SamValidationError::WARNING, message.c_str()); status = false; break; case '\t': // Invalid character. message = "Invalid character in the Reference Sequence Name (RNAME): '\t' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_RNAME, SamValidationError::WARNING, message.c_str()); status = false; break; case '\n': // Invalid character. message = "Invalid character in the Reference Sequence Name (RNAME): '\n' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_RNAME, SamValidationError::WARNING, message.c_str()); status = false; break; case '\r': // Invalid character. message = "Invalid character in the Reference Sequence Name (RNAME): '\r' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_RNAME, SamValidationError::WARNING, message.c_str()); status = false; break; case '@': // Invalid character. 
message = "Invalid character in the Reference Sequence Name (RNAME): '@' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_RNAME, SamValidationError::WARNING, message.c_str()); status = false; break; case '=': // Invalid character. message = "Invalid character in the Reference Sequence Name (RNAME): '=' at position "; message += i; message += "."; validationErrors.addError(SamValidationError::INVALID_RNAME, SamValidationError::WARNING, message.c_str()); status = false; break; default: // Allowed character. break; } } return(status); }
static bool SamValidator::isValidSeq | ( | ) | [static] |
TODO, validate the sequence.
bool SamValidator::isValidSequence | ( | SamRecord & | samRecord, |
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the sequence, but not against the cigar or quality string.
Validation against cigar is done in isValidCigar. Validation against the quality string is done in isValidQuality. TODO: currently no validation is done in this method.
samRecord | record whose sequence should be validated. |
validationErrors | status to append any errors to. |
Definition at line 536 of file SamValidation.cpp.
Referenced by isValid().
{ return(true); }
bool SamValidator::isValidTags | ( | SamRecord & | samRecord, |
SamValidationErrors & | validationErrors | ||
) | [static] |
Validate the tags.
Validation for tags is: a) check that the "MD" tag is correct if it is present. TODO: more validation.
samRecord | record whose tags should be validated. |
validationErrors | status to append any errors to. |
Definition at line 669 of file SamValidation.cpp.
References SamValidationErrors::addError(), SamTags::createMDTag(), SamRecord::getReference(), SamRecord::getStringTag(), SamValidationError::INVALID_TAG, SamTags::isMDTagCorrect(), and SamValidationError::WARNING.
Referenced by isValid().
{ bool status = true; GenomeSequence* reference = samRecord.getReference(); // If the reference is not null, check the MD tag. if(reference != NULL) { const String* recordMD = samRecord.getStringTag(SamTags::MD_TAG); if(recordMD != NULL) { // The record has an MD tag so check to see if it is correct. if(!SamTags::isMDTagCorrect(samRecord, *reference)) { // Invalid MD tags. String correctMD; if(!SamTags::createMDTag(correctMD, samRecord, *reference)) { // Failed to get the MD tag, so indicate that it is unknown. correctMD = "UNKNOWN"; } String message = "Incorrect MD Tag, "; message += *recordMD; message += ", should be "; message += correctMD; message += "."; validationErrors.addError(SamValidationError::INVALID_TAG, SamValidationError::WARNING, message.c_str()); status = false; } } } return(status); }
static bool SamValidator::isValidValue | ( | ) | [static] |
TODO validate the tag value.
static bool SamValidator::isValidVtype | ( | ) | [static] |
TODO validate the tag vtype.