00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __SAM_VALIDATION_H__ 00019 #define __SAM_VALIDATION_H__ 00020 00021 #include "SamFile.h" 00022 #include <list> 00023 00024 /// The SamValidationError class describes a validation error that occured, 00025 /// containing the error type, severity, and textual error message. 00026 class SamValidationError 00027 { 00028 public: 00029 /// Severity of the error. 00030 enum Severity 00031 { 00032 WARNING, ///< Warning is used if it is just an invalid value. 00033 ERROR ///< Error is used if parsing could not succeed. 00034 }; 00035 00036 /// Type of the error. 00037 /// TODO: NOT ALL INVALID TYPES HAVE BEEN ADDED SINCE NOT ALL VALIDATION 00038 /// IS COMPLETE YET 00039 enum Type 00040 { 00041 INVALID_QNAME, ///< Invalid read/query name 00042 INVALID_REF_ID, ///< Invalid reference id 00043 INVALID_RNAME, ///< Invalid reference name 00044 INVALID_POS, ///< Invalid position 00045 INVALID_MAPQ, ///< Invalid mapping quality 00046 INVALID_CIGAR, ///< Invalid CIGAR 00047 INVALID_MRNM, ///< Invalid mate/next fragment reference name 00048 INVALID_QUAL, ///< Invalid base quality 00049 INVALID_TAG ///< Invalid tag 00050 }; 00051 00052 /// Get the string representing the specified type of validation error. 00053 static const char* getTypeString(Type type); 00054 00055 /// Constructor that sets the type, severity, and message for the 00056 /// validation error. 00057 SamValidationError(Type type, Severity severity, std::string Message); 00058 00059 /// Return the type enum of this validation error object. 00060 Type getType() const; 00061 00062 /// Return the severity enum of this validation error object. 00063 Severity getSeverity() const; 00064 00065 /// Return the error message of this validation error object. 00066 const char* getMessage() const; 00067 00068 /// Return the string representing this object's type of validation error. 00069 const char* getTypeString() const; 00070 00071 /// Return the string representing this object's severity of validation 00072 /// error. 00073 const char* getSeverityString() const; 00074 00075 /// Get the error string representing this object's error. 00076 void getErrorString(std::string& errorString) const; 00077 00078 /// Print a formatted output of the error to cerr. 00079 void printError() const; 00080 00081 private: 00082 SamValidationError(); 00083 00084 static const char* enumTypeString[]; 00085 static const char* enumSeverityString[]; 00086 00087 Type myType; 00088 Severity mySeverity; 00089 std::string myMessage; 00090 00091 }; 00092 00093 00094 /// stream output for validation failure information 00095 inline std::ostream &operator << (std::ostream &stream, 00096 const SamValidationError &error) 00097 { 00098 std::string errorMessage; 00099 error.getErrorString(errorMessage); 00100 stream << errorMessage; 00101 return stream; 00102 } 00103 00104 00105 /// The SamValidationErrors class is a container class that holds 00106 /// SamValidationError Objects, allowing a validation method to return all 00107 /// of the invalid errors rather than just one. 00108 class SamValidationErrors 00109 { 00110 public: 00111 /// Constructor. 00112 SamValidationErrors(); 00113 /// Destructor 00114 ~SamValidationErrors(); 00115 00116 /// Remove all the errors from the container. 00117 void clear(); 00118 00119 /// Add the specified error to this container. 00120 void addError(SamValidationError::Type newType, 00121 SamValidationError::Severity newSeverity, 00122 const char* newMessage); 00123 00124 /// Return the number of validation errors contained in this object. 00125 unsigned int numErrors(); 00126 00127 /// Return a pointer to the next error without removing it from the 00128 /// container, and returning null once all errors have been retrieved 00129 /// until resetErrorIter is called. 00130 const SamValidationError* getNextError(); 00131 00132 /// Reset the iterator to the begining of the errors. 00133 void resetErrorIter(); 00134 00135 /// Append the error messages contained in this container to the passed 00136 /// in string. 00137 void getErrorString(std::string& errorString) const; 00138 00139 private: 00140 std::list<const SamValidationError*> myValidationErrors; 00141 std::list<const SamValidationError*>::const_iterator myErrorIter; 00142 }; 00143 00144 00145 /// stream output for all validation failures information 00146 inline std::ostream& operator << (std::ostream& stream, 00147 const SamValidationErrors& errors) 00148 { 00149 std::string errorString = ""; 00150 errors.getErrorString(errorString); 00151 stream << errorString; 00152 return stream; 00153 } 00154 00155 00156 /// The SamValidator class contains static methods for validating the SAM/BAM 00157 /// Record and each of its fields. The generic isValid method performs all of 00158 /// the other validations. The SamValidator methods return whether or not what 00159 /// is being validated is valid. True means it is valid, false means it is not. 00160 /// The specifics of the invalid value(s) are contained in the 00161 /// SamValidationErrors object that is passed in (by reference) to the method. 00162 /// The specific errors can be pulled out of that object. 00163 /// TODO: VALIDATION METHODS STILL NEED TO BE ADDED, and isValid does not yet 00164 /// validate all fields!!! 00165 class SamValidator 00166 { 00167 public: 00168 00169 /// Validates whether or not the specified SamRecord is valid, calling 00170 /// all of the other validations. 00171 /// TODO: more validation needs to be added. 00172 /// \param samHeader header associated with the record to be validated. 00173 /// \param samRecord record to be validated. 00174 /// \param validationErrors status to append any errors too. 00175 /// \return true if it is valid, false and appends to SamValidationErrors 00176 /// if it is not 00177 static bool isValid(SamFileHeader& samHeader, SamRecord& samRecord, 00178 SamValidationErrors& validationErrors); 00179 00180 /// Determines whether or not the specified qname is valid. 00181 /// Validation for QNAME is: 00182 /// a) length of the qname string is the same as the read name length 00183 /// b) length is between 1 and 254. 00184 /// c) [ \t\n\r] are not allowed in the name. 00185 /// \param qname the read/query name. 00186 /// \param qnameLen length of the read including the null (result of 00187 /// SamRecord::getReadNameLength(). 00188 /// \param validationErrors status to append any errors too. 00189 /// \return true if it is valid, false and appends to SamValidationErrors 00190 /// if it is not 00191 static bool isValidQname(const char* qname, uint8_t qnameLen, 00192 SamValidationErrors& validationErrors); 00193 00194 /// Determines whether or not the flag is valid. 00195 /// TODO: currently no validation is done on the flag. 00196 /// \param flag flag to be validated. 00197 /// \param validationErrors status to append any errors too. 00198 /// \return true if it is valid, false and appends to SamValidationErrors 00199 /// if it is not 00200 static bool isValidFlag(uint16_t flag, 00201 SamValidationErrors& validationErrors); 00202 00203 /// Validate the reference name including validating against the header. 00204 /// 1) Cross validate the rname and the header. 00205 /// 2) perform the validation in the method that doesn't take the header. 00206 /// \param samHeader header associated with the rname to be validated. 00207 /// \param rname reference name to be validated. 00208 /// \param validationErrors status to append any errors too. 00209 /// \return true if it is valid, false and appends to SamValidationErrors 00210 /// if it is not 00211 static bool isValidRname(SamFileHeader& samHeader, 00212 const char* rname, 00213 SamValidationErrors& validationErrors); 00214 /// Validate the rname without validating against the header. 00215 /// Validation for RNAME is: 00216 /// a) cannot be 0 length. 00217 /// b) [ \t\n\r@=] are not allowed in the name. 00218 /// \param rname reference name to be validated. 00219 /// \param validationErrors status to append any errors too. 00220 /// \return true if it is valid, false and appends to SamValidationErrors 00221 /// if it is not 00222 static bool isValidRname(const char* rname, 00223 SamValidationErrors& validationErrors); 00224 00225 /// Validate whether or not the specified reference id is valid. 00226 /// Validation for rID is: 00227 /// a) must be between -1 and the number of refInfo. 00228 /// -1 is allowed, and otherwise it must properly index into the array. 00229 /// \param refID reference id to be validated. 00230 /// \param refInfo sam reference information containing the mapping 00231 /// from reference id to reference name for this refID. 00232 /// \param validationErrors status to append any errors too. 00233 /// \return true if it is valid, false and appends to SamValidationErrors 00234 /// if it is not 00235 static bool isValidRefID(int32_t refID, const SamReferenceInfo& refInfo, 00236 SamValidationErrors& validationErrors); 00237 00238 /// Validate the refeference position. 00239 /// Validation for pos is: 00240 /// a) must be between 0 and (2^29)-1. 00241 /// \param pos position to be validated. 00242 /// \param validationErrors status to append any errors too. 00243 /// \return true if it is valid, false and appends to SamValidationErrors 00244 /// if it is not 00245 static bool isValid1BasedPos(int32_t pos, 00246 SamValidationErrors& validationErrors); 00247 00248 /// Validate the mapping quality. 00249 /// TODO: currently no validation is done on the mapping quality. 00250 /// \param mapQuality mapping quality to be validated. 00251 /// \param validationErrors status to append any errors too. 00252 /// \return true if it is valid, false and appends to SamValidationErrors 00253 /// if it is not 00254 static bool isValidMapQuality(uint8_t mapQuality, 00255 SamValidationErrors& validationErrors); 00256 00257 /// Validate the sequence, but not against the cigar or quality string. 00258 /// Validation against cigar is done in isValidCigar. 00259 /// Validation against the quality string is done in isValidQuality. 00260 /// TODO: currently no validation is done in this method. 00261 /// \param samRecord record whose sequence should be validated. 00262 /// \param validationErrors status to append any errors too. 00263 /// \return true if it is valid, false and appends to SamValidationErrors 00264 /// if it is not 00265 static bool isValidSequence(SamRecord& samRecord, 00266 SamValidationErrors& validationErrors); 00267 00268 /// Validate the cigar. Cigar validation depends on sequence. 00269 /// Validation for CIGAR is: 00270 /// a) cannot be 0 length. 00271 /// if not "*", validate the following: 00272 /// b) must have an integer length for each operator (if not "*"). TODO 00273 /// c) all operators must be valid (if not "*"). TODO 00274 /// d) evaluates to the same read length as the sequence string. 00275 /// \param samRecord record whose cigar should be validated. 00276 /// \param validationErrors status to append any errors too. 00277 /// \return true if it is valid, false and appends to SamValidationErrors 00278 /// if it is not 00279 static bool isValidCigar(SamRecord& samRecord, 00280 SamValidationErrors& validationErrors); 00281 00282 /// Validate the cigar. Cigar validation depends on sequence. 00283 /// Validation for CIGAR is: 00284 /// a) cannot be 0 length. 00285 /// if not "*", validate the following: 00286 /// b) must have an integer length for each operator (if not "*"). TODO 00287 /// c) all operators must be valid (if not "*"). TODO 00288 /// d) evaluates to the same read length as the sequence string. 00289 /// \param cigar cigar string to be validated. 00290 /// \param sequence sequence to check the cigar against. 00291 /// \param validationErrors status to append any errors too. 00292 /// \return true if it is valid, false and appends to SamValidationErrors 00293 /// if it is not 00294 static bool isValidCigar(const char* cigar, const char* sequence, 00295 SamValidationErrors& validationErrors); 00296 00297 /// Validate the cigar. Cigar validation depends on sequence. 00298 /// Validation for CIGAR is: 00299 /// a) cannot be 0 length. 00300 /// if not "*", validate the following: 00301 /// b) TODO: must have an integer length for each operator (if not "*"). 00302 /// c) TODO: all operators must be valid (if not "*"). 00303 /// d) evaluates to the same read length as the sequence string. 00304 /// \param cigar cigar string to be validated. 00305 /// \param seqLen sequence length to check the cigar against. 00306 /// \param validationErrors status to append any errors too. 00307 /// \return true if it is valid, false and appends to SamValidationErrors 00308 /// if it is not 00309 static bool isValidCigar(const char* cigar, 00310 int seqLen, 00311 SamValidationErrors& validationErrors); 00312 00313 /// TODO: validate the mate/next fragment's reference name. 00314 /// \return true if it is valid, false and appends to SamValidationErrors 00315 /// if it is not 00316 static bool isValidMrnm(); 00317 00318 /// TODO: validate the mate/next fragment's position. 00319 /// \return true if it is valid, false and appends to SamValidationErrors 00320 /// if it is not 00321 static bool isValidMpos(); 00322 00323 /// TODO: validate the insertion size/observed template length. 00324 /// \return true if it is valid, false and appends to SamValidationErrors 00325 /// if it is not 00326 static bool isValidIsize(); 00327 00328 /// TODO, validate the sequence. 00329 /// \return true if it is valid, false and appends to SamValidationErrors 00330 /// if it is not 00331 static bool isValidSeq(); 00332 00333 /// Validate the base quality. 00334 /// Quality validation depends on sequence. 00335 /// Validation for quality is: 00336 /// a) quality & sequence are the same length if both are specified. 00337 /// TODO: more validation. 00338 /// \param samRecord record whose quality should be validated. 00339 /// \param validationErrors status to append any errors too. 00340 /// \return true if it is valid, false and appends to SamValidationErrors 00341 /// if it is not 00342 static bool isValidQuality(SamRecord& samRecord, 00343 SamValidationErrors& validationErrors); 00344 00345 /// Validate the base quality. 00346 /// Quality validation depends on sequence. 00347 /// Validation for quality is: 00348 /// a) quality & sequence are the same length if both are specified. 00349 /// TODO: more validation. 00350 /// \param quality quality string to be validated. 00351 /// \param seqLen sequence length to check the quality against. 00352 /// \param validationErrors status to append any errors too. 00353 /// \return true if it is valid, false and appends to SamValidationErrors 00354 /// if it is not 00355 static bool isValidQuality(const char* quality, const char* sequence, 00356 SamValidationErrors& validationErrors); 00357 00358 /// Validate the base quality. 00359 /// Quality validation depends on sequence. 00360 /// Validation for quality is: 00361 /// a) quality & sequence are the same length if both are specified. 00362 /// TODO: more validation. 00363 /// \param quality quality string to be validated. 00364 /// \param seqLen sequence length to check the quality against. 00365 /// \param validationErrors status to append any errors too. 00366 /// \return true if it is valid, false and appends to SamValidationErrors 00367 /// if it is not 00368 bool static isValidQuality(const char* quality, 00369 int seqLength, 00370 SamValidationErrors& validationErrors); 00371 00372 /// Validate the tags. 00373 /// Validation for tags is: 00374 /// a) check that the "MD" tag is correct if it is present. 00375 /// TODO: more validation. 00376 /// \param samRecord record whose tags should be validated. 00377 /// \param validationErrors status to append any errors too. 00378 /// \return true if it is valid, false and appends to SamValidationErrors 00379 /// if it is not 00380 static bool isValidTags(SamRecord& samRecord, 00381 SamValidationErrors& validationErrors); 00382 00383 /// TODO validate the tag vtype 00384 /// \return true if it is valid, false and appends to SamValidationErrors 00385 /// if it is not 00386 static bool isValidVtype(); 00387 00388 /// TODO validate the tag vtype 00389 /// \return true if it is valid, false and appends to SamValidationErrors 00390 /// if it is not 00391 static bool isValidValue(); 00392 }; 00393 00394 00395 #endif