libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __SAM_VALIDATION_H__ 00019 #define __SAM_VALIDATION_H__ 00020 00021 #include "SamFile.h" 00022 #include <list> 00023 00024 // On windows, ERROR and WARNING are pre-defined macros, so undefine them. 00025 #ifdef WARNING 00026 #undef WARNING 00027 #endif 00028 #ifdef ERROR 00029 #undef ERROR 00030 #endif 00031 00032 /// The SamValidationError class describes a validation error that occured, 00033 /// containing the error type, severity, and textual error message. 00034 class SamValidationError 00035 { 00036 public: 00037 /// Severity of the error. 00038 enum Severity 00039 { 00040 WARNING, ///< Warning is used if it is just an invalid value. 00041 ERROR ///< Error is used if parsing could not succeed. 00042 }; 00043 00044 /// Type of the error. 00045 /// TODO: NOT ALL INVALID TYPES HAVE BEEN ADDED SINCE NOT ALL VALIDATION 00046 /// IS COMPLETE YET 00047 enum Type 00048 { 00049 INVALID_QNAME, ///< Invalid read/query name 00050 INVALID_REF_ID, ///< Invalid reference id 00051 INVALID_RNAME, ///< Invalid reference name 00052 INVALID_POS, ///< Invalid position 00053 INVALID_MAPQ, ///< Invalid mapping quality 00054 INVALID_CIGAR, ///< Invalid CIGAR 00055 INVALID_MRNM, ///< Invalid mate/next fragment reference name 00056 INVALID_QUAL, ///< Invalid base quality 00057 INVALID_TAG ///< Invalid tag 00058 }; 00059 00060 /// Get the string representing the specified type of validation error. 00061 static const char* getTypeString(Type type); 00062 00063 /// Constructor that sets the type, severity, and message for the 00064 /// validation error. 00065 SamValidationError(Type type, Severity severity, std::string Message); 00066 00067 /// Return the type enum of this validation error object. 00068 Type getType() const; 00069 00070 /// Return the severity enum of this validation error object. 00071 Severity getSeverity() const; 00072 00073 /// Return the error message of this validation error object. 00074 const char* getMessage() const; 00075 00076 /// Return the string representing this object's type of validation error. 00077 const char* getTypeString() const; 00078 00079 /// Return the string representing this object's severity of validation 00080 /// error. 00081 const char* getSeverityString() const; 00082 00083 /// Get the error string representing this object's error. 00084 void getErrorString(std::string& errorString) const; 00085 00086 /// Print a formatted output of the error to cerr. 00087 void printError() const; 00088 00089 private: 00090 SamValidationError(); 00091 00092 static const char* enumTypeString[]; 00093 static const char* enumSeverityString[]; 00094 00095 Type myType; 00096 Severity mySeverity; 00097 std::string myMessage; 00098 00099 }; 00100 00101 00102 /// stream output for validation failure information 00103 inline std::ostream &operator << (std::ostream &stream, 00104 const SamValidationError &error) 00105 { 00106 std::string errorMessage; 00107 error.getErrorString(errorMessage); 00108 stream << errorMessage; 00109 return stream; 00110 } 00111 00112 00113 /// The SamValidationErrors class is a container class that holds 00114 /// SamValidationError Objects, allowing a validation method to return all 00115 /// of the invalid errors rather than just one. 00116 class SamValidationErrors 00117 { 00118 public: 00119 /// Constructor. 00120 SamValidationErrors(); 00121 /// Destructor 00122 ~SamValidationErrors(); 00123 00124 /// Remove all the errors from the container. 00125 void clear(); 00126 00127 /// Add the specified error to this container. 00128 void addError(SamValidationError::Type newType, 00129 SamValidationError::Severity newSeverity, 00130 const char* newMessage); 00131 00132 /// Return the number of validation errors contained in this object. 00133 unsigned int numErrors(); 00134 00135 /// Return a pointer to the next error without removing it from the 00136 /// container, and returning null once all errors have been retrieved 00137 /// until resetErrorIter is called. 00138 const SamValidationError* getNextError(); 00139 00140 /// Reset the iterator to the begining of the errors. 00141 void resetErrorIter(); 00142 00143 /// Append the error messages contained in this container to the passed 00144 /// in string. 00145 void getErrorString(std::string& errorString) const; 00146 00147 private: 00148 std::list<const SamValidationError*> myValidationErrors; 00149 std::list<const SamValidationError*>::const_iterator myErrorIter; 00150 }; 00151 00152 00153 /// stream output for all validation failures information 00154 inline std::ostream& operator << (std::ostream& stream, 00155 const SamValidationErrors& errors) 00156 { 00157 std::string errorString = ""; 00158 errors.getErrorString(errorString); 00159 stream << errorString; 00160 return stream; 00161 } 00162 00163 00164 /// The SamValidator class contains static methods for validating the SAM/BAM 00165 /// Record and each of its fields. The generic isValid method performs all of 00166 /// the other validations. The SamValidator methods return whether or not what 00167 /// is being validated is valid. True means it is valid, false means it is not. 00168 /// The specifics of the invalid value(s) are contained in the 00169 /// SamValidationErrors object that is passed in (by reference) to the method. 00170 /// The specific errors can be pulled out of that object. 00171 /// TODO: VALIDATION METHODS STILL NEED TO BE ADDED, and isValid does not yet 00172 /// validate all fields!!! 00173 class SamValidator 00174 { 00175 public: 00176 00177 /// Validates whether or not the specified SamRecord is valid, calling 00178 /// all of the other validations. 00179 /// TODO: more validation needs to be added. 00180 /// \param samHeader header associated with the record to be validated. 00181 /// \param samRecord record to be validated. 00182 /// \param validationErrors status to append any errors too. 00183 /// \return true if it is valid, false and appends to SamValidationErrors 00184 /// if it is not 00185 static bool isValid(SamFileHeader& samHeader, SamRecord& samRecord, 00186 SamValidationErrors& validationErrors); 00187 00188 /// Determines whether or not the specified qname is valid. 00189 /// Validation for QNAME is: 00190 /// a) length of the qname string is the same as the read name length 00191 /// b) length is between 1 and 254. 00192 /// c) [ \t\n\r] are not allowed in the name. 00193 /// \param qname the read/query name. 00194 /// \param qnameLen length of the read including the null (result of 00195 /// SamRecord::getReadNameLength(). 00196 /// \param validationErrors status to append any errors too. 00197 /// \return true if it is valid, false and appends to SamValidationErrors 00198 /// if it is not 00199 static bool isValidQname(const char* qname, uint8_t qnameLen, 00200 SamValidationErrors& validationErrors); 00201 00202 /// Determines whether or not the flag is valid. 00203 /// TODO: currently no validation is done on the flag. 00204 /// \param flag flag to be validated. 00205 /// \param validationErrors status to append any errors too. 00206 /// \return true if it is valid, false and appends to SamValidationErrors 00207 /// if it is not 00208 static bool isValidFlag(uint16_t flag, 00209 SamValidationErrors& validationErrors); 00210 00211 /// Validate the reference name including validating against the header. 00212 /// 1) Cross validate the rname and the header. 00213 /// 2) perform the validation in the method that doesn't take the header. 00214 /// \param samHeader header associated with the rname to be validated. 00215 /// \param rname reference name to be validated. 00216 /// \param validationErrors status to append any errors too. 00217 /// \return true if it is valid, false and appends to SamValidationErrors 00218 /// if it is not 00219 static bool isValidRname(SamFileHeader& samHeader, 00220 const char* rname, 00221 SamValidationErrors& validationErrors); 00222 /// Validate the rname without validating against the header. 00223 /// Validation for RNAME is: 00224 /// a) cannot be 0 length. 00225 /// b) [ \t\n\r@=] are not allowed in the name. 00226 /// \param rname reference name to be validated. 00227 /// \param validationErrors status to append any errors too. 00228 /// \return true if it is valid, false and appends to SamValidationErrors 00229 /// if it is not 00230 static bool isValidRname(const char* rname, 00231 SamValidationErrors& validationErrors); 00232 00233 /// Validate whether or not the specified reference id is valid. 00234 /// Validation for rID is: 00235 /// a) must be between -1 and the number of refInfo. 00236 /// -1 is allowed, and otherwise it must properly index into the array. 00237 /// \param refID reference id to be validated. 00238 /// \param refInfo sam reference information containing the mapping 00239 /// from reference id to reference name for this refID. 00240 /// \param validationErrors status to append any errors too. 00241 /// \return true if it is valid, false and appends to SamValidationErrors 00242 /// if it is not 00243 static bool isValidRefID(int32_t refID, const SamReferenceInfo& refInfo, 00244 SamValidationErrors& validationErrors); 00245 00246 /// Validate the refeference position. 00247 /// Validation for pos is: 00248 /// a) must be between 0 and (2^29)-1. 00249 /// \param pos position to be validated. 00250 /// \param validationErrors status to append any errors too. 00251 /// \return true if it is valid, false and appends to SamValidationErrors 00252 /// if it is not 00253 static bool isValid1BasedPos(int32_t pos, 00254 SamValidationErrors& validationErrors); 00255 00256 /// Validate the mapping quality. 00257 /// TODO: currently no validation is done on the mapping quality. 00258 /// \param mapQuality mapping quality to be validated. 00259 /// \param validationErrors status to append any errors too. 00260 /// \return true if it is valid, false and appends to SamValidationErrors 00261 /// if it is not 00262 static bool isValidMapQuality(uint8_t mapQuality, 00263 SamValidationErrors& validationErrors); 00264 00265 /// Validate the sequence, but not against the cigar or quality string. 00266 /// Validation against cigar is done in isValidCigar. 00267 /// Validation against the quality string is done in isValidQuality. 00268 /// TODO: currently no validation is done in this method. 00269 /// \param samRecord record whose sequence should be validated. 00270 /// \param validationErrors status to append any errors too. 00271 /// \return true if it is valid, false and appends to SamValidationErrors 00272 /// if it is not 00273 static bool isValidSequence(SamRecord& samRecord, 00274 SamValidationErrors& validationErrors); 00275 00276 /// Validate the cigar. Cigar validation depends on sequence. 00277 /// Validation for CIGAR is: 00278 /// a) cannot be 0 length. 00279 /// if not "*", validate the following: 00280 /// b) must have an integer length for each operator (if not "*"). TODO 00281 /// c) all operators must be valid (if not "*"). TODO 00282 /// d) evaluates to the same read length as the sequence string. 00283 /// \param samRecord record whose cigar should be validated. 00284 /// \param validationErrors status to append any errors too. 00285 /// \return true if it is valid, false and appends to SamValidationErrors 00286 /// if it is not 00287 static bool isValidCigar(SamRecord& samRecord, 00288 SamValidationErrors& validationErrors); 00289 00290 /// Validate the cigar. Cigar validation depends on sequence. 00291 /// Validation for CIGAR is: 00292 /// a) cannot be 0 length. 00293 /// if not "*", validate the following: 00294 /// b) must have an integer length for each operator (if not "*"). TODO 00295 /// c) all operators must be valid (if not "*"). TODO 00296 /// d) evaluates to the same read length as the sequence string. 00297 /// \param cigar cigar string to be validated. 00298 /// \param sequence sequence to check the cigar against. 00299 /// \param validationErrors status to append any errors too. 00300 /// \return true if it is valid, false and appends to SamValidationErrors 00301 /// if it is not 00302 static bool isValidCigar(const char* cigar, const char* sequence, 00303 SamValidationErrors& validationErrors); 00304 00305 /// Validate the cigar. Cigar validation depends on sequence. 00306 /// Validation for CIGAR is: 00307 /// a) cannot be 0 length. 00308 /// if not "*", validate the following: 00309 /// b) TODO: must have an integer length for each operator (if not "*"). 00310 /// c) TODO: all operators must be valid (if not "*"). 00311 /// d) evaluates to the same read length as the sequence string. 00312 /// \param cigar cigar string to be validated. 00313 /// \param seqLen sequence length to check the cigar against. 00314 /// \param validationErrors status to append any errors too. 00315 /// \return true if it is valid, false and appends to SamValidationErrors 00316 /// if it is not 00317 static bool isValidCigar(const char* cigar, 00318 int seqLen, 00319 SamValidationErrors& validationErrors); 00320 00321 /// TODO: validate the mate/next fragment's reference name. 00322 /// \return true if it is valid, false and appends to SamValidationErrors 00323 /// if it is not 00324 static bool isValidMrnm(); 00325 00326 /// TODO: validate the mate/next fragment's position. 00327 /// \return true if it is valid, false and appends to SamValidationErrors 00328 /// if it is not 00329 static bool isValidMpos(); 00330 00331 /// TODO: validate the insertion size/observed template length. 00332 /// \return true if it is valid, false and appends to SamValidationErrors 00333 /// if it is not 00334 static bool isValidIsize(); 00335 00336 /// TODO, validate the sequence. 00337 /// \return true if it is valid, false and appends to SamValidationErrors 00338 /// if it is not 00339 static bool isValidSeq(); 00340 00341 /// Validate the base quality. 00342 /// Quality validation depends on sequence. 00343 /// Validation for quality is: 00344 /// a) quality & sequence are the same length if both are specified. 00345 /// TODO: more validation. 00346 /// \param samRecord record whose quality should be validated. 00347 /// \param validationErrors status to append any errors too. 00348 /// \return true if it is valid, false and appends to SamValidationErrors 00349 /// if it is not 00350 static bool isValidQuality(SamRecord& samRecord, 00351 SamValidationErrors& validationErrors); 00352 00353 /// Validate the base quality. 00354 /// Quality validation depends on sequence. 00355 /// Validation for quality is: 00356 /// a) quality & sequence are the same length if both are specified. 00357 /// TODO: more validation. 00358 /// \param quality quality string to be validated. 00359 /// \param seqLen sequence length to check the quality against. 00360 /// \param validationErrors status to append any errors too. 00361 /// \return true if it is valid, false and appends to SamValidationErrors 00362 /// if it is not 00363 static bool isValidQuality(const char* quality, const char* sequence, 00364 SamValidationErrors& validationErrors); 00365 00366 /// Validate the base quality. 00367 /// Quality validation depends on sequence. 00368 /// Validation for quality is: 00369 /// a) quality & sequence are the same length if both are specified. 00370 /// TODO: more validation. 00371 /// \param quality quality string to be validated. 00372 /// \param seqLen sequence length to check the quality against. 00373 /// \param validationErrors status to append any errors too. 00374 /// \return true if it is valid, false and appends to SamValidationErrors 00375 /// if it is not 00376 bool static isValidQuality(const char* quality, 00377 int seqLength, 00378 SamValidationErrors& validationErrors); 00379 00380 /// Validate the tags. 00381 /// Validation for tags is: 00382 /// a) check that the "MD" tag is correct if it is present. 00383 /// TODO: more validation. 00384 /// \param samRecord record whose tags should be validated. 00385 /// \param validationErrors status to append any errors too. 00386 /// \return true if it is valid, false and appends to SamValidationErrors 00387 /// if it is not 00388 static bool isValidTags(SamRecord& samRecord, 00389 SamValidationErrors& validationErrors); 00390 00391 /// TODO validate the tag vtype 00392 /// \return true if it is valid, false and appends to SamValidationErrors 00393 /// if it is not 00394 static bool isValidVtype(); 00395 00396 /// TODO validate the tag vtype 00397 /// \return true if it is valid, false and appends to SamValidationErrors 00398 /// if it is not 00399 static bool isValidValue(); 00400 }; 00401 00402 00403 #endif