SamValidation.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __SAM_VALIDATION_H__
00019 #define __SAM_VALIDATION_H__
00020 
00021 #include "SamFile.h"
00022 #include <list>
00023 
00024 /// The SamValidationError class describes a validation error that occurred,
00025 /// containing the error type, severity, and textual error message.
00026 class SamValidationError
00027 {
00028 public:
00029     /// Severity of the error.
00030     enum Severity 
00031         {
00032             WARNING, ///< Warning is used if it is just an invalid value.
00033             ERROR    ///< Error is used if parsing could not succeed.
00034         };
00035 
00036     /// Type of the error.
00037     /// TODO: not all invalid types have been added since not all validation
00038     /// is complete yet.
00039     enum Type
00040         {
00041             INVALID_QNAME, ///< Invalid read/query name
00042             INVALID_REF_ID, ///< Invalid reference id
00043             INVALID_RNAME, ///< Invalid reference name
00044             INVALID_POS, ///< Invalid position
00045             INVALID_MAPQ, ///< Invalid mapping quality
00046             INVALID_CIGAR, ///< Invalid CIGAR
00047             INVALID_MRNM, ///< Invalid mate/next fragment reference name
00048             INVALID_QUAL, ///< Invalid base quality
00049             INVALID_TAG ///< Invalid tag
00050         };
00051 
00052     /// Get the string representing the specified type of validation error.
00053     static const char* getTypeString(Type type);
00054 
00055     /// Constructor that sets the type, severity, and message for the
00056     /// validation error.
00057     SamValidationError(Type type, Severity severity, std::string Message);
00058    
00059     /// Return the type enum of this validation error object.
00060     Type getType() const;
00061 
00062     /// Return the severity enum of this validation error object.
00063     Severity getSeverity() const;
00064 
00065     /// Return the error message of this validation error object.
00066     const char* getMessage() const;
00067 
00068     /// Return the string representing this object's type of validation error.
00069     const char* getTypeString() const;
00070 
00071     /// Return the string representing this object's severity of validation
00072     /// error.
00073     const char* getSeverityString() const;
00074 
00075     /// Get the error string representing this object's error.
00076     void getErrorString(std::string& errorString) const;
00077 
00078     /// Print a formatted output of the error to cerr.
00079     void printError() const;
00080 
00081 private:
00082     SamValidationError(); // private: cannot be default-constructed by users
00083 
00084     static const char* enumTypeString[]; // presumably indexed by Type - confirm
00085     static const char* enumSeverityString[]; // presumably indexed by Severity
00086 
00087     Type myType; // type of this validation error
00088     Severity mySeverity; // severity of this validation error
00089     std::string myMessage; // textual message of this validation error
00090 
00091 };
00092 
00093 
00094 /// stream output for validation failure information
00095 inline std::ostream &operator << (std::ostream &stream, 
00096                                   const SamValidationError &error)
00097 {
00098     std::string errorMessage;
00099     error.getErrorString(errorMessage);
00100     stream << errorMessage;
00101     return stream;
00102 }
00103 
00104 
00105 /// The SamValidationErrors class is a container class that holds
00106 /// SamValidationError Objects, allowing a validation method to return all
00107 /// of the invalid errors rather than just one.
00108 class SamValidationErrors
00109 {
00110 public:
00111     /// Constructor.
00112     SamValidationErrors();
00113     /// Destructor
00114     ~SamValidationErrors();
00115 
00116     /// Remove all the errors from the container.
00117     void clear();
00118 
00119     /// Add the specified error to this container.
00120     void addError(SamValidationError::Type newType, 
00121                   SamValidationError::Severity newSeverity,
00122                   const char* newMessage);
00123 
00124     /// Return the number of validation errors contained in this object.
00125     unsigned int numErrors();
00126 
00127     /// Return a pointer to the next error without removing it from the 
00128     /// container. Returns null once all errors have been retrieved, and
00129     /// keeps returning null until resetErrorIter is called.
00130     const SamValidationError* getNextError();
00131    
00132     /// Reset the iterator to the beginning of the errors.
00133     void resetErrorIter();
00134 
00135     /// Append the error messages contained in this container to the passed
00136     /// in string.
00137     void getErrorString(std::string& errorString) const;
00138 
00139 private:
00140     std::list<const SamValidationError*> myValidationErrors; // NOTE(review): raw pointers, presumably owned here - confirm
00141     std::list<const SamValidationError*>::const_iterator myErrorIter; // position used by getNextError/resetErrorIter
00142 };
00143 
00144 
00145 /// stream output for all validation failures information
00146 inline std::ostream& operator << (std::ostream& stream,
00147                                   const SamValidationErrors& errors)
00148 {
00149     std::string errorString = "";
00150     errors.getErrorString(errorString);
00151     stream << errorString;
00152     return stream;
00153 }
00154 
00155 
00156 /// The SamValidator class contains static methods for validating the SAM/BAM
00157 /// Record and each of its fields. The generic isValid method performs all of
00158 /// the other validations. The SamValidator methods return whether or not what
00159 /// is being validated is valid. True means it is valid, false means it is not.
00160 /// The specifics of the invalid value(s) are contained in the
00161 /// SamValidationErrors object that is passed in (by reference) to the method.
00162 /// The specific errors can be pulled out of that object.
00163 /// TODO: validation methods still need to be added, and isValid does not yet
00164 /// validate all fields!
00165 class SamValidator
00166 {
00167 public:
00168 
00169     /// Validates whether or not the specified SamRecord is valid, calling
00170     /// all of the other validations.
00171     /// TODO: more validation needs to be added.
00172     /// \param samHeader header associated with the record to be validated.
00173     /// \param samRecord record to be validated.
00174     /// \param validationErrors status to append any errors to.
00175     /// \return true if it is valid, false and appends to SamValidationErrors
00176     /// if it is not
00177     static bool isValid(SamFileHeader& samHeader, SamRecord& samRecord, 
00178                         SamValidationErrors& validationErrors);
00179 
00180     /// Determines whether or not the specified qname is valid.
00181     /// Validation for QNAME is:
00182     ///   a) length of the qname string is the same as the read name length
00183     ///   b) length is between 1 and 254.
00184     ///   c) [ \t\n\r] are not allowed in the name.
00185     /// \param qname the read/query name.
00186     /// \param qnameLen length of the read name including the null (result of 
00187     /// SamRecord::getReadNameLength()).
00188     /// \param validationErrors status to append any errors to.
00189     /// \return true if it is valid, false and appends to SamValidationErrors
00190     /// if it is not
00191     static bool isValidQname(const char* qname, uint8_t qnameLen, 
00192                              SamValidationErrors& validationErrors);
00193 
00194     /// Determines whether or not the flag is valid.
00195     /// TODO: currently no validation is done on the flag.
00196     /// \param flag flag to be validated.
00197     /// \param validationErrors status to append any errors to.
00198     /// \return true if it is valid, false and appends to SamValidationErrors
00199     /// if it is not
00200     static bool isValidFlag(uint16_t flag,
00201                             SamValidationErrors& validationErrors);
00202 
00203     /// Validate the reference name including validating against the header.
00204     /// 1) Cross validate the rname and the header.
00205     /// 2) perform the validation in the method that doesn't take the header.
00206     /// \param samHeader header associated with the rname to be validated.
00207     /// \param rname reference name to be validated.
00208     /// \param validationErrors status to append any errors to.
00209     /// \return true if it is valid, false and appends to SamValidationErrors
00210     /// if it is not
00211     static bool isValidRname(SamFileHeader& samHeader, 
00212                              const char* rname,
00213                              SamValidationErrors& validationErrors);
00214     /// Validate the rname without validating against the header.
00215     /// Validation for RNAME is:
00216     ///   a) cannot be 0 length.
00217     ///   b) [ \t\n\r@=] are not allowed in the name.
00218     /// \param rname reference name to be validated.
00219     /// \param validationErrors status to append any errors to.
00220     /// \return true if it is valid, false and appends to SamValidationErrors
00221     /// if it is not
00222     static bool isValidRname(const char* rname,
00223                              SamValidationErrors& validationErrors);
00224 
00225     /// Validate whether or not the specified reference id is valid.
00226     /// Validation for rID is:
00227     ///  a) must be between -1 and the number of refInfo.
00228     ///     -1 is allowed, and otherwise it must properly index into the array.
00229     /// \param refID reference id to be validated.
00230     /// \param refInfo sam reference information containing the mapping
00231     /// from reference id to reference name for this refID.
00232     /// \param validationErrors status to append any errors to.
00233     /// \return true if it is valid, false and appends to SamValidationErrors
00234     /// if it is not
00235     static bool isValidRefID(int32_t refID, const SamReferenceInfo& refInfo, 
00236                              SamValidationErrors& validationErrors);
00237 
00238     /// Validate the reference position.
00239     /// Validation for pos is:
00240     ///   a) must be between 0 and (2^29)-1.
00241     /// \param pos position to be validated.
00242     /// \param validationErrors status to append any errors to.
00243     /// \return true if it is valid, false and appends to SamValidationErrors
00244     /// if it is not
00245     static bool isValid1BasedPos(int32_t pos, 
00246                                  SamValidationErrors& validationErrors);
00247 
00248     /// Validate the mapping quality.
00249     /// TODO: currently no validation is done on the mapping quality.
00250     /// \param mapQuality mapping quality to be validated.
00251     /// \param validationErrors status to append any errors to.
00252     /// \return true if it is valid, false and appends to SamValidationErrors
00253     /// if it is not
00254     static bool isValidMapQuality(uint8_t mapQuality,
00255                                   SamValidationErrors& validationErrors);
00256 
00257     /// Validate the sequence, but not against the cigar or quality string.
00258     /// Validation against cigar is done in isValidCigar.
00259     /// Validation against the quality string is done in isValidQuality.
00260     /// TODO: currently no validation is done in this method.
00261     /// \param samRecord record whose sequence should be validated.
00262     /// \param validationErrors status to append any errors to.
00263     /// \return true if it is valid, false and appends to SamValidationErrors
00264     /// if it is not
00265     static bool isValidSequence(SamRecord& samRecord,
00266                                 SamValidationErrors& validationErrors);
00267 
00268     /// Validate the cigar.  Cigar validation depends on sequence.
00269     /// Validation for CIGAR is:
00270     ///   a) cannot be 0 length.
00271     /// if not "*", validate the following:
00272     ///   b) must have an integer length for each operator (if not "*"). TODO
00273     ///   c) all operators must be valid (if not "*"). TODO
00274     ///   d) evaluates to the same read length as the sequence string.
00275     /// \param samRecord record whose cigar should be validated.
00276     /// \param validationErrors status to append any errors to.
00277     /// \return true if it is valid, false and appends to SamValidationErrors
00278     /// if it is not
00279     static bool isValidCigar(SamRecord& samRecord,
00280                              SamValidationErrors& validationErrors);
00281 
00282     /// Validate the cigar.  Cigar validation depends on sequence.
00283     /// Validation for CIGAR is:
00284     ///   a) cannot be 0 length.
00285     /// if not "*", validate the following:
00286     ///   b) must have an integer length for each operator (if not "*"). TODO
00287     ///   c) all operators must be valid (if not "*"). TODO
00288     ///   d) evaluates to the same read length as the sequence string.
00289     /// \param cigar cigar string to be validated.
00290     /// \param sequence sequence to check the cigar against.
00291     /// \param validationErrors status to append any errors to.
00292     /// \return true if it is valid, false and appends to SamValidationErrors
00293     /// if it is not
00294     static bool isValidCigar(const char* cigar, const char* sequence,
00295                              SamValidationErrors& validationErrors);
00296 
00297     /// Validate the cigar.  Cigar validation depends on sequence.
00298     /// Validation for CIGAR is:
00299     ///   a) cannot be 0 length.
00300     /// if not "*", validate the following:
00301     ///   b) TODO: must have an integer length for each operator (if not "*").
00302     ///   c) TODO: all operators must be valid (if not "*").
00303     ///   d) evaluates to the same read length as the sequence string.
00304     /// \param cigar cigar string to be validated.
00305     /// \param seqLen sequence length to check the cigar against.
00306     /// \param validationErrors status to append any errors to.
00307     /// \return true if it is valid, false and appends to SamValidationErrors
00308     /// if it is not
00309     static bool isValidCigar(const char* cigar,
00310                              int seqLen,
00311                              SamValidationErrors& validationErrors);
00312 
00313     /// TODO: validate the mate/next fragment's reference name.
00314     /// \return true if it is valid, false and appends to SamValidationErrors
00315     /// if it is not
00316     static bool isValidMrnm();
00317 
00318     /// TODO: validate the mate/next fragment's position.
00319     /// \return true if it is valid, false and appends to SamValidationErrors
00320     /// if it is not
00321     static bool isValidMpos();
00322 
00323     /// TODO: validate the insertion size/observed template length.
00324     /// \return true if it is valid, false and appends to SamValidationErrors
00325     /// if it is not
00326     static bool isValidIsize();
00327 
00328     /// TODO: validate the sequence.
00329     /// \return true if it is valid, false and appends to SamValidationErrors
00330     /// if it is not
00331     static bool isValidSeq();
00332 
00333     /// Validate the base quality.
00334     /// Quality validation depends on sequence.
00335     /// Validation for quality is:
00336     ///   a) quality & sequence are the same length if both are specified.
00337     /// TODO: more validation.
00338     /// \param samRecord record whose quality should be validated.
00339     /// \param validationErrors status to append any errors to.
00340     /// \return true if it is valid, false and appends to SamValidationErrors
00341     /// if it is not
00342     static bool isValidQuality(SamRecord& samRecord,
00343                                SamValidationErrors& validationErrors);
00344 
00345     /// Validate the base quality.
00346     /// Quality validation depends on sequence.
00347     /// Validation for quality is:
00348     ///   a) quality & sequence are the same length if both are specified.
00349     /// TODO: more validation.
00350     /// \param quality quality string to be validated.
00351     /// \param sequence sequence to check the quality against.
00352     /// \param validationErrors status to append any errors to.
00353     /// \return true if it is valid, false and appends to SamValidationErrors
00354     /// if it is not
00355     static bool isValidQuality(const char* quality, const char* sequence,
00356                                SamValidationErrors& validationErrors);
00357 
00358     /// Validate the base quality.
00359     /// Quality validation depends on sequence.
00360     /// Validation for quality is:
00361     ///   a) quality & sequence are the same length if both are specified.
00362     /// TODO: more validation.
00363     /// \param quality quality string to be validated.
00364     /// \param seqLength sequence length to check the quality against.
00365     /// \param validationErrors status to append any errors to.
00366     /// \return true if it is valid, false and appends to SamValidationErrors
00367     /// if it is not
00368     bool static isValidQuality(const char* quality,
00369                                int seqLength,
00370                                SamValidationErrors& validationErrors);
00371 
00372     /// Validate the tags.
00373     /// Validation for tags is:
00374     ///   a) check that the "MD" tag is correct if it is present.
00375     /// TODO: more validation.
00376     /// \param samRecord record whose tags should be validated.
00377     /// \param validationErrors status to append any errors to.
00378     /// \return true if it is valid, false and appends to SamValidationErrors
00379     /// if it is not
00380     static bool isValidTags(SamRecord& samRecord,
00381                             SamValidationErrors& validationErrors);
00382 
00383     /// TODO: validate the tag vtype.
00384     /// \return true if it is valid, false and appends to SamValidationErrors
00385     /// if it is not
00386     static bool isValidVtype();
00387 
00388     /// TODO: validate the tag value.
00389     /// \return true if it is valid, false and appends to SamValidationErrors
00390     /// if it is not
00391     static bool isValidValue();
00392 };
00393 
00394 
00395 #endif
Generated on Tue Sep 6 17:52:00 2011 for libStatGen Software by  doxygen 1.6.3