00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <iostream>
00019
00020 #include "SamValidation.h"
00021 #include "CigarRoller.h"
00022
00023 const char* SamValidationError::enumSeverityString[] = {
00024 "WARNING", "ERROR"};
00025
00026 const char* SamValidationError::enumTypeString[] = {
00027 "INVALID_QNAME",
00028 "INVALID_REF_ID",
00029 "INVALID_RNAME",
00030 "INVALID_POS",
00031 "INVALID_MAPQ",
00032 "INVALID_CIGAR",
00033 "INVALID_MRNM",
00034 "INVALID_QUAL"
00035 };
00036
00037 const char* SamValidationError::getTypeString(Type type)
00038 {
00039 return(enumTypeString[type]);
00040 }
00041
00042
00043 SamValidationError::SamValidationError(Type type, Severity severity,
00044 std::string message)
00045 {
00046 myType = type;
00047 mySeverity = severity;
00048 myMessage = message;
00049 }
00050
00051
00052 SamValidationError::Type SamValidationError::getType() const
00053 {
00054 return(myType);
00055 }
00056
00057
00058 SamValidationError::Severity SamValidationError::getSeverity() const
00059 {
00060 return(mySeverity);
00061 }
00062
00063
00064 const char* SamValidationError::getMessage() const
00065 {
00066 return(myMessage.c_str());
00067 }
00068
00069
00070 const char* SamValidationError::getTypeString() const
00071 {
00072 return(enumTypeString[myType]);
00073 }
00074
00075
00076 const char* SamValidationError::getSeverityString() const
00077 {
00078 return(enumSeverityString[mySeverity]);
00079 }
00080
00081
00082 void SamValidationError::getErrorString(std::string& errorString) const
00083 {
00084 errorString = getTypeString();
00085 errorString += " (";
00086 errorString += getSeverityString();
00087 errorString += ") : ";
00088 errorString += getMessage();
00089 errorString += "\n";
00090 }
00091
00092
00093 void SamValidationError::printError() const
00094 {
00095 std::cerr << this;
00096 }
00097
00098
00099
00100
00101 SamValidationErrors::SamValidationErrors()
00102 : myValidationErrors()
00103 {
00104 myErrorIter = myValidationErrors.begin();
00105 }
00106
00107
00108
00109 SamValidationErrors::~SamValidationErrors()
00110 {
00111 clear();
00112 }
00113
00114
00115 void SamValidationErrors::clear()
00116 {
00117
00118 std::list<const SamValidationError*>::iterator errorIter;
00119 for(errorIter = myValidationErrors.begin();
00120 errorIter != myValidationErrors.end(); ++errorIter)
00121 {
00122 delete *errorIter;
00123 *errorIter = NULL;
00124 }
00125 myValidationErrors.clear();
00126 myErrorIter = myValidationErrors.end();
00127 }
00128
00129
00130 void SamValidationErrors::addError(SamValidationError::Type newType,
00131 SamValidationError::Severity newSeverity,
00132 const char* newMessage)
00133 {
00134 myValidationErrors.push_back(new SamValidationError(newType,
00135 newSeverity,
00136 newMessage));
00137
00138
00139 if(myValidationErrors.size() == 1)
00140 {
00141
00142 myErrorIter = myValidationErrors.begin();
00143 }
00144 }
00145
00146
00147
00148
00149 unsigned int SamValidationErrors::numErrors()
00150 {
00151 return(myValidationErrors.size());
00152 }
00153
00154
00155
00156
00157
00158 const SamValidationError* SamValidationErrors::getNextError()
00159 {
00160 if(myErrorIter == myValidationErrors.end())
00161 {
00162
00163 return(NULL);
00164 }
00165
00166 return(*myErrorIter++);
00167 }
00168
00169
00170
00171 void SamValidationErrors::resetErrorIter()
00172 {
00173 myErrorIter = myValidationErrors.begin();
00174 }
00175
00176
00177
00178 void SamValidationErrors::getErrorString(std::string& errorString) const
00179 {
00180 for(std::list<const SamValidationError*>::
00181 const_iterator validationErrorIter =
00182 myValidationErrors.begin();
00183 validationErrorIter != myValidationErrors.end();
00184 validationErrorIter++)
00185 {
00186 std::string error = "";
00187 (*validationErrorIter)->getErrorString(error);
00188 errorString += error;
00189 }
00190 }
00191
00192
00193 bool SamValidator::isValid(SamFileHeader& samHeader, SamRecord& samRecord,
00194 SamValidationErrors& validationErrors)
00195 {
00196 bool status = true;
00197 status &= isValidQname(samRecord.getReadName(),
00198 samRecord.getReadNameLength(),
00199 validationErrors);
00200
00201 status &= isValidFlag(samRecord.getFlag(),
00202 validationErrors);
00203
00204
00205 status &= isValidRname(samHeader,
00206 samRecord.getReferenceName(),
00207 validationErrors);
00208
00209 status &= isValidRefID(samRecord.getReferenceID(),
00210 *samHeader.getReferenceInfo(),
00211 validationErrors);
00212
00213 status &= isValid1BasedPos(samRecord.get1BasedPosition(),
00214 validationErrors);
00215
00216 status &= isValidMapQuality(samRecord.getMapQuality(),
00217 validationErrors);
00218
00219 const char* sequence = samRecord.getSequence();
00220
00221 status &= isValidCigar(samRecord.getCigar(),
00222 sequence,
00223 validationErrors);
00224
00225 status &= isValidQuality(samRecord.getQuality(),
00226 sequence,
00227 validationErrors);
00228
00229 return(status);
00230 }
00231
00232
00233
00234
00235
00236
00237
00238
00239 bool SamValidator::isValidQname(const char* qname, uint8_t readNameLen,
00240 SamValidationErrors& validationErrors)
00241 {
00242
00243
00244
00245
00246
00247 bool status = true;
00248
00249
00250 int32_t qnameLenNull = strlen(qname) + 1;
00251
00252
00253
00254 if(qnameLenNull != readNameLen)
00255 {
00256
00257
00258
00259 String message = "Invalid Query Name - the string length (";
00260 message += qnameLenNull;
00261 message += ") does not match the specified query name length (";
00262 message += readNameLen;
00263 message += ").";
00264
00265 validationErrors.addError(SamValidationError::INVALID_QNAME,
00266 SamValidationError::ERROR,
00267 message.c_str());
00268 status = false;
00269 }
00270
00271
00272
00273
00274 if((qnameLenNull < 2) || (qnameLenNull > 255))
00275 {
00276 String message = "Invalid Query Name (QNAME) length: ";
00277 message += qnameLenNull;
00278 message += ". Length with the terminating null must be between 2 & 255.";
00279
00280 validationErrors.addError(SamValidationError::INVALID_QNAME,
00281 SamValidationError::WARNING,
00282 message.c_str());
00283 status = false;
00284 }
00285
00286
00287
00288
00289 String message;
00290 for(int i = 0; i < qnameLenNull; ++i)
00291 {
00292 switch(qname[i])
00293 {
00294 case ' ':
00295
00296 message = "Invalid character in the Query Name (QNAME): ' ' at position ";
00297 message += i;
00298 message += ".";
00299 validationErrors.addError(SamValidationError::INVALID_QNAME,
00300 SamValidationError::WARNING,
00301 message.c_str());
00302 status = false;
00303 break;
00304 case '\t':
00305
00306 message = "Invalid character in the Query Name (QNAME): '\t' at position ";
00307 message += i;
00308 message += ".";
00309 validationErrors.addError(SamValidationError::INVALID_QNAME,
00310 SamValidationError::WARNING,
00311 message.c_str());
00312 status = false;
00313 break;
00314 case '\n':
00315
00316 message = "Invalid character in the Query Name (QNAME): '\n' at position ";
00317 message += i;
00318 message += ".";
00319 validationErrors.addError(SamValidationError::INVALID_QNAME,
00320 SamValidationError::WARNING,
00321 message.c_str());
00322 status = false;
00323 break;
00324 case '\r':
00325
00326 message = "Invalid character in the Query Name (QNAME): '\r' at position ";
00327 message += i;
00328 message += ".";
00329 validationErrors.addError(SamValidationError::INVALID_QNAME,
00330 SamValidationError::WARNING,
00331 message.c_str());
00332 status = false;
00333 break;
00334 }
00335 }
00336
00337 return(status);
00338 }
00339
00340
00341 bool SamValidator::isValidFlag(uint16_t flag,
00342 SamValidationErrors& validationErrors)
00343 {
00344
00345 return(true);
00346 }
00347
00348
00349 bool SamValidator::isValidRname(SamFileHeader& samHeader,
00350 const char* rname,
00351 SamValidationErrors& validationErrors)
00352 {
00353 bool status = true;
00354
00355
00356
00357
00358
00359 if((strcmp(rname, "*") != 0) &&
00360 (samHeader.getNumSQs() != 0) &&
00361 (samHeader.getSQ(rname) == NULL))
00362 {
00363
00364 status = false;
00365 std::string message = "RNAME, ";
00366 message += rname;
00367 message += ", was not found in a SAM Header SQ record";
00368 validationErrors.addError(SamValidationError::INVALID_RNAME,
00369 SamValidationError::WARNING,
00370 message.c_str());
00371 }
00372 status &= isValidRname(rname, validationErrors);
00373 return(status);
00374 }
00375
00376
00377 bool SamValidator::isValidRname(const char* rname,
00378 SamValidationErrors& validationErrors)
00379 {
00380
00381
00382
00383
00384 bool status = true;
00385
00386
00387 int32_t rnameLen = strlen(rname);
00388
00389 String message;
00390
00391 if(rnameLen == 0)
00392 {
00393 validationErrors.addError(SamValidationError::INVALID_RNAME,
00394 SamValidationError::WARNING,
00395 "Reference Sequence Name (RNAME) cannot have 0 length.");
00396 status = false;
00397 }
00398
00399
00400
00401
00402
00403 for(int i = 0; i < rnameLen; ++i)
00404 {
00405 switch(rname[i])
00406 {
00407 case ' ':
00408
00409 message = "Invalid character in the Reference Sequence Name (RNAME): ' ' at position ";
00410 message += i;
00411 message += ".";
00412 validationErrors.addError(SamValidationError::INVALID_RNAME,
00413 SamValidationError::WARNING,
00414 message.c_str());
00415 status = false;
00416 break;
00417 case '\t':
00418
00419 message = "Invalid character in the Reference Sequence Name (RNAME): '\t' at position ";
00420 message += i;
00421 message += ".";
00422 validationErrors.addError(SamValidationError::INVALID_RNAME,
00423 SamValidationError::WARNING,
00424 message.c_str());
00425 status = false;
00426 break;
00427 case '\n':
00428
00429 message = "Invalid character in the Reference Sequence Name (RNAME): '\n' at position ";
00430 message += i;
00431 message += ".";
00432 validationErrors.addError(SamValidationError::INVALID_RNAME,
00433 SamValidationError::WARNING,
00434 message.c_str());
00435 status = false;
00436 break;
00437 case '\r':
00438
00439 message = "Invalid character in the Reference Sequence Name (RNAME): '\r' at position ";
00440 message += i;
00441 message += ".";
00442 validationErrors.addError(SamValidationError::INVALID_RNAME,
00443 SamValidationError::WARNING,
00444 message.c_str());
00445 status = false;
00446 break;
00447 case '@':
00448
00449 message = "Invalid character in the Reference Sequence Name (RNAME): '@' at position ";
00450 message += i;
00451 message += ".";
00452 validationErrors.addError(SamValidationError::INVALID_RNAME,
00453 SamValidationError::WARNING,
00454 message.c_str());
00455 status = false;
00456 break;
00457 case '=':
00458
00459 message = "Invalid character in the Reference Sequence Name (RNAME): '=' at position ";
00460 message += i;
00461 message += ".";
00462 validationErrors.addError(SamValidationError::INVALID_RNAME,
00463 SamValidationError::WARNING,
00464 message.c_str());
00465 status = false;
00466 break;
00467 default:
00468
00469 break;
00470 }
00471 }
00472
00473 return(status);
00474 }
00475
00476
00477 bool SamValidator::isValidRefID(int32_t refID,
00478 const SamReferenceInfo& refInfo,
00479 SamValidationErrors& validationErrors)
00480 {
00481
00482
00483
00484
00485 bool status = true;
00486 if((refID < -1) || (refID >= refInfo.getNumEntries()))
00487 {
00488
00489 String message = "Invalid Reference ID, out of range (";
00490 message += refID;
00491 message += ") must be between -1 and ";
00492 message += refInfo.getNumEntries() - 1;
00493 message += ".";
00494
00495 validationErrors.addError(SamValidationError::INVALID_REF_ID,
00496 SamValidationError::WARNING,
00497 message.c_str());
00498 status = false;
00499 }
00500
00501 return(status);
00502 }
00503
00504
00505 bool SamValidator::isValid1BasedPos(int32_t pos,
00506 SamValidationErrors& validationErrors)
00507 {
00508
00509
00510
00511 bool status = true;
00512
00513 if((pos < 0) || (pos > 536870911))
00514 {
00515 String message = "POS out of range (";
00516 message += pos;
00517 message += ") must be between 0 and (2^29)-1.";
00518
00519 validationErrors.addError(SamValidationError::INVALID_POS,
00520 SamValidationError::WARNING,
00521 message.c_str());
00522 status = false;
00523 }
00524
00525 return(status);
00526 }
00527
00528
00529 bool SamValidator::isValidMapQuality(uint8_t mapQuality,
00530 SamValidationErrors& validationErrors)
00531 {
00532
00533 return(true);
00534 }
00535
00536
00537 bool SamValidator::isValidCigar(const char* cigar,
00538 const char* sequence,
00539 SamValidationErrors& validationErrors)
00540 {
00541
00542
00543
00544
00545
00546
00547 bool status = true;
00548 String message;
00549
00550 int32_t cigarLen = strlen(cigar);
00551
00552
00553 if(cigarLen == 0)
00554 {
00555 validationErrors.addError(SamValidationError::INVALID_CIGAR,
00556 SamValidationError::WARNING,
00557 "Cigar must not be blank.");
00558 status = false;
00559 }
00560
00561 if(strcmp(cigar, "*") != 0)
00562 {
00563
00564 CigarRoller cigarRoller(cigar);
00565
00566
00567
00568
00569
00570
00571
00572 int seqLen = strlen(sequence);
00573 int cigarSeqLen = cigarRoller.getExpectedQueryBaseCount();
00574 if(cigarSeqLen != seqLen)
00575 {
00576 message = "CIGAR does not evaluate to the same length as SEQ, (";
00577 message += cigarSeqLen;
00578 message += " != ";
00579 message += seqLen;
00580 message += ").";
00581 validationErrors.addError(SamValidationError::INVALID_CIGAR,
00582 SamValidationError::WARNING,
00583 message.c_str());
00584 status = false;
00585 }
00586 }
00587 return(status);
00588 }
00589
00590
00591 bool SamValidator::isValidQuality(const char* quality,
00592 const char* sequence,
00593 SamValidationErrors& validationErrors)
00594 {
00595 bool status = true;
00596
00597
00598
00599 if((strcmp(sequence, "*") != 0) && (strcmp(quality, "*") != 0))
00600 {
00601 int seqLen = strlen(sequence);
00602 int qualLen = strlen(quality);
00603
00604
00605 if(seqLen != qualLen)
00606 {
00607
00608
00609 String message = "QUAL is not the same length as SEQ, (";
00610 message += qualLen;
00611 message += " != ";
00612 message += seqLen;
00613 message += ").";
00614
00615 validationErrors.addError(SamValidationError::INVALID_QUAL,
00616 SamValidationError::WARNING,
00617 message.c_str());
00618 status = false;
00619 }
00620 }
00621 return(status);
00622 }