00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <iostream>
00019
00020 #include "SamValidation.h"
00021 #include "CigarRoller.h"
00022
00023 const char* SamValidationError::enumSeverityString[] = {
00024 "WARNING", "ERROR"};
00025
00026 const char* SamValidationError::enumTypeString[] = {
00027 "INVALID_QNAME",
00028 "INVALID_REF_ID",
00029 "INVALID_RNAME",
00030 "INVALID_POS",
00031 "INVALID_MAPQ",
00032 "INVALID_CIGAR",
00033 "INVALID_MRNM",
00034 "INVALID_QUAL"
00035 };
00036
00037 const char* SamValidationError::getTypeString(Type type)
00038 {
00039 return(enumTypeString[type]);
00040 }
00041
00042
00043 SamValidationError::SamValidationError(Type type, Severity severity,
00044 std::string message)
00045 {
00046 myType = type;
00047 mySeverity = severity;
00048 myMessage = message;
00049 }
00050
00051
00052 SamValidationError::Type SamValidationError::getType() const
00053 {
00054 return(myType);
00055 }
00056
00057
00058 SamValidationError::Severity SamValidationError::getSeverity() const
00059 {
00060 return(mySeverity);
00061 }
00062
00063
00064 const char* SamValidationError::getMessage() const
00065 {
00066 return(myMessage.c_str());
00067 }
00068
00069
00070 const char* SamValidationError::getTypeString() const
00071 {
00072 return(enumTypeString[myType]);
00073 }
00074
00075
00076 const char* SamValidationError::getSeverityString() const
00077 {
00078 return(enumSeverityString[mySeverity]);
00079 }
00080
00081
00082 void SamValidationError::getErrorString(std::string& errorString) const
00083 {
00084 errorString = getTypeString();
00085 errorString += " (";
00086 errorString += getSeverityString();
00087 errorString += ") : ";
00088 errorString += getMessage();
00089 errorString += "\n";
00090 }
00091
00092
00093 void SamValidationError::printError() const
00094 {
00095 std::cerr << this;
00096 }
00097
00098
00099
00100
00101 SamValidationErrors::SamValidationErrors()
00102 : myValidationErrors()
00103 {
00104 myErrorIter = myValidationErrors.begin();
00105 }
00106
00107
00108
00109 SamValidationErrors::~SamValidationErrors()
00110 {
00111 clear();
00112 }
00113
00114
00115 void SamValidationErrors::clear()
00116 {
00117
00118 std::list<const SamValidationError*>::iterator errorIter;
00119 for(errorIter = myValidationErrors.begin();
00120 errorIter != myValidationErrors.end(); ++errorIter)
00121 {
00122 delete *errorIter;
00123 *errorIter = NULL;
00124 }
00125 myValidationErrors.clear();
00126 myErrorIter = myValidationErrors.end();
00127 }
00128
00129
00130 void SamValidationErrors::addError(SamValidationError::Type newType,
00131 SamValidationError::Severity newSeverity,
00132 const char* newMessage)
00133 {
00134 myValidationErrors.push_back(new SamValidationError(newType,
00135 newSeverity,
00136 newMessage));
00137
00138
00139 if(myValidationErrors.size() == 1)
00140 {
00141
00142 myErrorIter = myValidationErrors.begin();
00143 }
00144 }
00145
00146
00147
00148
00149 unsigned int SamValidationErrors::numErrors()
00150 {
00151 return(myValidationErrors.size());
00152 }
00153
00154
00155
00156
00157
00158 const SamValidationError* SamValidationErrors::getNextError()
00159 {
00160 if(myErrorIter == myValidationErrors.end())
00161 {
00162
00163 return(NULL);
00164 }
00165
00166 return(*myErrorIter++);
00167 }
00168
00169
00170
00171 void SamValidationErrors::resetErrorIter()
00172 {
00173 myErrorIter = myValidationErrors.begin();
00174 }
00175
00176
00177
00178 void SamValidationErrors::getErrorString(std::string& errorString) const
00179 {
00180 for(std::list<const SamValidationError*>::
00181 const_iterator validationErrorIter =
00182 myValidationErrors.begin();
00183 validationErrorIter != myValidationErrors.end();
00184 validationErrorIter++)
00185 {
00186 std::string error = "";
00187 (*validationErrorIter)->getErrorString(error);
00188 errorString += error;
00189 }
00190 }
00191
00192
00193 bool SamValidator::isValid(SamFileHeader& samHeader, SamRecord& samRecord,
00194 SamValidationErrors& validationErrors)
00195 {
00196 bool status = true;
00197 status &= isValidQname(samRecord.getReadName(),
00198 samRecord.getReadNameLength(),
00199 validationErrors);
00200
00201 status &= isValidFlag(samRecord.getFlag(),
00202 validationErrors);
00203
00204
00205 status &= isValidRname(samHeader,
00206 samRecord.getReferenceName(),
00207 validationErrors);
00208
00209 status &= isValidRefID(samRecord.getReferenceID(),
00210 *samHeader.getReferenceInfo(),
00211 validationErrors);
00212
00213 status &= isValid1BasedPos(samRecord.get1BasedPosition(),
00214 validationErrors);
00215
00216 status &= isValidMapQuality(samRecord.getMapQuality(), validationErrors);
00217
00218 status &= isValidSequence(samRecord, validationErrors);
00219
00220 status &= isValidCigar(samRecord, validationErrors);
00221
00222 status &= isValidQuality(samRecord, validationErrors);
00223
00224 return(status);
00225 }
00226
00227
00228
00229
00230
00231
00232
00233
00234 bool SamValidator::isValidQname(const char* qname, uint8_t readNameLen,
00235 SamValidationErrors& validationErrors)
00236 {
00237
00238
00239
00240
00241
00242 bool status = true;
00243
00244
00245 int32_t qnameLenNull = strlen(qname) + 1;
00246
00247
00248
00249 if(qnameLenNull != readNameLen)
00250 {
00251
00252
00253
00254 String message = "Invalid Query Name - the string length (";
00255 message += qnameLenNull;
00256 message += ") does not match the specified query name length (";
00257 message += readNameLen;
00258 message += ").";
00259
00260 validationErrors.addError(SamValidationError::INVALID_QNAME,
00261 SamValidationError::ERROR,
00262 message.c_str());
00263 status = false;
00264 }
00265
00266
00267
00268
00269 if((qnameLenNull < 2) || (qnameLenNull > 255))
00270 {
00271 String message = "Invalid Query Name (QNAME) length: ";
00272 message += qnameLenNull;
00273 message += ". Length with the terminating null must be between 2 & 255.";
00274
00275 validationErrors.addError(SamValidationError::INVALID_QNAME,
00276 SamValidationError::WARNING,
00277 message.c_str());
00278 status = false;
00279 }
00280
00281
00282
00283
00284 String message;
00285 for(int i = 0; i < qnameLenNull; ++i)
00286 {
00287 switch(qname[i])
00288 {
00289 case ' ':
00290
00291 message = "Invalid character in the Query Name (QNAME): ' ' at position ";
00292 message += i;
00293 message += ".";
00294 validationErrors.addError(SamValidationError::INVALID_QNAME,
00295 SamValidationError::WARNING,
00296 message.c_str());
00297 status = false;
00298 break;
00299 case '\t':
00300
00301 message = "Invalid character in the Query Name (QNAME): '\t' at position ";
00302 message += i;
00303 message += ".";
00304 validationErrors.addError(SamValidationError::INVALID_QNAME,
00305 SamValidationError::WARNING,
00306 message.c_str());
00307 status = false;
00308 break;
00309 case '\n':
00310
00311 message = "Invalid character in the Query Name (QNAME): '\n' at position ";
00312 message += i;
00313 message += ".";
00314 validationErrors.addError(SamValidationError::INVALID_QNAME,
00315 SamValidationError::WARNING,
00316 message.c_str());
00317 status = false;
00318 break;
00319 case '\r':
00320
00321 message = "Invalid character in the Query Name (QNAME): '\r' at position ";
00322 message += i;
00323 message += ".";
00324 validationErrors.addError(SamValidationError::INVALID_QNAME,
00325 SamValidationError::WARNING,
00326 message.c_str());
00327 status = false;
00328 break;
00329 }
00330 }
00331
00332 return(status);
00333 }
00334
00335
00336 bool SamValidator::isValidFlag(uint16_t flag,
00337 SamValidationErrors& validationErrors)
00338 {
00339
00340 return(true);
00341 }
00342
00343
00344 bool SamValidator::isValidRname(SamFileHeader& samHeader,
00345 const char* rname,
00346 SamValidationErrors& validationErrors)
00347 {
00348 bool status = true;
00349
00350
00351
00352
00353
00354 if((strcmp(rname, "*") != 0) &&
00355 (samHeader.getNumSQs() != 0) &&
00356 (samHeader.getSQ(rname) == NULL))
00357 {
00358
00359 status = false;
00360 std::string message = "RNAME, ";
00361 message += rname;
00362 message += ", was not found in a SAM Header SQ record";
00363 validationErrors.addError(SamValidationError::INVALID_RNAME,
00364 SamValidationError::WARNING,
00365 message.c_str());
00366 }
00367 status &= isValidRname(rname, validationErrors);
00368 return(status);
00369 }
00370
00371
00372 bool SamValidator::isValidRname(const char* rname,
00373 SamValidationErrors& validationErrors)
00374 {
00375
00376
00377
00378
00379 bool status = true;
00380
00381
00382 int32_t rnameLen = strlen(rname);
00383
00384 String message;
00385
00386 if(rnameLen == 0)
00387 {
00388 validationErrors.addError(SamValidationError::INVALID_RNAME,
00389 SamValidationError::WARNING,
00390 "Reference Sequence Name (RNAME) cannot have 0 length.");
00391 status = false;
00392 }
00393
00394
00395
00396
00397
00398 for(int i = 0; i < rnameLen; ++i)
00399 {
00400 switch(rname[i])
00401 {
00402 case ' ':
00403
00404 message = "Invalid character in the Reference Sequence Name (RNAME): ' ' at position ";
00405 message += i;
00406 message += ".";
00407 validationErrors.addError(SamValidationError::INVALID_RNAME,
00408 SamValidationError::WARNING,
00409 message.c_str());
00410 status = false;
00411 break;
00412 case '\t':
00413
00414 message = "Invalid character in the Reference Sequence Name (RNAME): '\t' at position ";
00415 message += i;
00416 message += ".";
00417 validationErrors.addError(SamValidationError::INVALID_RNAME,
00418 SamValidationError::WARNING,
00419 message.c_str());
00420 status = false;
00421 break;
00422 case '\n':
00423
00424 message = "Invalid character in the Reference Sequence Name (RNAME): '\n' at position ";
00425 message += i;
00426 message += ".";
00427 validationErrors.addError(SamValidationError::INVALID_RNAME,
00428 SamValidationError::WARNING,
00429 message.c_str());
00430 status = false;
00431 break;
00432 case '\r':
00433
00434 message = "Invalid character in the Reference Sequence Name (RNAME): '\r' at position ";
00435 message += i;
00436 message += ".";
00437 validationErrors.addError(SamValidationError::INVALID_RNAME,
00438 SamValidationError::WARNING,
00439 message.c_str());
00440 status = false;
00441 break;
00442 case '@':
00443
00444 message = "Invalid character in the Reference Sequence Name (RNAME): '@' at position ";
00445 message += i;
00446 message += ".";
00447 validationErrors.addError(SamValidationError::INVALID_RNAME,
00448 SamValidationError::WARNING,
00449 message.c_str());
00450 status = false;
00451 break;
00452 case '=':
00453
00454 message = "Invalid character in the Reference Sequence Name (RNAME): '=' at position ";
00455 message += i;
00456 message += ".";
00457 validationErrors.addError(SamValidationError::INVALID_RNAME,
00458 SamValidationError::WARNING,
00459 message.c_str());
00460 status = false;
00461 break;
00462 default:
00463
00464 break;
00465 }
00466 }
00467
00468 return(status);
00469 }
00470
00471
00472 bool SamValidator::isValidRefID(int32_t refID,
00473 const SamReferenceInfo& refInfo,
00474 SamValidationErrors& validationErrors)
00475 {
00476
00477
00478
00479
00480 bool status = true;
00481 if((refID < -1) || (refID >= refInfo.getNumEntries()))
00482 {
00483
00484 String message = "Invalid Reference ID, out of range (";
00485 message += refID;
00486 message += ") must be between -1 and ";
00487 message += refInfo.getNumEntries() - 1;
00488 message += ".";
00489
00490 validationErrors.addError(SamValidationError::INVALID_REF_ID,
00491 SamValidationError::WARNING,
00492 message.c_str());
00493 status = false;
00494 }
00495
00496 return(status);
00497 }
00498
00499
00500 bool SamValidator::isValid1BasedPos(int32_t pos,
00501 SamValidationErrors& validationErrors)
00502 {
00503
00504
00505
00506 bool status = true;
00507
00508 if((pos < 0) || (pos > 536870911))
00509 {
00510 String message = "POS out of range (";
00511 message += pos;
00512 message += ") must be between 0 and (2^29)-1.";
00513
00514 validationErrors.addError(SamValidationError::INVALID_POS,
00515 SamValidationError::WARNING,
00516 message.c_str());
00517 status = false;
00518 }
00519
00520 return(status);
00521 }
00522
00523
00524 bool SamValidator::isValidMapQuality(uint8_t mapQuality,
00525 SamValidationErrors& validationErrors)
00526 {
00527
00528 return(true);
00529 }
00530
00531
00532 bool SamValidator::isValidSequence(SamRecord& samRecord,
00533 SamValidationErrors& validationErrors)
00534 {
00535 return(true);
00536 }
00537
00538
00539 bool SamValidator::isValidCigar(SamRecord& samRecord,
00540 SamValidationErrors& validationErrors)
00541 {
00542 return(isValidCigar(samRecord.getCigar(),
00543 samRecord.getReadLength(),
00544 validationErrors));
00545 }
00546
00547 bool SamValidator::isValidCigar(const char* cigar,
00548 const char* sequence,
00549 SamValidationErrors& validationErrors)
00550 {
00551 return(isValidCigar(cigar, strlen(sequence), validationErrors));
00552 }
00553
00554 bool SamValidator::isValidCigar(const char* cigar,
00555 int seqLen,
00556 SamValidationErrors& validationErrors)
00557 {
00558
00559
00560
00561
00562
00563
00564 bool status = true;
00565 String message;
00566
00567 int32_t cigarLen = strlen(cigar);
00568
00569
00570 if(cigarLen == 0)
00571 {
00572 validationErrors.addError(SamValidationError::INVALID_CIGAR,
00573 SamValidationError::WARNING,
00574 "Cigar must not be blank.");
00575 status = false;
00576 }
00577
00578 if(strcmp(cigar, "*") != 0)
00579 {
00580
00581 CigarRoller cigarRoller(cigar);
00582
00583
00584
00585
00586
00587
00588
00589 int cigarSeqLen = cigarRoller.getExpectedQueryBaseCount();
00590 if(cigarSeqLen != seqLen)
00591 {
00592 message = "CIGAR does not evaluate to the same length as SEQ, (";
00593 message += cigarSeqLen;
00594 message += " != ";
00595 message += seqLen;
00596 message += ").";
00597 validationErrors.addError(SamValidationError::INVALID_CIGAR,
00598 SamValidationError::WARNING,
00599 message.c_str());
00600 status = false;
00601 }
00602 }
00603 return(status);
00604 }
00605
00606
00607 bool SamValidator::isValidQuality(SamRecord& samRecord,
00608 SamValidationErrors& validationErrors)
00609 {
00610 return(isValidQuality(samRecord.getQuality(),
00611 samRecord.getReadLength(),
00612 validationErrors));
00613 }
00614
00615
00616 bool SamValidator::isValidQuality(const char* quality,
00617 const char* sequence,
00618 SamValidationErrors& validationErrors)
00619 {
00620
00621 int seqLen = strlen(sequence);
00622
00623
00624 if(strcmp(sequence, "*") == 0)
00625 {
00626 seqLen = 0;
00627 }
00628 return(isValidQuality(quality, seqLen, validationErrors));
00629 }
00630
00631
00632 bool SamValidator::isValidQuality(const char* quality,
00633 int seqLength,
00634 SamValidationErrors& validationErrors)
00635 {
00636 bool status = true;
00637
00638
00639
00640 if((seqLength != 0) && (strcmp(quality, "*") != 0))
00641 {
00642 int qualLen = strlen(quality);
00643
00644
00645 if(seqLength != qualLen)
00646 {
00647
00648
00649 String message = "QUAL is not the same length as SEQ, (";
00650 message += qualLen;
00651 message += " != ";
00652 message += seqLength;
00653 message += ").";
00654
00655 validationErrors.addError(SamValidationError::INVALID_QUAL,
00656 SamValidationError::WARNING,
00657 message.c_str());
00658 status = false;
00659 }
00660 }
00661 return(status);
00662 }
00663