ModifyVar.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "SamFile.h"
00019 #include "ModifyVar.h"
00020 #include "SamValidation.h"
00021
00022 void modifyFirstBase()
00023 {
00024 SamFile samIn;
00025
00026 assert(samIn.OpenForRead("testFiles/testVar.bam"));
00027
00028 SamFile samOut;
00029
00030 assert(samOut.OpenForWrite("results/updateVar.bam"));
00031
00032
00033 SamFileHeader samHeader;
00034 assert(samIn.ReadHeader(samHeader));
00035
00036 assert(samOut.WriteHeader(samHeader));
00037
00038
00039 SamRecord samRecord;
00040
00041
00042 while(samIn.ReadRecord(samHeader, samRecord))
00043 {
00044
00045
00046 SamValidationErrors samValidationErrors;
00047 assert(SamValidator::isValid(samHeader, samRecord,
00048 samValidationErrors));
00049
00050
00051 const char* sequence = samRecord.getSequence();
00052 assert(strcmp(sequence, "") != 0);
00053 std::string upSeq = sequence;
00054 upSeq[0] = 'N';
00055 assert(samRecord.setSequence(upSeq.c_str()));
00056
00057
00058 assert(samOut.WriteRecord(samHeader, samRecord));
00059 }
00060
00061
00062 assert(samIn.GetStatus() == SamStatus::NO_MORE_RECS);
00063 }
00064
00065
00066 void modifyFirstBaseLong()
00067 {
00068 SamFile samIn;
00069
00070 assert(samIn.OpenForRead("statusTests/HalfG.bam"));
00071
00072 SamFile samOut;
00073
00074 assert(samOut.OpenForWrite("results/updateSeq.bam"));
00075
00076
00077 SamFileHeader samHeader;
00078 assert(samIn.ReadHeader(samHeader));
00079
00080 assert(samOut.WriteHeader(samHeader));
00081
00082
00083 SamRecord samRecord;
00084
00085
00086 while(samIn.ReadRecord(samHeader, samRecord))
00087 {
00088
00089
00090 SamValidationErrors samValidationErrors;
00091 assert(SamValidator::isValid(samHeader, samRecord,
00092 samValidationErrors));
00093
00094
00095 const char* sequence = samRecord.getSequence();
00096 assert(strcmp(sequence, "") != 0);
00097 std::string upSeq = sequence;
00098 upSeq[0] = 'N';
00099 assert(samRecord.setSequence(upSeq.c_str()));
00100
00101
00102 assert(samOut.WriteRecord(samHeader, samRecord));
00103 }
00104
00105
00106 assert(samIn.GetStatus() == SamStatus::NO_MORE_RECS);
00107 }
00108
00109
00110 void testModifyVar()
00111 {
00112 #ifdef __ZLIB_AVAILABLE__
00113 modifyFirstBase();
00114 #endif
00115 modifyVar modTest;
00116 modTest.testModifyVar("testFiles/testSam.sam", true);
00117 modTest.testModifyVar("testFiles/testSam.sam", false);
00118 #ifdef __ZLIB_AVAILABLE__
00119 modTest.testModifyVar("testFiles/testBam.bam", true);
00120 modTest.testModifyVar("testFiles/testBam.bam", false);
00121 #endif
00122 }
00123
00124
00125 void modifyVar::testModifyVar(const char* filename, bool valBufFirst)
00126 {
00127 myFilename = filename;
00128 myValBufFirst = valBufFirst;
00129
00130 testModifyReadNameOnlySameLength();
00131 testModifyCigarOnlySameLength();
00132 testModifySequenceOnlySameLength();
00133 testModifyQualityOnlySameLength();
00134 testRemoveQuality();
00135 testShortenQuality();
00136 testLengthenQuality();
00137
00138 testShortenReadName();
00139 testShortenCigar();
00140 testShortenSequence();
00141
00142 testLengthenReadName();
00143 testLengthenCigar();
00144 testLengthenSequence();
00145
00146 testRemoveCigar();
00147 testRemoveSequence();
00148 testLengthenSequenceAndQuality();
00149 }
00150
00151
00152 void modifyVar::testModifyReadNameOnlySameLength()
00153 {
00154 resetExpected();
00155 openAndRead1Rec();
00156
00157
00158 expectedReadNameString = "1:1011:G:255+17M15D20M";
00159 samRecord.setReadName(expectedReadNameString.c_str());
00160
00161 validate();
00162 }
00163
00164
00165 void modifyVar::testModifyCigarOnlySameLength()
00166 {
00167 resetExpected();
00168 openAndRead1Rec();
00169
00170
00171 expectedCigarString = "3M2I";
00172 samRecord.setCigar(expectedCigarString.c_str());
00173
00174
00175
00176
00177 expectedCigarBufLen = 2;
00178 expectedCigarBuffer[0] = 0x30;
00179 expectedCigarBuffer[1] = 0x21;
00180
00181 validate();
00182 }
00183
00184
00185 void modifyVar::testModifySequenceOnlySameLength()
00186 {
00187 resetExpected();
00188 openAndRead1Rec();
00189
00190
00191 expectedSequenceString = "NCGAN";
00192 samRecord.setSequence(expectedSequenceString.c_str());
00193
00194
00195 expectedSequenceBuffer[0] = 0xF2;
00196 expectedSequenceBuffer[1] = 0x41;
00197 expectedSequenceBuffer[2] = 0xF0;
00198
00199 validate();
00200 }
00201
00202
00203 void modifyVar::testModifyQualityOnlySameLength()
00204 {
00205 resetExpected();
00206 openAndRead1Rec();
00207
00208
00209 expectedQualityString = "!>6+!";
00210 samRecord.setQuality(expectedQualityString.c_str());
00211
00212 validate();
00213 }
00214
00215
00216 void modifyVar::testRemoveQuality()
00217 {
00218 resetExpected();
00219 openAndRead1Rec();
00220
00221
00222
00223 expectedQualityString = "*";
00224 samRecord.setQuality(expectedQualityString.c_str());
00225
00226 validate();
00227 }
00228
00229
00230 void modifyVar::testShortenQuality()
00231 {
00232 resetExpected();
00233 openAndRead1Rec();
00234
00235
00236
00237 expectedQualityString = "!!";
00238 samRecord.setQuality(expectedQualityString.c_str());
00239
00240 validate();
00241 }
00242
00243
00244 void modifyVar::testLengthenQuality()
00245 {
00246 resetExpected();
00247 openAndRead1Rec();
00248
00249
00250
00251 expectedQualityString = "!!@@##";
00252 samRecord.setQuality(expectedQualityString.c_str());
00253
00254 validate();
00255 }
00256
00257
00258 void modifyVar::testShortenReadName()
00259 {
00260 resetExpected();
00261 openAndRead1Rec();
00262
00263
00264 expectedReadNameString = "1:1011:G:255";
00265 samRecord.setReadName(expectedReadNameString.c_str());
00266
00267 validate();
00268 }
00269
00270
00271 void modifyVar::testShortenCigar()
00272 {
00273 resetExpected();
00274 openAndRead1Rec();
00275
00276
00277 expectedCigarString = "5M";
00278 samRecord.setCigar(expectedCigarString.c_str());
00279
00280
00281
00282 expectedCigarBufLen = 1;
00283 expectedCigarBuffer[0] = 0x50;
00284
00285 validate();
00286 }
00287
00288
00289 void modifyVar::testShortenSequence()
00290 {
00291 resetExpected();
00292 openAndRead1Rec();
00293
00294
00295 expectedSequenceString = "CCGA";
00296 samRecord.setSequence(expectedSequenceString.c_str());
00297
00298
00299 expectedSequenceBuffer[0] = 0x22;
00300 expectedSequenceBuffer[1] = 0x41;
00301
00302 validate();
00303 }
00304
00305
00306 void modifyVar::testLengthenReadName()
00307 {
00308 resetExpected();
00309 openAndRead1Rec();
00310
00311
00312 expectedReadNameString = "1:1011:G:255+17M15D20M:1111111";
00313 samRecord.setReadName(expectedReadNameString.c_str());
00314
00315 validate();
00316 }
00317
00318
00319 void modifyVar::testLengthenCigar()
00320 {
00321 resetExpected();
00322 openAndRead1Rec();
00323
00324
00325 expectedCigarString = "3M2D2I";
00326 samRecord.setCigar(expectedCigarString.c_str());
00327
00328
00329
00330
00331
00332 expectedCigarBufLen = 3;
00333 expectedCigarBuffer[0] = 0x30;
00334 expectedCigarBuffer[1] = 0x22;
00335 expectedCigarBuffer[2] = 0x21;
00336
00337 validate();
00338 }
00339
00340
00341 void modifyVar::testLengthenSequence()
00342 {
00343 resetExpected();
00344 openAndRead1Rec();
00345
00346
00347 expectedSequenceString = "CCGAATT";
00348 samRecord.setSequence(expectedSequenceString.c_str());
00349
00350
00351 expectedSequenceBuffer[0] = 0x22;
00352 expectedSequenceBuffer[1] = 0x41;
00353 expectedSequenceBuffer[2] = 0x18;
00354 expectedSequenceBuffer[3] = 0x80;
00355
00356 validate();
00357 }
00358
00359
00360 void modifyVar::testRemoveCigar()
00361 {
00362 resetExpected();
00363 openAndRead1Rec();
00364
00365
00366 expectedCigarString = "*";
00367 expectedCigarBufLen = 0;
00368 samRecord.setCigar(expectedCigarString.c_str());
00369
00370 validate();
00371 }
00372
00373
00374 void modifyVar::testRemoveSequence()
00375 {
00376 resetExpected();
00377 openAndRead1Rec();
00378
00379
00380 expectedSequenceString = "*";
00381 samRecord.setSequence(expectedSequenceString.c_str());
00382
00383 validate();
00384 }
00385
00386
00387 void modifyVar::testLengthenSequenceAndQuality()
00388 {
00389 resetExpected();
00390 openAndRead1Rec();
00391
00392
00393 expectedSequenceString = "CCGAATT";
00394 expectedQualityString = "!@#$%^&";
00395 samRecord.setSequence(expectedSequenceString.c_str());
00396 samRecord.setQuality(expectedQualityString.c_str());
00397
00398
00399 expectedSequenceBuffer[0] = 0x22;
00400 expectedSequenceBuffer[1] = 0x41;
00401 expectedSequenceBuffer[2] = 0x18;
00402 expectedSequenceBuffer[3] = 0x80;
00403
00404 validate();
00405 }
00406
00407
00408 void modifyVar::validate()
00409 {
00410 if(myValBufFirst)
00411 {
00412
00413 const bamRecordStruct* recordBuffer =
00414 (const bamRecordStruct*)samRecord.getRecordBuffer();
00415
00416
00417 validateReadName(recordBuffer);
00418 validateCigar(recordBuffer);
00419 validateSequence(recordBuffer);
00420 validateQuality(recordBuffer);
00421 validateTags(recordBuffer);
00422
00423
00424 validateReadNameString();
00425 validateCigarString();
00426 validateSequenceString();
00427 validateQualityString();
00428 validateTagsString();
00429 }
00430 else
00431 {
00432
00433 const bamRecordStruct* recordBuffer =
00434 (const bamRecordStruct*)samRecord.getRecordBuffer();
00435
00436
00437 validateReadName(recordBuffer);
00438 validateCigar(recordBuffer);
00439 validateSequence(recordBuffer);
00440 validateQuality(recordBuffer);
00441 validateTags(recordBuffer);
00442
00443
00444 validateReadNameString();
00445 validateCigarString();
00446 validateSequenceString();
00447 validateQualityString();
00448 validateTagsString();
00449 }
00450 }
00451
00452 void modifyVar::validateReadName(const bamRecordStruct* recordBuffer)
00453 {
00454 const char* varPtr = (const char*)&(recordBuffer->myData);
00455
00456 unsigned int len = expectedReadNameString.length();
00457 for(unsigned int i = 0; i < len; i++)
00458 {
00459 assert(varPtr[i] == expectedReadNameString[i]);
00460 }
00461
00462
00463 assert(varPtr[len] == 0);
00464
00465
00466 assert(recordBuffer->myReadNameLength ==
00467 expectedReadNameString.length() + 1);
00468 }
00469
00470
00471 void modifyVar::validateCigar(const bamRecordStruct* recordBuffer)
00472 {
00473 const unsigned char* cigarStart =
00474 (const unsigned char*)&(recordBuffer->myData)
00475 + recordBuffer->myReadNameLength;
00476
00477 unsigned int* varPtr = (unsigned int*)cigarStart;
00478
00479 for(int i = 0; i < expectedCigarBufLen; i++)
00480 {
00481 assert(varPtr[i] == expectedCigarBuffer[i]);
00482 }
00483
00484 assert(recordBuffer->myCigarLength == expectedCigarBufLen);
00485 }
00486
00487
00488 void modifyVar::validateSequence(const bamRecordStruct* recordBuffer)
00489 {
00490
00491 int expectedReadLen = expectedSequenceString.length();
00492 int seqLen = (expectedReadLen + 1)/2;
00493 if(expectedSequenceString == "*")
00494 {
00495 expectedReadLen = 0;
00496 seqLen = 0;
00497 }
00498
00499 const unsigned char* sequencePtr =
00500 (const unsigned char*)&(recordBuffer->myData)
00501 + recordBuffer->myReadNameLength + (recordBuffer->myCigarLength * 4);
00502
00503 for(int i = 0; i < seqLen; i++)
00504 {
00505 assert(sequencePtr[i] == expectedSequenceBuffer[i]);
00506 }
00507
00508 assert(recordBuffer->myReadLength == expectedReadLen);
00509 }
00510
00511
00512 void modifyVar::validateQuality(const bamRecordStruct* recordBuffer)
00513 {
00514 int expectedReadLen = expectedSequenceString.length();
00515 int seqLen = (expectedReadLen + 1)/2;
00516 if(expectedSequenceString == "*")
00517 {
00518 expectedReadLen = 0;
00519 seqLen = 0;
00520 }
00521
00522 const uint8_t* qualityPtr =
00523 (const unsigned char*)&(recordBuffer->myData)
00524 + recordBuffer->myReadNameLength + (recordBuffer->myCigarLength * 4)
00525 + seqLen;
00526
00527 int qualityLen = expectedQualityString.length();
00528
00529 for(int i = 0; i < expectedReadLen; i++)
00530 {
00531 if(expectedQualityString == "*")
00532 {
00533
00534 assert(qualityPtr[i] == 0xFF);
00535 }
00536 else if(i >= qualityLen)
00537 {
00538
00539
00540 assert(qualityPtr[i] == 0xFF);
00541 }
00542 else
00543 {
00544 assert(qualityPtr[i] == (expectedQualityString[i] - 33));
00545 }
00546 }
00547
00548 assert(recordBuffer->myReadLength == expectedReadLen);
00549 }
00550
00551
00552 void modifyVar::validateTags(const bamRecordStruct* recordBuffer)
00553 {
00554 const unsigned char* tagsPtr =
00555 (const unsigned char*)&(recordBuffer->myData)
00556 + recordBuffer->myReadNameLength + (recordBuffer->myCigarLength * 4)
00557 + (recordBuffer->myReadLength + 1)/2 + recordBuffer->myReadLength;
00558
00559 for(int i = 0; i < expectedTagsLen; i++)
00560 {
00561 assert(tagsPtr[i] == expectedTagsBuffer[i]);
00562 }
00563
00564
00565
00566
00567 int32_t expectedBlockSize = tagsPtr - (const unsigned char*)(recordBuffer)
00568 + expectedTagsLen - 4;
00569 assert(recordBuffer->myBlockSize == expectedBlockSize);
00570 }
00571
00572
00573 void modifyVar::validateTagsString()
00574 {
00575 char tag[3];
00576 char type;
00577 void* value;
00578 assert(samRecord.getNextSamTag(tag, type, &value) == true);
00579 assert(tag[0] == 'A');
00580 assert(tag[1] == 'M');
00581 assert(type == 'i');
00582 assert(*(char*)value == 0);
00583 assert(samRecord.getNextSamTag(tag, type, &value) == true);
00584 assert(tag[0] == 'M');
00585 assert(tag[1] == 'D');
00586 assert(type == 'Z');
00587 assert(*(String*)value == "37");
00588 assert(samRecord.getNextSamTag(tag, type, &value) == true);
00589 assert(tag[0] == 'N');
00590 assert(tag[1] == 'M');
00591 assert(type == 'i');
00592 assert(*(char*)value == 0);
00593 assert(samRecord.getNextSamTag(tag, type, &value) == true);
00594 assert(tag[0] == 'X');
00595 assert(tag[1] == 'T');
00596 assert(type == 'A');
00597 assert(*(char*)value == 'R');
00598
00599 assert(samRecord.getNextSamTag(tag, type, &value) == false);
00600 assert(samRecord.getNextSamTag(tag, type, &value) == false);
00601 }
00602
00603 void modifyVar::validateReadNameString()
00604 {
00605 assert(samRecord.getReadName() == expectedReadNameString);
00606 }
00607
00608 void modifyVar::validateCigarString()
00609 {
00610 assert(samRecord.getCigar() == expectedCigarString);
00611 }
00612
00613 void modifyVar::validateSequenceString()
00614 {
00615 assert(samRecord.getSequence() == expectedSequenceString);
00616 }
00617
00618 void modifyVar::validateQualityString()
00619 {
00620 assert(samRecord.getQuality() == expectedQualityString);
00621 }
00622
00623
00624 void modifyVar::resetExpected()
00625 {
00626 expectedReadNameString = "1:1011:F:255+17M15D20M";
00627 expectedCigarString = "5M2D";
00628 expectedSequenceString = "CCGAA";
00629 expectedQualityString = "6>6+4";
00630
00631
00632
00633
00634 expectedCigarBufLen = 2;
00635 expectedCigarBuffer[0] = 0x50;
00636 expectedCigarBuffer[1] = 0x22;
00637
00638
00639 expectedSequenceBuffer[0] = 0x22;
00640 expectedSequenceBuffer[1] = 0x41;
00641 expectedSequenceBuffer[2] = 0x10;
00642
00643 expectedTagsLen = 18;
00644 expectedTagsBuffer[0] = 'A';
00645 expectedTagsBuffer[1] = 'M';
00646 expectedTagsBuffer[2] = 'C';
00647 expectedTagsBuffer[3] = 0;
00648 expectedTagsBuffer[4] = 'M';
00649 expectedTagsBuffer[5] = 'D';
00650 expectedTagsBuffer[6] = 'Z';
00651 expectedTagsBuffer[7] = '3';
00652 expectedTagsBuffer[8] = '7';
00653 expectedTagsBuffer[9] = 0;
00654 expectedTagsBuffer[10] = 'N';
00655 expectedTagsBuffer[11] = 'M';
00656 expectedTagsBuffer[12] = 'C';
00657 expectedTagsBuffer[13] = 0;
00658 expectedTagsBuffer[14] = 'X';
00659 expectedTagsBuffer[15] = 'T';
00660 expectedTagsBuffer[16] = 'A';
00661 expectedTagsBuffer[17] = 'R';
00662 }
00663
00664
00665 void modifyVar::openAndRead1Rec()
00666 {
00667
00668 assert(samIn.OpenForRead(myFilename));
00669
00670
00671 assert(samIn.ReadHeader(samHeader));
00672
00673
00674 assert(samIn.ReadRecord(samHeader, samRecord));
00675 }