ModifyVar.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "SamFile.h"
00019 #include "ModifyVar.h"
00020 #include "SamValidation.h"
00021
00022 void modifyFirstBase()
00023 {
00024 SamFile samIn;
00025
00026 assert(samIn.OpenForRead("testFiles/testVar.bam"));
00027
00028 SamFile samOut;
00029
00030 assert(samOut.OpenForWrite("results/updateVar.bam"));
00031
00032
00033 SamFileHeader samHeader;
00034 assert(samIn.ReadHeader(samHeader));
00035
00036 assert(samOut.WriteHeader(samHeader));
00037
00038
00039 SamRecord samRecord;
00040
00041
00042 while(samIn.ReadRecord(samHeader, samRecord))
00043 {
00044
00045
00046 SamValidationErrors samValidationErrors;
00047 assert(SamValidator::isValid(samHeader, samRecord,
00048 samValidationErrors));
00049
00050
00051 const char* sequence = samRecord.getSequence();
00052 assert(strcmp(sequence, "") != 0);
00053 std::string upSeq = sequence;
00054 upSeq[0] = 'N';
00055 assert(samRecord.setSequence(upSeq.c_str()));
00056
00057
00058 assert(samOut.WriteRecord(samHeader, samRecord));
00059 }
00060
00061
00062 assert(samIn.GetStatus() == SamStatus::NO_MORE_RECS);
00063 }
00064
00065
00066 void modifyFirstBaseLong()
00067 {
00068 SamFile samIn;
00069
00070 assert(samIn.OpenForRead("statusTests/HalfG.bam"));
00071
00072 SamFile samOut;
00073
00074 assert(samOut.OpenForWrite("results/updateSeq.bam"));
00075
00076
00077 SamFileHeader samHeader;
00078 assert(samIn.ReadHeader(samHeader));
00079
00080 assert(samOut.WriteHeader(samHeader));
00081
00082
00083 SamRecord samRecord;
00084
00085
00086 while(samIn.ReadRecord(samHeader, samRecord))
00087 {
00088
00089
00090 SamValidationErrors samValidationErrors;
00091 assert(SamValidator::isValid(samHeader, samRecord,
00092 samValidationErrors));
00093
00094
00095 const char* sequence = samRecord.getSequence();
00096 assert(strcmp(sequence, "") != 0);
00097 std::string upSeq = sequence;
00098 upSeq[0] = 'N';
00099 assert(samRecord.setSequence(upSeq.c_str()));
00100
00101
00102 assert(samOut.WriteRecord(samHeader, samRecord));
00103 }
00104
00105
00106 assert(samIn.GetStatus() == SamStatus::NO_MORE_RECS);
00107 }
00108
00109
00110 void testModifyVar()
00111 {
00112 modifyFirstBase();
00113 modifyVar modTest;
00114 modTest.testModifyVar("testFiles/testSam.sam", true);
00115 modTest.testModifyVar("testFiles/testBam.bam", true);
00116 modTest.testModifyVar("testFiles/testSam.sam", false);
00117 modTest.testModifyVar("testFiles/testBam.bam", false);
00118 }
00119
00120
00121 void modifyVar::testModifyVar(const char* filename, bool valBufFirst)
00122 {
00123 myFilename = filename;
00124 myValBufFirst = valBufFirst;
00125
00126 testModifyReadNameOnlySameLength();
00127 testModifyCigarOnlySameLength();
00128 testModifySequenceOnlySameLength();
00129 testModifyQualityOnlySameLength();
00130 testRemoveQuality();
00131 testShortenQuality();
00132 testLengthenQuality();
00133
00134 testShortenReadName();
00135 testShortenCigar();
00136 testShortenSequence();
00137
00138 testLengthenReadName();
00139 testLengthenCigar();
00140 testLengthenSequence();
00141
00142 testRemoveCigar();
00143 testRemoveSequence();
00144 testLengthenSequenceAndQuality();
00145 }
00146
00147
00148 void modifyVar::testModifyReadNameOnlySameLength()
00149 {
00150 resetExpected();
00151 openAndRead1Rec();
00152
00153
00154 expectedReadNameString = "1:1011:G:255+17M15D20M";
00155 samRecord.setReadName(expectedReadNameString.c_str());
00156
00157 validate();
00158 }
00159
00160
00161 void modifyVar::testModifyCigarOnlySameLength()
00162 {
00163 resetExpected();
00164 openAndRead1Rec();
00165
00166
00167 expectedCigarString = "3M2I";
00168 samRecord.setCigar(expectedCigarString.c_str());
00169
00170
00171
00172
00173 expectedCigarBufLen = 2;
00174 expectedCigarBuffer[0] = 0x30;
00175 expectedCigarBuffer[1] = 0x21;
00176
00177 validate();
00178 }
00179
00180
00181 void modifyVar::testModifySequenceOnlySameLength()
00182 {
00183 resetExpected();
00184 openAndRead1Rec();
00185
00186
00187 expectedSequenceString = "NCGAN";
00188 samRecord.setSequence(expectedSequenceString.c_str());
00189
00190
00191 expectedSequenceBuffer[0] = 0xF2;
00192 expectedSequenceBuffer[1] = 0x41;
00193 expectedSequenceBuffer[2] = 0xF0;
00194
00195 validate();
00196 }
00197
00198
00199 void modifyVar::testModifyQualityOnlySameLength()
00200 {
00201 resetExpected();
00202 openAndRead1Rec();
00203
00204
00205 expectedQualityString = "!>6+!";
00206 samRecord.setQuality(expectedQualityString.c_str());
00207
00208 validate();
00209 }
00210
00211
00212 void modifyVar::testRemoveQuality()
00213 {
00214 resetExpected();
00215 openAndRead1Rec();
00216
00217
00218
00219 expectedQualityString = "*";
00220 samRecord.setQuality(expectedQualityString.c_str());
00221
00222 validate();
00223 }
00224
00225
00226 void modifyVar::testShortenQuality()
00227 {
00228 resetExpected();
00229 openAndRead1Rec();
00230
00231
00232
00233 expectedQualityString = "!!";
00234 samRecord.setQuality(expectedQualityString.c_str());
00235
00236 validate();
00237 }
00238
00239
00240 void modifyVar::testLengthenQuality()
00241 {
00242 resetExpected();
00243 openAndRead1Rec();
00244
00245
00246
00247 expectedQualityString = "!!@@##";
00248 samRecord.setQuality(expectedQualityString.c_str());
00249
00250 validate();
00251 }
00252
00253
00254 void modifyVar::testShortenReadName()
00255 {
00256 resetExpected();
00257 openAndRead1Rec();
00258
00259
00260 expectedReadNameString = "1:1011:G:255";
00261 samRecord.setReadName(expectedReadNameString.c_str());
00262
00263 validate();
00264 }
00265
00266
00267 void modifyVar::testShortenCigar()
00268 {
00269 resetExpected();
00270 openAndRead1Rec();
00271
00272
00273 expectedCigarString = "5M";
00274 samRecord.setCigar(expectedCigarString.c_str());
00275
00276
00277
00278 expectedCigarBufLen = 1;
00279 expectedCigarBuffer[0] = 0x50;
00280
00281 validate();
00282 }
00283
00284
00285 void modifyVar::testShortenSequence()
00286 {
00287 resetExpected();
00288 openAndRead1Rec();
00289
00290
00291 expectedSequenceString = "CCGA";
00292 samRecord.setSequence(expectedSequenceString.c_str());
00293
00294
00295 expectedSequenceBuffer[0] = 0x22;
00296 expectedSequenceBuffer[1] = 0x41;
00297
00298 validate();
00299 }
00300
00301
00302 void modifyVar::testLengthenReadName()
00303 {
00304 resetExpected();
00305 openAndRead1Rec();
00306
00307
00308 expectedReadNameString = "1:1011:G:255+17M15D20M:1111111";
00309 samRecord.setReadName(expectedReadNameString.c_str());
00310
00311 validate();
00312 }
00313
00314
00315 void modifyVar::testLengthenCigar()
00316 {
00317 resetExpected();
00318 openAndRead1Rec();
00319
00320
00321 expectedCigarString = "3M2D2I";
00322 samRecord.setCigar(expectedCigarString.c_str());
00323
00324
00325
00326
00327
00328 expectedCigarBufLen = 3;
00329 expectedCigarBuffer[0] = 0x30;
00330 expectedCigarBuffer[1] = 0x22;
00331 expectedCigarBuffer[2] = 0x21;
00332
00333 validate();
00334 }
00335
00336
00337 void modifyVar::testLengthenSequence()
00338 {
00339 resetExpected();
00340 openAndRead1Rec();
00341
00342
00343 expectedSequenceString = "CCGAATT";
00344 samRecord.setSequence(expectedSequenceString.c_str());
00345
00346
00347 expectedSequenceBuffer[0] = 0x22;
00348 expectedSequenceBuffer[1] = 0x41;
00349 expectedSequenceBuffer[2] = 0x18;
00350 expectedSequenceBuffer[3] = 0x80;
00351
00352 validate();
00353 }
00354
00355
00356 void modifyVar::testRemoveCigar()
00357 {
00358 resetExpected();
00359 openAndRead1Rec();
00360
00361
00362 expectedCigarString = "*";
00363 expectedCigarBufLen = 0;
00364 samRecord.setCigar(expectedCigarString.c_str());
00365
00366 validate();
00367 }
00368
00369
00370 void modifyVar::testRemoveSequence()
00371 {
00372 resetExpected();
00373 openAndRead1Rec();
00374
00375
00376 expectedSequenceString = "*";
00377 samRecord.setSequence(expectedSequenceString.c_str());
00378
00379 validate();
00380 }
00381
00382
00383 void modifyVar::testLengthenSequenceAndQuality()
00384 {
00385 resetExpected();
00386 openAndRead1Rec();
00387
00388
00389 expectedSequenceString = "CCGAATT";
00390 expectedQualityString = "!@#$%^&";
00391 samRecord.setSequence(expectedSequenceString.c_str());
00392 samRecord.setQuality(expectedQualityString.c_str());
00393
00394
00395 expectedSequenceBuffer[0] = 0x22;
00396 expectedSequenceBuffer[1] = 0x41;
00397 expectedSequenceBuffer[2] = 0x18;
00398 expectedSequenceBuffer[3] = 0x80;
00399
00400 validate();
00401 }
00402
00403
00404 void modifyVar::validate()
00405 {
00406 if(myValBufFirst)
00407 {
00408
00409 const bamRecordStruct* recordBuffer =
00410 (const bamRecordStruct*)samRecord.getRecordBuffer();
00411
00412
00413 validateReadName(recordBuffer);
00414 validateCigar(recordBuffer);
00415 validateSequence(recordBuffer);
00416 validateQuality(recordBuffer);
00417 validateTags(recordBuffer);
00418
00419
00420 validateReadNameString();
00421 validateCigarString();
00422 validateSequenceString();
00423 validateQualityString();
00424 validateTagsString();
00425 }
00426 else
00427 {
00428
00429 const bamRecordStruct* recordBuffer =
00430 (const bamRecordStruct*)samRecord.getRecordBuffer();
00431
00432
00433 validateReadName(recordBuffer);
00434 validateCigar(recordBuffer);
00435 validateSequence(recordBuffer);
00436 validateQuality(recordBuffer);
00437 validateTags(recordBuffer);
00438
00439
00440 validateReadNameString();
00441 validateCigarString();
00442 validateSequenceString();
00443 validateQualityString();
00444 validateTagsString();
00445 }
00446 }
00447
00448 void modifyVar::validateReadName(const bamRecordStruct* recordBuffer)
00449 {
00450 const char* varPtr = (const char*)&(recordBuffer->myData);
00451
00452 unsigned int len = expectedReadNameString.length();
00453 for(unsigned int i = 0; i < len; i++)
00454 {
00455 assert(varPtr[i] == expectedReadNameString[i]);
00456 }
00457
00458
00459 assert(varPtr[len] == 0);
00460
00461
00462 assert(recordBuffer->myReadNameLength ==
00463 expectedReadNameString.length() + 1);
00464 }
00465
00466
00467 void modifyVar::validateCigar(const bamRecordStruct* recordBuffer)
00468 {
00469 const unsigned char* cigarStart =
00470 (const unsigned char*)&(recordBuffer->myData)
00471 + recordBuffer->myReadNameLength;
00472
00473 unsigned int* varPtr = (unsigned int*)cigarStart;
00474
00475 for(int i = 0; i < expectedCigarBufLen; i++)
00476 {
00477 assert(varPtr[i] == expectedCigarBuffer[i]);
00478 }
00479
00480 assert(recordBuffer->myCigarLength == expectedCigarBufLen);
00481 }
00482
00483
00484 void modifyVar::validateSequence(const bamRecordStruct* recordBuffer)
00485 {
00486
00487 int expectedReadLen = expectedSequenceString.length();
00488 int seqLen = (expectedReadLen + 1)/2;
00489 if(expectedSequenceString == "*")
00490 {
00491 expectedReadLen = 0;
00492 seqLen = 0;
00493 }
00494
00495 const unsigned char* sequencePtr =
00496 (const unsigned char*)&(recordBuffer->myData)
00497 + recordBuffer->myReadNameLength + (recordBuffer->myCigarLength * 4);
00498
00499 for(int i = 0; i < seqLen; i++)
00500 {
00501 assert(sequencePtr[i] == expectedSequenceBuffer[i]);
00502 }
00503
00504 assert(recordBuffer->myReadLength == expectedReadLen);
00505 }
00506
00507
00508 void modifyVar::validateQuality(const bamRecordStruct* recordBuffer)
00509 {
00510 int expectedReadLen = expectedSequenceString.length();
00511 int seqLen = (expectedReadLen + 1)/2;
00512 if(expectedSequenceString == "*")
00513 {
00514 expectedReadLen = 0;
00515 seqLen = 0;
00516 }
00517
00518 const uint8_t* qualityPtr =
00519 (const unsigned char*)&(recordBuffer->myData)
00520 + recordBuffer->myReadNameLength + (recordBuffer->myCigarLength * 4)
00521 + seqLen;
00522
00523 int qualityLen = expectedQualityString.length();
00524
00525 for(int i = 0; i < expectedReadLen; i++)
00526 {
00527 if(expectedQualityString == "*")
00528 {
00529
00530 assert(qualityPtr[i] == 0xFF);
00531 }
00532 else if(i >= qualityLen)
00533 {
00534
00535
00536 assert(qualityPtr[i] == 0xFF);
00537 }
00538 else
00539 {
00540 assert(qualityPtr[i] == (expectedQualityString[i] - 33));
00541 }
00542 }
00543
00544 assert(recordBuffer->myReadLength == expectedReadLen);
00545 }
00546
00547
00548 void modifyVar::validateTags(const bamRecordStruct* recordBuffer)
00549 {
00550 const unsigned char* tagsPtr =
00551 (const unsigned char*)&(recordBuffer->myData)
00552 + recordBuffer->myReadNameLength + (recordBuffer->myCigarLength * 4)
00553 + (recordBuffer->myReadLength + 1)/2 + recordBuffer->myReadLength;
00554
00555 for(int i = 0; i < expectedTagsLen; i++)
00556 {
00557 assert(tagsPtr[i] == expectedTagsBuffer[i]);
00558 }
00559
00560
00561
00562
00563 int32_t expectedBlockSize = tagsPtr - (const unsigned char*)(recordBuffer)
00564 + expectedTagsLen - 4;
00565 assert(recordBuffer->myBlockSize == expectedBlockSize);
00566 }
00567
00568
00569 void modifyVar::validateTagsString()
00570 {
00571 char tag[3];
00572 char type;
00573 void* value;
00574 assert(samRecord.getNextSamTag(tag, type, &value) == true);
00575 assert(tag[0] == 'A');
00576 assert(tag[1] == 'M');
00577 assert(type == 'i');
00578 assert(*(char*)value == 0);
00579 assert(samRecord.getNextSamTag(tag, type, &value) == true);
00580 assert(tag[0] == 'M');
00581 assert(tag[1] == 'D');
00582 assert(type == 'Z');
00583 assert(*(String*)value == "37");
00584 assert(samRecord.getNextSamTag(tag, type, &value) == true);
00585 assert(tag[0] == 'N');
00586 assert(tag[1] == 'M');
00587 assert(type == 'i');
00588 assert(*(char*)value == 0);
00589 assert(samRecord.getNextSamTag(tag, type, &value) == true);
00590 assert(tag[0] == 'X');
00591 assert(tag[1] == 'T');
00592 assert(type == 'A');
00593 assert(*(char*)value == 'R');
00594
00595 assert(samRecord.getNextSamTag(tag, type, &value) == false);
00596 assert(samRecord.getNextSamTag(tag, type, &value) == false);
00597 }
00598
00599 void modifyVar::validateReadNameString()
00600 {
00601 assert(samRecord.getReadName() == expectedReadNameString);
00602 }
00603
00604 void modifyVar::validateCigarString()
00605 {
00606 assert(samRecord.getCigar() == expectedCigarString);
00607 }
00608
00609 void modifyVar::validateSequenceString()
00610 {
00611 assert(samRecord.getSequence() == expectedSequenceString);
00612 }
00613
00614 void modifyVar::validateQualityString()
00615 {
00616 assert(samRecord.getQuality() == expectedQualityString);
00617 }
00618
00619
00620 void modifyVar::resetExpected()
00621 {
00622 expectedReadNameString = "1:1011:F:255+17M15D20M";
00623 expectedCigarString = "5M2D";
00624 expectedSequenceString = "CCGAA";
00625 expectedQualityString = "6>6+4";
00626
00627
00628
00629
00630 expectedCigarBufLen = 2;
00631 expectedCigarBuffer[0] = 0x50;
00632 expectedCigarBuffer[1] = 0x22;
00633
00634
00635 expectedSequenceBuffer[0] = 0x22;
00636 expectedSequenceBuffer[1] = 0x41;
00637 expectedSequenceBuffer[2] = 0x10;
00638
00639 expectedTagsLen = 18;
00640 expectedTagsBuffer[0] = 'A';
00641 expectedTagsBuffer[1] = 'M';
00642 expectedTagsBuffer[2] = 'C';
00643 expectedTagsBuffer[3] = 0;
00644 expectedTagsBuffer[4] = 'M';
00645 expectedTagsBuffer[5] = 'D';
00646 expectedTagsBuffer[6] = 'Z';
00647 expectedTagsBuffer[7] = '3';
00648 expectedTagsBuffer[8] = '7';
00649 expectedTagsBuffer[9] = 0;
00650 expectedTagsBuffer[10] = 'N';
00651 expectedTagsBuffer[11] = 'M';
00652 expectedTagsBuffer[12] = 'C';
00653 expectedTagsBuffer[13] = 0;
00654 expectedTagsBuffer[14] = 'X';
00655 expectedTagsBuffer[15] = 'T';
00656 expectedTagsBuffer[16] = 'A';
00657 expectedTagsBuffer[17] = 'R';
00658 }
00659
00660
00661 void modifyVar::openAndRead1Rec()
00662 {
00663
00664 assert(samIn.OpenForRead(myFilename));
00665
00666
00667 assert(samIn.ReadHeader(samHeader));
00668
00669
00670 assert(samIn.ReadRecord(samHeader, samRecord));
00671 }