BamIndexTest.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "BamIndex.h"
00019 #include "Validate.h"
00020 #include "BamIndexTest.h"
00021 
00022 #include <assert.h>
00023 
00024 void testIndex(BamIndex& bamIndex)
00025 {
00026     assert(bamIndex.getNumMappedReads(1) == 2);
00027     assert(bamIndex.getNumUnMappedReads(1) == 0);
00028     assert(bamIndex.getNumMappedReads(0) == 4);
00029     assert(bamIndex.getNumUnMappedReads(0) == 1);
00030     assert(bamIndex.getNumMappedReads(23) == -1);
00031     assert(bamIndex.getNumUnMappedReads(23) == -1);
00032     assert(bamIndex.getNumMappedReads(-1) == 0);
00033     assert(bamIndex.getNumUnMappedReads(-1) == 2);
00034     assert(bamIndex.getNumMappedReads(-2) == -1);
00035     assert(bamIndex.getNumUnMappedReads(-2) == -1);
00036     assert(bamIndex.getNumMappedReads(22) == 0);
00037     assert(bamIndex.getNumUnMappedReads(22) == 0);
00038 
00039     // Get the chunks for reference id 1.
00040     Chunk testChunk;
00041     SortedChunkList chunkList;
00042     assert(bamIndex.getChunksForRegion(1, -1, -1, chunkList) == true);
00043     assert(!chunkList.empty());
00044     testChunk = chunkList.pop();
00045     assert(chunkList.empty());
00046     assert(testChunk.chunk_beg == 0x4e7);
00047     assert(testChunk.chunk_end == 0x599);
00048 
00049     // Get the chunks for reference id 0.
00050     assert(bamIndex.getChunksForRegion(0, -1, -1, chunkList) == true);
00051     assert(!chunkList.empty());
00052     testChunk = chunkList.pop();
00053     assert(chunkList.empty());
00054     assert(testChunk.chunk_beg == 0x360);
00055     assert(testChunk.chunk_end == 0x4e7);
00056 
00057 
00058     // Get the chunks for reference id 2.
00059     assert(bamIndex.getChunksForRegion(2, -1, -1, chunkList) == true);
00060     assert(!chunkList.empty());
00061     testChunk = chunkList.pop();
00062     assert(chunkList.empty());
00063     assert(testChunk.chunk_beg == 0x599);
00064     assert(testChunk.chunk_end == 0x5ea);
00065 
00066     // Get the chunks for reference id 3.
00067     // There isn't one for this ref id, but still successfully read the file,
00068     // so it should return true, but the list should be empty.
00069     assert(bamIndex.getChunksForRegion(3, -1, -1, chunkList) == true);
00070     assert(chunkList.empty());
00071 
00072     // Test reading an indexed bam file.
00073     SamFile inFile;
00074     assert(inFile.OpenForRead("testFiles/sortedBam.bam"));
00075     inFile.setSortedValidation(SamFile::COORDINATE);
00076     assert(inFile.ReadBamIndex("testFiles/sortedBam.bam.bai"));
00077     SamFileHeader samHeader;
00078     assert(inFile.ReadHeader(samHeader));
00079     SamRecord samRecord;
00080 
00081     // Test getting num mapped/unmapped reads.
00082     assert(inFile.getNumMappedReadsFromIndex(1) == 2);
00083     assert(inFile.getNumUnMappedReadsFromIndex(1) == 0);
00084     assert(inFile.getNumMappedReadsFromIndex(0) == 4);
00085     assert(inFile.getNumUnMappedReadsFromIndex(0) == 1);
00086     assert(inFile.getNumMappedReadsFromIndex(23) == -1);
00087     assert(inFile.getNumUnMappedReadsFromIndex(23) == -1);
00088     assert(inFile.getNumMappedReadsFromIndex(-1) == 0);
00089     assert(inFile.getNumUnMappedReadsFromIndex(-1) == 2);
00090     assert(inFile.getNumMappedReadsFromIndex(-2) == -1);
00091     assert(inFile.getNumUnMappedReadsFromIndex(-2) == -1);
00092     assert(inFile.getNumMappedReadsFromIndex(22) == 0);
00093     assert(inFile.getNumUnMappedReadsFromIndex(22) == 0);
00094 
00095     assert(inFile.getNumMappedReadsFromIndex("2", samHeader) == 2);
00096     assert(inFile.getNumUnMappedReadsFromIndex("2", samHeader) == 0);
00097     assert(inFile.getNumMappedReadsFromIndex("1", samHeader) == 4);
00098     assert(inFile.getNumUnMappedReadsFromIndex("1", samHeader) == 1);
00099     assert(inFile.getNumMappedReadsFromIndex("22", samHeader) == 0);
00100     assert(inFile.getNumUnMappedReadsFromIndex("22", samHeader) == 0);
00101     assert(inFile.getNumMappedReadsFromIndex("", samHeader) == 0);
00102     assert(inFile.getNumUnMappedReadsFromIndex("*", samHeader) == 2);
00103     assert(inFile.getNumMappedReadsFromIndex("unknown", samHeader) == -1);
00104     assert(inFile.getNumUnMappedReadsFromIndex("unknown", samHeader) == -1);
00105     assert(inFile.getNumMappedReadsFromIndex("X", samHeader) == 0);
00106     assert(inFile.getNumUnMappedReadsFromIndex("X", samHeader) == 0);
00107 
00108    // Set the read section saying the reads must be fully enclosed in the section.
00109     assert(inFile.SetReadSection("1", 1010, 1011, false));
00110     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00111 
00112     assert(inFile.SetReadSection("1", 1011, 1012, false));
00113     assert(inFile.ReadRecord(samHeader, samRecord));
00114     validateRead2(samRecord);
00115     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00116 
00117     // Section -1 = Ref *: 2 records (8 & 10 from testSam.sam that is reflected
00118     // in the validation.
00119     assert(inFile.SetReadSection(-1));
00120     assert(inFile.ReadRecord(samHeader, samRecord));
00121     validateRead8(samRecord);
00122     assert(inFile.ReadRecord(samHeader, samRecord));
00123     validateRead10(samRecord);
00124     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00125 
00126     // Section 2 = Ref 3: 1 records (9 from testSam.sam that is reflected
00127     // in the validation.
00128     assert(inFile.SetReadSection(2));
00129     assert(inFile.ReadRecord(samHeader, samRecord));
00130     validateRead9(samRecord);
00131     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00132 
00133     // Section 0 = Ref 1: 5 records (3, 4, 1, 2, & 6 from testSam.sam that is
00134     // reflected in the validation.
00135     assert(inFile.SetReadSection(0));
00136     assert(inFile.ReadRecord(samHeader, samRecord));
00137     validateRead3(samRecord);
00138     assert(inFile.ReadRecord(samHeader, samRecord));
00139     validateRead4(samRecord);
00140     assert(inFile.ReadRecord(samHeader, samRecord));
00141     validateRead1(samRecord);
00142     assert(inFile.ReadRecord(samHeader, samRecord));
00143     validateRead2(samRecord);
00144     assert(inFile.ReadRecord(samHeader, samRecord));
00145     validateRead6(samRecord);
00146     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00147 
00148     // Section 1 = Ref 2: 2 records (5 & 7 from testSam.sam that is reflected
00149     // in the validation.
00150     assert(inFile.SetReadSection(1));
00151     assert(inFile.ReadRecord(samHeader, samRecord));
00152     validateRead5(samRecord);
00153     assert(inFile.ReadRecord(samHeader, samRecord));
00154     validateRead7(samRecord);
00155     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00156 
00157     // Section 3 to 22 (ref 4 - 23): 0 records.
00158     for(int i = 3; i < 23; i++)
00159     {
00160         assert(inFile.SetReadSection(i));
00161         assert(inFile.ReadRecord(samHeader, samRecord) == false);
00162     }
00163 
00164 
00165     // Set the read section.
00166     assert(inFile.SetReadSection("1", 1010, 1012));
00167     assert(inFile.ReadRecord(samHeader, samRecord));
00168     validateRead1(samRecord);
00169     assert(inFile.GetNumOverlaps(samRecord) == 2);
00170     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00171     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00172     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00173     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00174     assert(inFile.ReadRecord(samHeader, samRecord));
00175     validateRead2(samRecord);
00176     assert(inFile.GetNumOverlaps(samRecord) == 0);
00177     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00178     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00179     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00180     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00181     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00182            
00183     assert(inFile.SetReadSection("1", 1010, 1020));
00184     assert(inFile.ReadRecord(samHeader, samRecord));
00185     validateRead1(samRecord);
00186     assert(inFile.GetNumOverlaps(samRecord) == 5);
00187     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00188     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00189     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00190     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00191     assert(inFile.ReadRecord(samHeader, samRecord));
00192     validateRead2(samRecord);
00193     assert(inFile.GetNumOverlaps(samRecord) == 0);
00194     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00195     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00196     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00197     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00198     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00199            
00200     assert(inFile.SetReadSection("1", 1010, 1011));
00201     assert(inFile.ReadRecord(samHeader, samRecord));
00202     validateRead1(samRecord);
00203     assert(inFile.GetNumOverlaps(samRecord) == 1);
00204     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00205     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00206     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00207     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00208     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00209            
00210     assert(inFile.SetReadSection("1", 1011, 1012));
00211     assert(inFile.ReadRecord(samHeader, samRecord));
00212     validateRead1(samRecord);
00213     assert(inFile.GetNumOverlaps(samRecord) == 1);
00214     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00215     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00216     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00217     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00218     assert(inFile.ReadRecord(samHeader, samRecord));
00219     validateRead2(samRecord);
00220     assert(inFile.GetNumOverlaps(samRecord) == 0);
00221     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00222     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00223     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00224     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00225     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00226 }
00227 
00228 
00229 void testBamIndex()
00230 {
00231     // Create a bam index.
00232     BamIndex bamIndex;
00233     bamIndex.readIndex("testFiles/sortedBam.bam.bai");
00234     testIndex(bamIndex);
00235 
00236     BamIndexFileTest test1;
00237     bool caughtException = false;
00238     try
00239     {
00240         // Try reading the bam index without specifying a
00241         // filename and before opening a bam file.
00242         assert(test1.ReadBamIndex() == false);
00243     }
00244     catch (std::exception& e)
00245     {
00246         caughtException = true;
00247         assert(strcmp(e.what(), "FAIL_ORDER: Failed to read the bam Index file - the BAM file needs to be read first in order to determine the index filename.") == 0);
00248     }
00249     // Should have failed and thrown an exception.
00250     assert(caughtException);
00251 
00252     // Read the bam index with a specified name.
00253     assert(test1.ReadBamIndex("testFiles/sortedBam.bam.bai"));
00254     BamIndex* index = test1.getBamIndex();
00255     assert(index != NULL);
00256     testIndex(*index);
00257 
00258     // Open the bam file so the index can be opened.
00259     assert(test1.OpenForRead("testFiles/sortedBam.bam"));
00260     // Try reading the bam index without specifying a
00261     // filename after opening a bam file.
00262     assert(test1.ReadBamIndex() == true);
00263     index = test1.getBamIndex();
00264     assert(index != NULL);
00265     testIndex(*index);
00266 
00267     // Open the bam file so the index can be opened.
00268     // This time the index file does not have .bam in it.
00269     assert(test1.OpenForRead("testFiles/sortedBam2.bam"));
00270     // Try reading the bam index without specifying a
00271     // filename after opening a bam file.
00272     assert(test1.ReadBamIndex() == true);
00273     index = test1.getBamIndex();
00274     assert(index != NULL);
00275     testIndex(*index);
00276 
00277 }
00278 
00279 
Generated on Tue Aug 23 18:19:04 2011 for libStatGen Software by  doxygen 1.6.3