libStatGen Software  1
BamIndexTest.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "BamIndex.h"
00019 #include "TestValidate.h"
00020 #include "BamIndexTest.h"
00021 
00022 #include <assert.h>
00023 
00024 void testIndex(BamIndex& bamIndex)
00025 {
00026 #ifdef __ZLIB_AVAILABLE__
00027     assert(bamIndex.getNumMappedReads(1) == 2);
00028     assert(bamIndex.getNumUnMappedReads(1) == 0);
00029     assert(bamIndex.getNumMappedReads(0) == 4);
00030     assert(bamIndex.getNumUnMappedReads(0) == 1);
00031     assert(bamIndex.getNumMappedReads(23) == -1);
00032     assert(bamIndex.getNumUnMappedReads(23) == -1);
00033     assert(bamIndex.getNumMappedReads(-1) == 0);
00034     assert(bamIndex.getNumUnMappedReads(-1) == 2);
00035     assert(bamIndex.getNumMappedReads(-2) == -1);
00036     assert(bamIndex.getNumUnMappedReads(-2) == -1);
00037     assert(bamIndex.getNumMappedReads(22) == 0);
00038     assert(bamIndex.getNumUnMappedReads(22) == 0);
00039 
00040     // Get the chunks for reference id 1.
00041     Chunk testChunk;
00042     SortedChunkList chunkList;
00043     assert(bamIndex.getChunksForRegion(1, -1, -1, chunkList) == true);
00044     assert(!chunkList.empty());
00045     testChunk = chunkList.pop();
00046     assert(chunkList.empty());
00047     assert(testChunk.chunk_beg == 0x4e7);
00048     assert(testChunk.chunk_end == 0x599);
00049 
00050     // Get the chunks for reference id 0.
00051     assert(bamIndex.getChunksForRegion(0, -1, -1, chunkList) == true);
00052     assert(!chunkList.empty());
00053     testChunk = chunkList.pop();
00054     assert(chunkList.empty());
00055     assert(testChunk.chunk_beg == 0x360);
00056     assert(testChunk.chunk_end == 0x4e7);
00057 
00058 
00059     // Get the chunks for reference id 2.
00060     assert(bamIndex.getChunksForRegion(2, -1, -1, chunkList) == true);
00061     assert(!chunkList.empty());
00062     testChunk = chunkList.pop();
00063     assert(chunkList.empty());
00064     assert(testChunk.chunk_beg == 0x599);
00065     assert(testChunk.chunk_end == 0x5ea);
00066 
00067     // Get the chunks for reference id 3.
00068     // There isn't one for this ref id, but still successfully read the file,
00069     // so it should return true, but the list should be empty.
00070     assert(bamIndex.getChunksForRegion(3, -1, -1, chunkList) == true);
00071     assert(chunkList.empty());
00072 
00073     // Test reading an indexed bam file.
00074     SamFile inFile;
00075     assert(inFile.OpenForRead("testFiles/sortedBam.bam"));
00076     inFile.setSortedValidation(SamFile::COORDINATE);
00077     assert(inFile.ReadBamIndex("testFiles/sortedBam.bam.bai"));
00078     SamFileHeader samHeader;
00079     assert(inFile.ReadHeader(samHeader));
00080     SamRecord samRecord;
00081 
00082     // Test getting num mapped/unmapped reads.
00083     assert(inFile.getNumMappedReadsFromIndex(1) == 2);
00084     assert(inFile.getNumUnMappedReadsFromIndex(1) == 0);
00085     assert(inFile.getNumMappedReadsFromIndex(0) == 4);
00086     assert(inFile.getNumUnMappedReadsFromIndex(0) == 1);
00087     assert(inFile.getNumMappedReadsFromIndex(23) == -1);
00088     assert(inFile.getNumUnMappedReadsFromIndex(23) == -1);
00089     assert(inFile.getNumMappedReadsFromIndex(-1) == 0);
00090     assert(inFile.getNumUnMappedReadsFromIndex(-1) == 2);
00091     assert(inFile.getNumMappedReadsFromIndex(-2) == -1);
00092     assert(inFile.getNumUnMappedReadsFromIndex(-2) == -1);
00093     assert(inFile.getNumMappedReadsFromIndex(22) == 0);
00094     assert(inFile.getNumUnMappedReadsFromIndex(22) == 0);
00095 
00096     assert(inFile.getNumMappedReadsFromIndex("2", samHeader) == 2);
00097     assert(inFile.getNumUnMappedReadsFromIndex("2", samHeader) == 0);
00098     assert(inFile.getNumMappedReadsFromIndex("1", samHeader) == 4);
00099     assert(inFile.getNumUnMappedReadsFromIndex("1", samHeader) == 1);
00100     assert(inFile.getNumMappedReadsFromIndex("22", samHeader) == 0);
00101     assert(inFile.getNumUnMappedReadsFromIndex("22", samHeader) == 0);
00102     assert(inFile.getNumMappedReadsFromIndex("", samHeader) == 0);
00103     assert(inFile.getNumUnMappedReadsFromIndex("*", samHeader) == 2);
00104     assert(inFile.getNumMappedReadsFromIndex("unknown", samHeader) == -1);
00105     assert(inFile.getNumUnMappedReadsFromIndex("unknown", samHeader) == -1);
00106     assert(inFile.getNumMappedReadsFromIndex("X", samHeader) == 0);
00107     assert(inFile.getNumUnMappedReadsFromIndex("X", samHeader) == 0);
00108 
00109    // Set the read section saying the reads must be fully enclosed in the section.
00110     assert(inFile.SetReadSection("1", 1010, 1011, false));
00111     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00112 
00113     assert(inFile.SetReadSection("1", 1011, 1012, false));
00114     assert(inFile.ReadRecord(samHeader, samRecord));
00115     validateRead2(samRecord);
00116     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00117 
00118     // Section -1 = Ref *: 2 records (8 & 10 from testSam.sam that is reflected
00119     // in the validation.
00120     assert(inFile.SetReadSection(-1));
00121     assert(inFile.ReadRecord(samHeader, samRecord));
00122     validateRead8(samRecord);
00123     assert(inFile.ReadRecord(samHeader, samRecord));
00124     validateRead10(samRecord);
00125     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00126 
00127     // Section 2 = Ref 3: 1 records (9 from testSam.sam that is reflected
00128     // in the validation.
00129     assert(inFile.SetReadSection(2));
00130     assert(inFile.ReadRecord(samHeader, samRecord));
00131     validateRead9(samRecord);
00132     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00133 
00134     // Section 0 = Ref 1: 5 records (3, 4, 1, 2, & 6 from testSam.sam that is
00135     // reflected in the validation.
00136     assert(inFile.SetReadSection(0));
00137     assert(inFile.ReadRecord(samHeader, samRecord));
00138     validateRead3(samRecord);
00139     assert(inFile.ReadRecord(samHeader, samRecord));
00140     validateRead4(samRecord);
00141     assert(inFile.ReadRecord(samHeader, samRecord));
00142     validateRead1(samRecord);
00143     assert(inFile.ReadRecord(samHeader, samRecord));
00144     validateRead2(samRecord);
00145     assert(inFile.ReadRecord(samHeader, samRecord));
00146     validateRead6(samRecord);
00147     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00148 
00149     // Section 1 = Ref 2: 2 records (5 & 7 from testSam.sam that is reflected
00150     // in the validation.
00151     assert(inFile.SetReadSection(1));
00152     assert(inFile.ReadRecord(samHeader, samRecord));
00153     validateRead5(samRecord);
00154     assert(inFile.ReadRecord(samHeader, samRecord));
00155     validateRead7(samRecord);
00156     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00157 
00158     // Section 3 to 22 (ref 4 - 23): 0 records.
00159     for(int i = 3; i < 23; i++)
00160     {
00161         assert(inFile.SetReadSection(i));
00162         assert(inFile.ReadRecord(samHeader, samRecord) == false);
00163     }
00164 
00165 
00166     // Set the read section.
00167     assert(inFile.SetReadSection("1", 1010, 1012));
00168     assert(inFile.ReadRecord(samHeader, samRecord));
00169     validateRead1(samRecord);
00170     assert(inFile.GetNumOverlaps(samRecord) == 2);
00171     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00172     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00173     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00174     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00175     assert(inFile.ReadRecord(samHeader, samRecord));
00176     validateRead2(samRecord);
00177     assert(inFile.GetNumOverlaps(samRecord) == 0);
00178     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00179     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00180     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00181     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00182     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00183            
00184     assert(inFile.SetReadSection("1", 1010, 1020));
00185     assert(inFile.ReadRecord(samHeader, samRecord));
00186     validateRead1(samRecord);
00187     assert(inFile.GetNumOverlaps(samRecord) == 5);
00188     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00189     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00190     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00191     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00192     assert(inFile.ReadRecord(samHeader, samRecord));
00193     validateRead2(samRecord);
00194     assert(inFile.GetNumOverlaps(samRecord) == 0);
00195     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00196     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00197     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00198     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00199     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00200            
00201     assert(inFile.SetReadSection("1", 1010, 1011));
00202     assert(inFile.ReadRecord(samHeader, samRecord));
00203     validateRead1(samRecord);
00204     assert(inFile.GetNumOverlaps(samRecord) == 1);
00205     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00206     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00207     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00208     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00209     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00210            
00211     assert(inFile.SetReadSection("1", 1011, 1012));
00212     assert(inFile.ReadRecord(samHeader, samRecord));
00213     validateRead1(samRecord);
00214     assert(inFile.GetNumOverlaps(samRecord) == 1);
00215     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00216     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00217     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00218     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00219     assert(inFile.ReadRecord(samHeader, samRecord));
00220     validateRead2(samRecord);
00221     assert(inFile.GetNumOverlaps(samRecord) == 0);
00222     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00223     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00224     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00225     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00226     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00227 #endif
00228 }
00229 
00230 
00231 void testBamIndex()
00232 {
00233     // BAM indexes are compressed, so can't be tested without zlib.
00234 #ifdef __ZLIB_AVAILABLE__
00235     // Create a bam index.
00236     BamIndex bamIndex;
00237     bamIndex.readIndex("testFiles/sortedBam.bam.bai");
00238     testIndex(bamIndex);
00239 
00240     BamIndexFileTest test1;
00241     bool caughtException = false;
00242     try
00243     {
00244         // Try reading the bam index without specifying a
00245         // filename and before opening a bam file.
00246         assert(test1.ReadBamIndex() == false);
00247     }
00248     catch (std::exception& e)
00249     {
00250         caughtException = true;
00251         assert(strcmp(e.what(), "FAIL_ORDER: Failed to read the bam Index file - the BAM file needs to be read first in order to determine the index filename.") == 0);
00252     }
00253     // Should have failed and thrown an exception.
00254     assert(caughtException);
00255 
00256     // Read the bam index with a specified name.
00257     assert(test1.ReadBamIndex("testFiles/sortedBam.bam.bai"));
00258     BamIndex* index = test1.getBamIndex();
00259     assert(index != NULL);
00260     testIndex(*index);
00261 
00262     // Open the bam file so the index can be opened.
00263     assert(test1.OpenForRead("testFiles/sortedBam.bam"));
00264     // Try reading the bam index without specifying a
00265     // filename after opening a bam file.
00266     assert(test1.ReadBamIndex() == true);
00267     index = test1.getBamIndex();
00268     assert(index != NULL);
00269     testIndex(*index);
00270 
00271     // Open the bam file so the index can be opened.
00272     // This time the index file does not have .bam in it.
00273     assert(test1.OpenForRead("testFiles/sortedBam2.bam"));
00274     // Try reading the bam index without specifying a
00275     // filename after opening a bam file.
00276     assert(test1.ReadBamIndex() == true);
00277     index = test1.getBamIndex();
00278     assert(index != NULL);
00279     testIndex(*index);
00280 #endif
00281 }
00282 
00283 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends