BamIndexTest.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "BamIndex.h"
00019 #include "Validate.h"
00020 #include "BamIndexTest.h"
00021 
00022 #include <assert.h>
00023 
00024 void testIndex(BamIndex& bamIndex)
00025 {
00026     assert(bamIndex.getNumMappedReads(1) == 2);
00027     assert(bamIndex.getNumUnMappedReads(1) == 0);
00028     assert(bamIndex.getNumMappedReads(0) == 4);
00029     assert(bamIndex.getNumUnMappedReads(0) == 1);
00030     assert(bamIndex.getNumMappedReads(23) == -1);
00031     assert(bamIndex.getNumUnMappedReads(23) == -1);
00032     assert(bamIndex.getNumMappedReads(-1) == 0);
00033     assert(bamIndex.getNumUnMappedReads(-1) == 2);
00034     assert(bamIndex.getNumMappedReads(-2) == -1);
00035     assert(bamIndex.getNumUnMappedReads(-2) == -1);
00036     assert(bamIndex.getNumMappedReads(22) == 0);
00037     assert(bamIndex.getNumUnMappedReads(22) == 0);
00038 
00039     // Get the chunks for reference id 1.
00040     Chunk testChunk;
00041     SortedChunkList chunkList;
00042     assert(bamIndex.getChunksForRegion(1, -1, -1, chunkList) == true);
00043     assert(!chunkList.empty());
00044     testChunk = chunkList.pop();
00045     assert(chunkList.empty());
00046     assert(testChunk.chunk_beg == 0x4e7);
00047     assert(testChunk.chunk_end == 0x599);
00048 
00049     // Get the chunks for reference id 0.
00050     assert(bamIndex.getChunksForRegion(0, -1, -1, chunkList) == true);
00051     assert(!chunkList.empty());
00052     testChunk = chunkList.pop();
00053     assert(chunkList.empty());
00054     assert(testChunk.chunk_beg == 0x360);
00055     assert(testChunk.chunk_end == 0x4e7);
00056 
00057 
00058     // Get the chunks for reference id 2.
00059     assert(bamIndex.getChunksForRegion(2, -1, -1, chunkList) == true);
00060     assert(!chunkList.empty());
00061     testChunk = chunkList.pop();
00062     assert(chunkList.empty());
00063     assert(testChunk.chunk_beg == 0x599);
00064     assert(testChunk.chunk_end == 0x5ea);
00065 
00066     // Get the chunks for reference id 3.
00067     // There isn't one for this ref id, but still successfully read the file,
00068     // so it should return true, but the list should be empty.
00069     assert(bamIndex.getChunksForRegion(3, -1, -1, chunkList) == true);
00070     assert(chunkList.empty());
00071 
00072     // Test reading an indexed bam file.
00073     SamFile inFile;
00074     assert(inFile.OpenForRead("testFiles/sortedBam.bam"));
00075     inFile.setSortedValidation(SamFile::COORDINATE);
00076     assert(inFile.ReadBamIndex("testFiles/sortedBam.bam.bai"));
00077     SamFileHeader samHeader;
00078     assert(inFile.ReadHeader(samHeader));
00079     SamRecord samRecord;
00080 
00081     // Test getting num mapped/unmapped reads.
00082     assert(inFile.getNumMappedReadsFromIndex(1) == 2);
00083     assert(inFile.getNumUnMappedReadsFromIndex(1) == 0);
00084     assert(inFile.getNumMappedReadsFromIndex(0) == 4);
00085     assert(inFile.getNumUnMappedReadsFromIndex(0) == 1);
00086     assert(inFile.getNumMappedReadsFromIndex(23) == -1);
00087     assert(inFile.getNumUnMappedReadsFromIndex(23) == -1);
00088     assert(inFile.getNumMappedReadsFromIndex(-1) == 0);
00089     assert(inFile.getNumUnMappedReadsFromIndex(-1) == 2);
00090     assert(inFile.getNumMappedReadsFromIndex(-2) == -1);
00091     assert(inFile.getNumUnMappedReadsFromIndex(-2) == -1);
00092     assert(inFile.getNumMappedReadsFromIndex(22) == 0);
00093     assert(inFile.getNumUnMappedReadsFromIndex(22) == 0);
00094 
00095     assert(inFile.getNumMappedReadsFromIndex("2", samHeader) == 2);
00096     assert(inFile.getNumUnMappedReadsFromIndex("2", samHeader) == 0);
00097     assert(inFile.getNumMappedReadsFromIndex("1", samHeader) == 4);
00098     assert(inFile.getNumUnMappedReadsFromIndex("1", samHeader) == 1);
00099     assert(inFile.getNumMappedReadsFromIndex("22", samHeader) == 0);
00100     assert(inFile.getNumUnMappedReadsFromIndex("22", samHeader) == 0);
00101     assert(inFile.getNumMappedReadsFromIndex("", samHeader) == 0);
00102     assert(inFile.getNumUnMappedReadsFromIndex("*", samHeader) == 2);
00103     assert(inFile.getNumMappedReadsFromIndex("unknown", samHeader) == -1);
00104     assert(inFile.getNumUnMappedReadsFromIndex("unknown", samHeader) == -1);
00105     assert(inFile.getNumMappedReadsFromIndex("X", samHeader) == 0);
00106     assert(inFile.getNumUnMappedReadsFromIndex("X", samHeader) == 0);
00107 
00108     // Section -1 = Ref *: 2 records (8 & 10 from testSam.sam that is reflected
00109     // in the validation.
00110     assert(inFile.SetReadSection(-1));
00111     assert(inFile.ReadRecord(samHeader, samRecord));
00112     validateRead8(samRecord);
00113     assert(inFile.ReadRecord(samHeader, samRecord));
00114     validateRead10(samRecord);
00115     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00116 
00117     // Section 2 = Ref 3: 1 records (9 from testSam.sam that is reflected
00118     // in the validation.
00119     assert(inFile.SetReadSection(2));
00120     assert(inFile.ReadRecord(samHeader, samRecord));
00121     validateRead9(samRecord);
00122     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00123 
00124     // Section 0 = Ref 1: 5 records (3, 4, 1, 2, & 6 from testSam.sam that is
00125     // reflected in the validation.
00126     assert(inFile.SetReadSection(0));
00127     assert(inFile.ReadRecord(samHeader, samRecord));
00128     validateRead3(samRecord);
00129     assert(inFile.ReadRecord(samHeader, samRecord));
00130     validateRead4(samRecord);
00131     assert(inFile.ReadRecord(samHeader, samRecord));
00132     validateRead1(samRecord);
00133     assert(inFile.ReadRecord(samHeader, samRecord));
00134     validateRead2(samRecord);
00135     assert(inFile.ReadRecord(samHeader, samRecord));
00136     validateRead6(samRecord);
00137     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00138 
00139     // Section 1 = Ref 2: 2 records (5 & 7 from testSam.sam that is reflected
00140     // in the validation.
00141     assert(inFile.SetReadSection(1));
00142     assert(inFile.ReadRecord(samHeader, samRecord));
00143     validateRead5(samRecord);
00144     assert(inFile.ReadRecord(samHeader, samRecord));
00145     validateRead7(samRecord);
00146     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00147 
00148     // Section 3 to 22 (ref 4 - 23): 0 records.
00149     for(int i = 3; i < 23; i++)
00150     {
00151         assert(inFile.SetReadSection(i));
00152         assert(inFile.ReadRecord(samHeader, samRecord) == false);
00153     }
00154 
00155 
00156     // Set the read section.
00157     assert(inFile.SetReadSection("1", 1010, 1012));
00158     assert(inFile.ReadRecord(samHeader, samRecord));
00159     validateRead1(samRecord);
00160     assert(inFile.GetNumOverlaps(samRecord) == 2);
00161     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00162     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00163     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00164     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00165     assert(inFile.ReadRecord(samHeader, samRecord));
00166     validateRead2(samRecord);
00167     assert(inFile.GetNumOverlaps(samRecord) == 0);
00168     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00169     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00170     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00171     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00172     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00173            
00174     assert(inFile.SetReadSection("1", 1010, 1020));
00175     assert(inFile.ReadRecord(samHeader, samRecord));
00176     validateRead1(samRecord);
00177     assert(inFile.GetNumOverlaps(samRecord) == 5);
00178     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00179     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00180     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00181     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00182     assert(inFile.ReadRecord(samHeader, samRecord));
00183     validateRead2(samRecord);
00184     assert(inFile.GetNumOverlaps(samRecord) == 0);
00185     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00186     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00187     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00188     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00189     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00190            
00191     assert(inFile.SetReadSection("1", 1010, 1011));
00192     assert(inFile.ReadRecord(samHeader, samRecord));
00193     validateRead1(samRecord);
00194     assert(inFile.GetNumOverlaps(samRecord) == 1);
00195     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00196     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00197     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00198     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00199     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00200            
00201     assert(inFile.SetReadSection("1", 1011, 1012));
00202     assert(inFile.ReadRecord(samHeader, samRecord));
00203     validateRead1(samRecord);
00204     assert(inFile.GetNumOverlaps(samRecord) == 1);
00205     assert(samRecord.getNumOverlaps(1010, 1012) == 2);
00206     assert(samRecord.getNumOverlaps(1010, 1020) == 5);
00207     assert(samRecord.getNumOverlaps(1010, 1011) == 1);
00208     assert(samRecord.getNumOverlaps(1011, 1012) == 1);
00209     assert(inFile.ReadRecord(samHeader, samRecord));
00210     validateRead2(samRecord);
00211     assert(inFile.GetNumOverlaps(samRecord) == 0);
00212     assert(samRecord.getNumOverlaps(1010, 1012) == 0);
00213     assert(samRecord.getNumOverlaps(1010, 1020) == 0);
00214     assert(samRecord.getNumOverlaps(1010, 1011) == 0);
00215     assert(samRecord.getNumOverlaps(1011, 1012) == 0);
00216     assert(inFile.ReadRecord(samHeader, samRecord) == false);
00217            
00218 }
00219 
00220 
00221 void testBamIndex()
00222 {
00223     // Create a bam index.
00224     BamIndex bamIndex;
00225     bamIndex.readIndex("testFiles/sortedBam.bam.bai");
00226     testIndex(bamIndex);
00227 
00228     BamIndexFileTest test1;
00229     bool caughtException = false;
00230     try
00231     {
00232         // Try reading the bam index without specifying a
00233         // filename and before opening a bam file.
00234         assert(test1.ReadBamIndex() == false);
00235     }
00236     catch (std::exception& e)
00237     {
00238         caughtException = true;
00239         assert(strcmp(e.what(), "FAIL_ORDER: Failed to read the bam Index file - the BAM file needs to be read first in order to determine the index filename.") == 0);
00240     }
00241     // Should have failed and thrown an exception.
00242     assert(caughtException);
00243 
00244     // Read the bam index with a specified name.
00245     assert(test1.ReadBamIndex("testFiles/sortedBam.bam.bai"));
00246     BamIndex* index = test1.getBamIndex();
00247     assert(index != NULL);
00248     testIndex(*index);
00249 
00250     // Open the bam file so the index can be opened.
00251     assert(test1.OpenForRead("testFiles/sortedBam.bam"));
00252     // Try reading the bam index without specifying a
00253     // filename after opening a bam file.
00254     assert(test1.ReadBamIndex() == true);
00255     index = test1.getBamIndex();
00256     assert(index != NULL);
00257     testIndex(*index);
00258 
00259     // Open the bam file so the index can be opened.
00260     // This time the index file does not have .bam in it.
00261     assert(test1.OpenForRead("testFiles/sortedBam2.bam"));
00262     // Try reading the bam index without specifying a
00263     // filename after opening a bam file.
00264     assert(test1.ReadBamIndex() == true);
00265     index = test1.getBamIndex();
00266     assert(index != NULL);
00267     testIndex(*index);
00268 
00269 }
00270 
00271 
Generated on Tue Mar 22 22:50:13 2011 for StatGen Software by  doxygen 1.6.3