libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "BamIndex.h" 00019 #include "TestValidate.h" 00020 #include "BamIndexTest.h" 00021 00022 #include <assert.h> 00023 00024 void testIndex(BamIndex& bamIndex) 00025 { 00026 #ifdef __ZLIB_AVAILABLE__ 00027 assert(bamIndex.getNumMappedReads(1) == 2); 00028 assert(bamIndex.getNumUnMappedReads(1) == 0); 00029 assert(bamIndex.getNumMappedReads(0) == 4); 00030 assert(bamIndex.getNumUnMappedReads(0) == 1); 00031 assert(bamIndex.getNumMappedReads(23) == -1); 00032 assert(bamIndex.getNumUnMappedReads(23) == -1); 00033 assert(bamIndex.getNumMappedReads(-1) == 0); 00034 assert(bamIndex.getNumUnMappedReads(-1) == 2); 00035 assert(bamIndex.getNumMappedReads(-2) == -1); 00036 assert(bamIndex.getNumUnMappedReads(-2) == -1); 00037 assert(bamIndex.getNumMappedReads(22) == 0); 00038 assert(bamIndex.getNumUnMappedReads(22) == 0); 00039 00040 // Get the chunks for reference id 1. 00041 Chunk testChunk; 00042 SortedChunkList chunkList; 00043 assert(bamIndex.getChunksForRegion(1, -1, -1, chunkList) == true); 00044 assert(!chunkList.empty()); 00045 testChunk = chunkList.pop(); 00046 assert(chunkList.empty()); 00047 assert(testChunk.chunk_beg == 0x4e7); 00048 assert(testChunk.chunk_end == 0x599); 00049 00050 // Get the chunks for reference id 0. 00051 assert(bamIndex.getChunksForRegion(0, -1, -1, chunkList) == true); 00052 assert(!chunkList.empty()); 00053 testChunk = chunkList.pop(); 00054 assert(chunkList.empty()); 00055 assert(testChunk.chunk_beg == 0x360); 00056 assert(testChunk.chunk_end == 0x4e7); 00057 00058 00059 // Get the chunks for reference id 2. 00060 assert(bamIndex.getChunksForRegion(2, -1, -1, chunkList) == true); 00061 assert(!chunkList.empty()); 00062 testChunk = chunkList.pop(); 00063 assert(chunkList.empty()); 00064 assert(testChunk.chunk_beg == 0x599); 00065 assert(testChunk.chunk_end == 0x5ea); 00066 00067 // Get the chunks for reference id 3. 00068 // There isn't one for this ref id, but still successfully read the file, 00069 // so it should return true, but the list should be empty. 00070 assert(bamIndex.getChunksForRegion(3, -1, -1, chunkList) == true); 00071 assert(chunkList.empty()); 00072 00073 // Test reading an indexed bam file. 00074 SamFile inFile; 00075 assert(inFile.OpenForRead("testFiles/sortedBam.bam")); 00076 inFile.setSortedValidation(SamFile::COORDINATE); 00077 assert(inFile.ReadBamIndex("testFiles/sortedBam.bam.bai")); 00078 SamFileHeader samHeader; 00079 assert(inFile.ReadHeader(samHeader)); 00080 SamRecord samRecord; 00081 00082 // Test getting num mapped/unmapped reads. 00083 assert(inFile.getNumMappedReadsFromIndex(1) == 2); 00084 assert(inFile.getNumUnMappedReadsFromIndex(1) == 0); 00085 assert(inFile.getNumMappedReadsFromIndex(0) == 4); 00086 assert(inFile.getNumUnMappedReadsFromIndex(0) == 1); 00087 assert(inFile.getNumMappedReadsFromIndex(23) == -1); 00088 assert(inFile.getNumUnMappedReadsFromIndex(23) == -1); 00089 assert(inFile.getNumMappedReadsFromIndex(-1) == 0); 00090 assert(inFile.getNumUnMappedReadsFromIndex(-1) == 2); 00091 assert(inFile.getNumMappedReadsFromIndex(-2) == -1); 00092 assert(inFile.getNumUnMappedReadsFromIndex(-2) == -1); 00093 assert(inFile.getNumMappedReadsFromIndex(22) == 0); 00094 assert(inFile.getNumUnMappedReadsFromIndex(22) == 0); 00095 00096 assert(inFile.getNumMappedReadsFromIndex("2", samHeader) == 2); 00097 assert(inFile.getNumUnMappedReadsFromIndex("2", samHeader) == 0); 00098 assert(inFile.getNumMappedReadsFromIndex("1", samHeader) == 4); 00099 assert(inFile.getNumUnMappedReadsFromIndex("1", samHeader) == 1); 00100 assert(inFile.getNumMappedReadsFromIndex("22", samHeader) == 0); 00101 assert(inFile.getNumUnMappedReadsFromIndex("22", samHeader) == 0); 00102 assert(inFile.getNumMappedReadsFromIndex("", samHeader) == 0); 00103 assert(inFile.getNumUnMappedReadsFromIndex("*", samHeader) == 2); 00104 assert(inFile.getNumMappedReadsFromIndex("unknown", samHeader) == -1); 00105 assert(inFile.getNumUnMappedReadsFromIndex("unknown", samHeader) == -1); 00106 assert(inFile.getNumMappedReadsFromIndex("X", samHeader) == 0); 00107 assert(inFile.getNumUnMappedReadsFromIndex("X", samHeader) == 0); 00108 00109 // Set the read section saying the reads must be fully enclosed in the section. 00110 assert(inFile.SetReadSection("1", 1010, 1011, false)); 00111 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00112 00113 assert(inFile.SetReadSection("1", 1011, 1012, false)); 00114 assert(inFile.ReadRecord(samHeader, samRecord)); 00115 validateRead2(samRecord); 00116 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00117 00118 // Section -1 = Ref *: 2 records (8 & 10 from testSam.sam that is reflected 00119 // in the validation. 00120 assert(inFile.SetReadSection(-1)); 00121 assert(inFile.ReadRecord(samHeader, samRecord)); 00122 validateRead8(samRecord); 00123 assert(inFile.ReadRecord(samHeader, samRecord)); 00124 validateRead10(samRecord); 00125 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00126 00127 // Section 2 = Ref 3: 1 records (9 from testSam.sam that is reflected 00128 // in the validation. 00129 assert(inFile.SetReadSection(2)); 00130 assert(inFile.ReadRecord(samHeader, samRecord)); 00131 validateRead9(samRecord); 00132 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00133 00134 // Section 0 = Ref 1: 5 records (3, 4, 1, 2, & 6 from testSam.sam that is 00135 // reflected in the validation. 00136 assert(inFile.SetReadSection(0)); 00137 assert(inFile.ReadRecord(samHeader, samRecord)); 00138 validateRead3(samRecord); 00139 assert(inFile.ReadRecord(samHeader, samRecord)); 00140 validateRead4(samRecord); 00141 assert(inFile.ReadRecord(samHeader, samRecord)); 00142 validateRead1(samRecord); 00143 assert(inFile.ReadRecord(samHeader, samRecord)); 00144 validateRead2(samRecord); 00145 assert(inFile.ReadRecord(samHeader, samRecord)); 00146 validateRead6(samRecord); 00147 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00148 00149 // Section 1 = Ref 2: 2 records (5 & 7 from testSam.sam that is reflected 00150 // in the validation. 00151 assert(inFile.SetReadSection(1)); 00152 assert(inFile.ReadRecord(samHeader, samRecord)); 00153 validateRead5(samRecord); 00154 assert(inFile.ReadRecord(samHeader, samRecord)); 00155 validateRead7(samRecord); 00156 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00157 00158 // Section 3 to 22 (ref 4 - 23): 0 records. 00159 for(int i = 3; i < 23; i++) 00160 { 00161 assert(inFile.SetReadSection(i)); 00162 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00163 } 00164 00165 00166 // Set the read section. 00167 assert(inFile.SetReadSection("1", 1010, 1012)); 00168 assert(inFile.ReadRecord(samHeader, samRecord)); 00169 validateRead1(samRecord); 00170 assert(inFile.GetNumOverlaps(samRecord) == 2); 00171 assert(samRecord.getNumOverlaps(1010, 1012) == 2); 00172 assert(samRecord.getNumOverlaps(1010, 1020) == 5); 00173 assert(samRecord.getNumOverlaps(1010, 1011) == 1); 00174 assert(samRecord.getNumOverlaps(1011, 1012) == 1); 00175 assert(inFile.ReadRecord(samHeader, samRecord)); 00176 validateRead2(samRecord); 00177 assert(inFile.GetNumOverlaps(samRecord) == 0); 00178 assert(samRecord.getNumOverlaps(1010, 1012) == 0); 00179 assert(samRecord.getNumOverlaps(1010, 1020) == 0); 00180 assert(samRecord.getNumOverlaps(1010, 1011) == 0); 00181 assert(samRecord.getNumOverlaps(1011, 1012) == 0); 00182 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00183 00184 assert(inFile.SetReadSection("1", 1010, 1020)); 00185 assert(inFile.ReadRecord(samHeader, samRecord)); 00186 validateRead1(samRecord); 00187 assert(inFile.GetNumOverlaps(samRecord) == 5); 00188 assert(samRecord.getNumOverlaps(1010, 1012) == 2); 00189 assert(samRecord.getNumOverlaps(1010, 1020) == 5); 00190 assert(samRecord.getNumOverlaps(1010, 1011) == 1); 00191 assert(samRecord.getNumOverlaps(1011, 1012) == 1); 00192 assert(inFile.ReadRecord(samHeader, samRecord)); 00193 validateRead2(samRecord); 00194 assert(inFile.GetNumOverlaps(samRecord) == 0); 00195 assert(samRecord.getNumOverlaps(1010, 1012) == 0); 00196 assert(samRecord.getNumOverlaps(1010, 1020) == 0); 00197 assert(samRecord.getNumOverlaps(1010, 1011) == 0); 00198 assert(samRecord.getNumOverlaps(1011, 1012) == 0); 00199 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00200 00201 assert(inFile.SetReadSection("1", 1010, 1011)); 00202 assert(inFile.ReadRecord(samHeader, samRecord)); 00203 validateRead1(samRecord); 00204 assert(inFile.GetNumOverlaps(samRecord) == 1); 00205 assert(samRecord.getNumOverlaps(1010, 1012) == 2); 00206 assert(samRecord.getNumOverlaps(1010, 1020) == 5); 00207 assert(samRecord.getNumOverlaps(1010, 1011) == 1); 00208 assert(samRecord.getNumOverlaps(1011, 1012) == 1); 00209 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00210 00211 assert(inFile.SetReadSection("1", 1011, 1012)); 00212 assert(inFile.ReadRecord(samHeader, samRecord)); 00213 validateRead1(samRecord); 00214 assert(inFile.GetNumOverlaps(samRecord) == 1); 00215 assert(samRecord.getNumOverlaps(1010, 1012) == 2); 00216 assert(samRecord.getNumOverlaps(1010, 1020) == 5); 00217 assert(samRecord.getNumOverlaps(1010, 1011) == 1); 00218 assert(samRecord.getNumOverlaps(1011, 1012) == 1); 00219 assert(inFile.ReadRecord(samHeader, samRecord)); 00220 validateRead2(samRecord); 00221 assert(inFile.GetNumOverlaps(samRecord) == 0); 00222 assert(samRecord.getNumOverlaps(1010, 1012) == 0); 00223 assert(samRecord.getNumOverlaps(1010, 1020) == 0); 00224 assert(samRecord.getNumOverlaps(1010, 1011) == 0); 00225 assert(samRecord.getNumOverlaps(1011, 1012) == 0); 00226 assert(inFile.ReadRecord(samHeader, samRecord) == false); 00227 #endif 00228 } 00229 00230 00231 void testBamIndex() 00232 { 00233 // BAM indexes are compressed, so can't be tested without zlib. 00234 #ifdef __ZLIB_AVAILABLE__ 00235 // Create a bam index. 00236 BamIndex bamIndex; 00237 bamIndex.readIndex("testFiles/sortedBam.bam.bai"); 00238 testIndex(bamIndex); 00239 00240 BamIndexFileTest test1; 00241 bool caughtException = false; 00242 try 00243 { 00244 // Try reading the bam index without specifying a 00245 // filename and before opening a bam file. 00246 assert(test1.ReadBamIndex() == false); 00247 } 00248 catch (std::exception& e) 00249 { 00250 caughtException = true; 00251 assert(strcmp(e.what(), "FAIL_ORDER: Failed to read the bam Index file - the BAM file needs to be read first in order to determine the index filename.") == 0); 00252 } 00253 // Should have failed and thrown an exception. 00254 assert(caughtException); 00255 00256 // Read the bam index with a specified name. 00257 assert(test1.ReadBamIndex("testFiles/sortedBam.bam.bai")); 00258 BamIndex* index = test1.getBamIndex(); 00259 assert(index != NULL); 00260 testIndex(*index); 00261 00262 // Open the bam file so the index can be opened. 00263 assert(test1.OpenForRead("testFiles/sortedBam.bam")); 00264 // Try reading the bam index without specifying a 00265 // filename after opening a bam file. 00266 assert(test1.ReadBamIndex() == true); 00267 index = test1.getBamIndex(); 00268 assert(index != NULL); 00269 testIndex(*index); 00270 00271 // Open the bam file so the index can be opened. 00272 // This time the index file does not have .bam in it. 00273 assert(test1.OpenForRead("testFiles/sortedBam2.bam")); 00274 // Try reading the bam index without specifying a 00275 // filename after opening a bam file. 00276 assert(test1.ReadBamIndex() == true); 00277 index = test1.getBamIndex(); 00278 assert(index != NULL); 00279 testIndex(*index); 00280 #endif 00281 } 00282 00283