libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include <stdio.h> 00019 #include <stdlib.h> 00020 #include <stdint.h> 00021 #include "SmithWaterman.h" 00022 00023 // put TEST below here, so that makedepend will see the .h, so that we 00024 // can get a clean dependency for SmithWaterman.o, so that we can at least 00025 // compile the header when we change it. 00026 00027 #if defined(TEST) 00028 00029 #include <getopt.h> 00030 #include "Generic.h" 00031 00032 // g++ -g -o testSW -DTEST SmithWaterman.cpp 00033 // 00034 // Smith-Waterman - test code uses a 256x256 array of int16 00035 // 00036 int swat( 00037 bool showAllCases, 00038 const char *A, 00039 const char *qualities, 00040 const char *B, 00041 int direction, 00042 const char *expectedCigarString, 00043 int expectedSumQ 00044 ) 00045 { 00046 int allowedInsertDelete = 1024; 00047 int errors = 0; 00048 00049 // read length 256 00050 // reference length 1024 00051 SmithWaterman<256, 1024, uint16_t, const char *, const char *, const char *, uint32_t, uint32_t > sw(&A, &qualities, &B, strlen(A), strlen(B), allowedInsertDelete, direction); 00052 00053 // 00054 // now we align the read: 00055 // 00056 sw.populateH(); 00057 sw.populateAlignment(); 00058 00059 int sumQ = sw.getSumQ(); 00060 00061 CigarRoller cigar; 00062 cigar.clear(); 00063 sw.rollCigar(cigar); 00064 00065 const char *cigarStr = cigar.getString(); 00066 00067 // 00068 // now we pretty print the results 00069 // 00070 00071 bool badCigar = false, badQuality = false; 00072 00073 if (strcmp(cigarStr, expectedCigarString)!=0) 00074 { 00075 badCigar = true; 00076 errors ++; 00077 } 00078 00079 if (sumQ != expectedSumQ) 00080 { 00081 badQuality = true; 00082 errors ++; 00083 } 00084 00085 00086 00087 if (showAllCases || errors>0) 00088 { 00089 cout << "=============" << endl; 00090 cout << " Read: " << A << endl; 00091 cout << " Reference: " << B << endl; 00092 cout << " Direction: " << direction << endl; 00093 cout << "Max Cell: " << sw.maxCostValue << " located at " << sw.maxCostPosition << endl; 00094 cout << "M: " << sw.m << " N: " << sw.n << endl; 00095 cout << "Cigar String: " << cigarStr ; 00096 if (badCigar) 00097 cout << " (EXPECTED: " << expectedCigarString << ")"; 00098 cout << endl; 00099 cout << " sumQ:" << sumQ; 00100 if (badQuality) 00101 cout << " (EXPECTED: " << expectedSumQ << ")"; 00102 cout << endl; 00103 00104 if (strlen(B) < 100 || showAllCases) 00105 sw.printH(false); 00106 00107 for (vector<pair<int,int> >::iterator i = sw.alignment.begin(); i != sw.alignment.end(); i++) cout << *i << endl; 00108 00109 cout << "=============" << endl << endl; 00110 } 00111 00112 delete cigarStr; 00113 00114 return errors; 00115 } 00116 00117 00118 // test with Sequence 1 = ACACACTA 00119 // Sequence 2 = AGCACACA 00120 int main(int argc, const char **argv) 00121 { 00122 int errors = 0; 00123 00124 bool showAllCasesFlag = false; 00125 int opt; 00126 00127 while ((opt = getopt(argc, (char **) argv, "v")) != -1) 00128 { 00129 switch (opt) 00130 { 00131 case 'v': 00132 showAllCasesFlag = true; 00133 break; 00134 default: 00135 cerr << "usage: testSW [-v]" << std::endl; 00136 exit(1); 00137 } 00138 } 00139 00140 00141 // CIGAR explanation - for backward SW runs, the corresponding 00142 // CIGAR string is generated from the back of the string to the 00143 // front. Recall that the soft clipping is only done at the 00144 // "end" of the string, taking direction into account. 00145 00146 // forwards - simple 00147 errors += swat(showAllCasesFlag, "1234", "\"#$-", "1235", 1, "3M1S", 0); 00148 00149 // backwards - simple 00150 errors += swat(showAllCasesFlag, "1234", "\"#$-", "1235", -1, "4M", 12); 00151 00152 // backwards - soft left clip 00153 errors += swat(showAllCasesFlag, "1234", "\"#$-", "0234", -1, "1S3M", 0); 00154 00155 // delete in read (arg 1) - forward 00156 errors += swat(showAllCasesFlag, "123467890", "\"#$%^&*()-", "1234567890", +1, "4M1D5M", 50); 00157 00158 // insert in read (arg 1) - forward 00159 errors += swat(showAllCasesFlag, "1234556789", "\"#$%^&*()-", "1234567890", +1, "5M1I4M", 50); 00160 00161 // delete in read (arg 1) - backward 00162 errors += swat(showAllCasesFlag, "X123467890", "#\"#$%^&*()-", "1234567890", -1, "1S4M1D5M", 50); 00163 00164 // insert in read (arg 1) - backward 00165 errors += swat(showAllCasesFlag, "1234556789", "\"#$%^&*()-", "0123456789", -1, "4M1I5M", 50); 00166 00167 // insert in read (arg 1) - backward 00168 errors += swat(showAllCasesFlag, "X1223456789", "00000000000", "00123456789", -1, "1S1M1I8M", 50); 00169 00170 // insert in read (arg 1) - backward 00171 errors += swat(showAllCasesFlag, "XY1223456789", "000000000000", "000123456789", -1, "2S1M1I8M", 50); 00172 00173 // forward - soft right clip of 2 bases - sumQ should be 0 00174 errors += swat(showAllCasesFlag, "123456700", "\"#$%^&*()-", "123456789", +1, "7M2S", 0); 00175 00176 // insert in read (arg 1) - forward w/2 mismatches at end 00177 errors += swat(showAllCasesFlag, "1023456700", "\"#$%^&*()-", "123456789", +1, "1M1I6M2S", 50); 00178 00179 // insert in read (arg 1) - forward w/2 mismatches at end 00180 errors += swat(showAllCasesFlag, "CTCCACCTCCCGGTT", "111111111111111", "TCCACCTCCCAGGTT", -1, "1S10M1D4M", 50); 00181 00182 // 00183 errors += swat(showAllCasesFlag, "1234", "0000", "12345", +1, "4M", 0); 00184 00185 // 00186 errors += swat(showAllCasesFlag, "1234X", "00000", "12345", +1, "4M1S", 0); 00187 00188 // 00189 errors += swat(showAllCasesFlag, "4321", "0000", "7654321", -1, "4M", 0); 00190 00191 // 00192 errors += swat(showAllCasesFlag, "X4321", "00000", "7654321", -1, "1S4M", 0); 00193 00194 // 00195 errors += swat(showAllCasesFlag, "X432A10", "0000000", "76543210", -1, "1S3M1I2M", 50); 00196 00197 // 00198 errors += swat(showAllCasesFlag, "1345", "0000", "12345", -1, "1M1D3M", 50); 00199 00200 errors += swat(showAllCasesFlag, "45689", "00000", "1234567890", -1, "3M1D2M", 50); 00201 00202 // errors += swat(showAllCasesFlag, "AATAATTTTTTATATACAGATCGCTGTAGAGTGTAGTTATAGTATGATTCCAACTTTTATTTCTTTCATGACTAATTATATGTATACATGTGCCATGTTGGTGTGCTG", "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "TCCAATGTAGGGCTGTTATAAACAGTGTTGATACATATGTTTTTGTATAAGTCTTTGTTGAATACATGCTTTCATTTTTGTAGGGTATATGTCCAGGAATTAAATTTTTGCATTATTGGGGAAGTTCAAACGTAGATCAGTAGATGTTCCCAAATGATTTTCAGGATATGTATCCATGTAAATTCCTACCAGCAATGCAGGAGAATTCCAATTGCCCATGTTCTAATCAGAATATTGTTATATCCTAAGACTAATTTTAAATATTCTGATGGGTGTAGAGTGGAGGCATAGTATGATTTCAACTTGTATTTCTTTCATGACTAATTATCTTCTATGTTAATTGTTATTTTGTATGTTTATTGCAAAGTGCCTATCCAGAATTTTTGTCTATAATTTTGTTGTGCTGTCTCTTGCTTTATGAATTTTATAGGATTCTTAATATTATAATTGAGTTATCTTTCTTTTTTATTATTATTATTATACTTTAAGTTTTAGGGTATATGTGCACAACGTGCAAGTTTGTCACATATGTATACATGTGCCATGTTGGTGTGCTGCACCCATTAACTCATCATTTAGCATTAGGTATATCTCCTAATGCTATCCCTTCCTCCTCCCCCCACCCCACAACAGTCCCCGGTGTGTGATGTTCCCCTGCCTTTGTCCTCTTTCTTATACTTGCATGAGCAATCTCCTCAAACTGATACTTGCCTTTTTTGTCCTTGGTGTGGTTTGGCTCTGTGTTCCCACCCAAATCTTCATAATACCCATGTGCCAAGGGTGGGACTGGGTGGAGGTAATTGGGTCATGGGGATGGTTTCCCTCATACTATTATGATAGTGAGTGTTTTCACGAGACCTGATGGTTTTATAACTGTGTGGCATTTCCCTTGCTTCCACTCACTCCATCCTGCCACCCTGTGAAGAAGGTGCCTGCTTCTCCTTTGGTTACTGCTATGATTGTAAGTTTCCTGAGGCCTCCCCAGCAACGCAAAACTGTGAATCAATTAAACCTTTTTCCTTTATAAATTACTAAGTCTTGGGTATTTCTTCATAGTGTTGTGAGCATAGACTAAAACAGTAAGTTGTTACCAGGAGTGGGGTACTGCTGTAAGATAACTGAGAATGTGAAAGTGACTTAGGAACTAGGTAATGAGCAGAGGTTGGAACAGTTTAAAAGGCTCAGAAGAAGACAGAAAGATGTGGGAAAGTTTGGA", -1, "77M200D31M", 50); 00203 00204 errors += swat(showAllCasesFlag, "TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACGTCATCGGTCAGAAATTGGG", "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "CCGAGATTGTGCCATTGCACTCCTGCCTGGGTAACAGAGTCAGACCCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGATTAGGTTTTATAGATGGAAAATTCACAGCTCTCTCCAGATCAGAAATCTCCAAGAGTAAATTAGTGTCTTAAAGGGGTTGTAATAACTTTCCTATGTGACTAAGTGCATTATTAATCAATTTTTCTATGATCAAGTACTCCTTTACATACCTGCTAATACAATTTTTGATATGAAATCAGTCCTAGAGGGAATCAATGTAAGATACAGACTTGATGAGTGCTTGCAGTTTTTTATTGACAATCTGAAGAATGACTTGACTCTAAATTGCAGCTCAAGGCTTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACAGGAATCAAATCTGTCTAGCCTCTCTTTTTGGCAAGGTTAAGAACAATTCCACTTCATCCTAATCCCAATGATTCCTGCCGACCCTCTTCCAAAAACTATTTAAAGACATGTTCTTCAAAGTTATATTTGTCTTTCCTTCAGGGAGAAAAAGAATACCAATCACTTATAATATGGAAACTAGCAGAAATGGGTCACATAAGTCATCTGTCAGAAATTGGGAAAATAGAGTAGGTCAGTCTTTCCAGTCATGGTACTTTTACCTTCAATCA", -1, "88M200D20M", 50); 00205 00206 // prefix TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATAC 00207 // suffix GTCATCTGTCAGAAATTGGGA 00208 cout << endl << "Total Errors found: " << errors << endl; 00209 } 00210 #endif