00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #include "GreedyTupleAligner.h"
00018 #include <vector>
00019 #include <iostream>
00020 #include <sstream>
00021 #include <gtest/gtest.h>
00022
00023
00024 class MockGenomeSequence{
00025 public:
00026 MockGenomeSequence(std::string sequence) :
00027 sequence(sequence) {};
00028 char operator[] (int index) const {
00029 if (index < 0 || index >= (int)sequence.size()) {
00030 std::cerr << "exceeds boundary! at " << __FILE__ << ":" <<__LINE__ << std::endl;
00031 return 'N';
00032 }
00033 return sequence[index];
00034 }
00035 char& operator[] (int index) {
00036 if (index < 0 || index >= (int)sequence.size()) {
00037 std::cerr << "exceeds boundary! at " << __FILE__ << ":" <<__LINE__ << std::endl;
00038 return sequence[0];
00039 }
00040 return sequence[index];
00041 }
00042
00043 int getNumberBases() const{
00044 return sequence.size();
00045 }
00046 private:
00047 std::string sequence;
00048 };
00049
00050 void printRefQueryCigar(const std::vector<char>& prettyPrintReference,
00051 const std::vector<char>& prettyPrintQuery,
00052 CigarRoller& cs,
00053 std::ostream& out){
00054 out << " ref = ";
00055 for(std::vector<char>::const_iterator i=prettyPrintReference.begin(); i<prettyPrintReference.end(); i++) out << *i;
00056 out << std::endl;
00057 out <<"query = ";
00058 for(std::vector<char>::const_iterator i=prettyPrintQuery.begin(); i<prettyPrintQuery.end(); i++) out << *i;
00059 out << std::endl;
00060 out << "cigar = ";
00061 out << cs.getString();
00062 out << " ";
00063 }
00064
00065 void runAlign(const char *query, const char *ref, CigarRoller& cs, int& matchPosition) {
00066 Weight wt;
00067 MockGenomeSequence reference (ref);
00068 GreedyTupleAligner<const char *, MockGenomeSequence, int> ga(wt);
00069 ga.Align(query, strlen(query), reference, 0, strlen(ref), cs, matchPosition);
00070 }
00071
00072 void computePrettyOutput (std::vector<char>& prettyPrintReference,
00073 const char* ref,
00074 std::vector<char>& prettyPrintQuery,
00075 const char* query,
00076 const int matchPosition,
00077 const CigarRoller& expectedCigar)
00078 {
00079 const char* pRef = ref;
00080 const char* pQuery = query;
00081 for (int index = 0; index < matchPosition; index++){
00082 prettyPrintReference.push_back(*pRef++);
00083 prettyPrintQuery.push_back(' ');
00084 }
00085 for (int i = 0; i< expectedCigar.size(); i++) {
00086 switch( expectedCigar[i].operation) {
00087 case CigarRoller::mismatch:
00088 case CigarRoller::match:
00089 for (unsigned int j = 0; j < expectedCigar[i].count; j++){
00090 prettyPrintReference.push_back(*pRef++);
00091 prettyPrintQuery.push_back(*pQuery++);
00092 }
00093 break;
00094 case CigarRoller::del:
00095 for (unsigned int j = 0; j < expectedCigar[i].count; j++){
00096 prettyPrintReference.push_back(*pRef++);
00097 prettyPrintQuery.push_back(' ');
00098 }
00099 break;
00100 case CigarRoller::insert:
00101 for (unsigned int j = 0; j < expectedCigar[i].count; j++){
00102 prettyPrintReference.push_back(' ');
00103 prettyPrintQuery.push_back(*pQuery++);
00104 }
00105 break;
00106 default:
00107 break;
00108 }
00109 }
00110 while (*pRef !='\0')
00111 prettyPrintReference.push_back(*pRef++);
00112 while (*pQuery !='\0')
00113 prettyPrintReference.push_back(*pQuery++);
00114 }
00115
00116 bool verifyAlign(const char *query, const char *ref, const char *expectedCigarString, std::ostream& out = std::cout) {
00117 out.seekp(std::ios_base::beg);
00118
00119 CigarRoller cs;
00120 int matchPosition;
00121 CigarRoller expectedCigar(expectedCigarString);
00122
00123 runAlign(query, ref, cs, matchPosition);
00124
00125
00126 if (matchPosition < 0) {
00127 fprintf(stderr, "No match in %s, %d \n", __FILE__, __LINE__);
00128 return false;
00129 }
00130
00131 std::vector<char> prettyPrintReference,prettyPrintQuery;
00132 computePrettyOutput( prettyPrintReference, ref,
00133 prettyPrintQuery, query,
00134 matchPosition,
00135 expectedCigar);
00136
00137 const char *str = cs.getString();
00138
00139 if((unsigned int)expectedCigar.getExpectedQueryBaseCount() != strlen(query)) {
00140 printRefQueryCigar(prettyPrintReference, prettyPrintQuery, cs, out);
00141 out << std::endl;
00142 out << "Expected Cigar string length " << expectedCigar.getExpectedQueryBaseCount() << " does not match the length of the query " << strlen(query) << ". Please fix test case." << std::endl;
00143 return false;
00144 }
00145 if((unsigned int)cs.getExpectedQueryBaseCount() != strlen(query)) {
00146 printRefQueryCigar(prettyPrintReference, prettyPrintQuery, cs, out);
00147 out << std::endl;
00148 out << "Query Length of " << strlen(query) << " does not match computed cigar string length of " << cs.getExpectedQueryBaseCount() << std::endl;
00149 return false;
00150 }
00151 if (strcmp(expectedCigarString, str) == 0) {
00152
00153 return true;
00154 } else {
00155 printRefQueryCigar(prettyPrintReference, prettyPrintQuery, cs, out);
00156 out << "[Correct Answer = " << expectedCigarString << "] --------------------- Wrong!" << std::endl;
00157 return false;
00158 }
00159 return true;
00160 }
00161
00162 TEST(GreedyTupleAlignerTest, AlignToShortReference) {
00163 std::stringstream ss(std::stringstream::out);
00164 #if 0
00165
00166 EXPECT_TRUE( verifyAlign("12345", "123456789", "5M") ) << ss.str().c_str();
00167 EXPECT_TRUE( verifyAlign("23456", "123456789", "5M") ) << ss.str().c_str();
00168
00169 EXPECT_TRUE( verifyAlign("123B567", "123456789", "7M") ) << ss.str().c_str();
00170 EXPECT_TRUE( verifyAlign("234D678", "123456789", "7M") ) << ss.str().c_str();
00171
00172 EXPECT_TRUE( verifyAlign("123467890","1234567890", "4M1D5M") ) << ss.str().c_str();
00173 EXPECT_TRUE( verifyAlign("123467890","B1234567890B", "4M1D5M") ) << ss.str().c_str();
00174
00175 EXPECT_TRUE( verifyAlign("12345067890","1234567890", "5M1I5M") ) << ss.str().c_str();
00176 EXPECT_TRUE( verifyAlign("12345067890","BBBB1234567890BBBB", "5M1I5M") ) << ss.str().c_str();
00177
00178 EXPECT_TRUE( verifyAlign("1234", "1235", "3M1S") ) << ss.str().c_str();
00179
00180
00181 #endif
00182 EXPECT_TRUE( verifyAlign("1023456700", "123456789","1I7M2S") ) << ss.str().c_str();
00183 }
00184
00185 TEST(GreedyTupleTestAligner, AlignToLongReference) {
00186 std::stringstream ss(std::stringstream::out);
00187
00188 EXPECT_TRUE( verifyAlign("TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACGTCATCGGTCAGAAATTGGG","CCGAGATTGTGCCATTGCACTCCTGCCTGGGTAACAGAGTCAGACCCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGATTAGGTTTTATAGATGGAAAATTCACAGCTCTCTCCAGATCAGAAATCTCCAAGAGTAAATTAGTGTCTTAAAGGGGTTGTAATAACTTTCCTATGTGACTAAGTGCATTATTAATCAATTTTTCTATGATCAAGTACTCCTTTACATACCTGCTAATACAATTTTTGATATGAAATCAGTCCTAGAGGGAATCAATGTAAGATACAGACTTGATGAGTGCTTGCAGTTTTTTATTGACAATCTGAAGAATGACTTGACTCTAAATTGCAGCTCAAGGCTTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACAGGAATCAAATCTGTCTAGCCTCTCTTTTTGGCAAGGTTAAGAACAATTCCACTTCATCCTAATCCCAATGATTCCTGCCGACCCTCTTCCAAAAACTATTTAAAGACATGTTCTTCAAAGTTATATTTGTCTTTCCTTCAGGGAGAAAAAGAATACCAATCACTTATAATATGGAAACTAGCAGAAATGGGTCACATAAGTCATCTGTCAGAAATTGGGAAAATAGAGTAGGTCAGTCTTTCCAGTCATGGTACTTTTACCTTCAATCA", "88M200D20M") ) << ss.str().c_str();
00189
00190
00191
00192
00193
00194 EXPECT_TRUE( verifyAlign("GTGAAACTCCATCTCAAAAATAAGTAAATAAATAAATACATACATAGGCACAGTGCAGTTGTTAGTCAGAATTAGGTCACACTGGATTAGGGTGAGTACTTAATGCAACAGGTCTGGGG","GTGCCAGAGTTTAATTAATAGGATAAGGTTATGAGTCAGACTGTGTACCCCAAAAAAGATATGTTGAACTCCTAAGCCCCTGAACCACAGAATGGGATCCTATTCAGAAATAGGCACAGTGTCCGGGCACCATGGCTCACACTGGTAATCCCAGCACTCTGGGAGGCTGAGGTGGGTGCATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAACAGAACAGTTAGCCAGGTGTGGTGGTGGGCACCTGTAATCCCAGCTACTTGGGAGGCTGAGACAGGAGAATGGCTTGAACCCAGGAGGTGGAGGTTGCAGTGAGCCGAGATCGTGCCATTGCACTTCAGCCTGGGCCACAAGAGTGAAACTCCATCTCAAAAATAAGTAAATAAATAAATACATACGTAGGCACAGTGCAGTTGTTGTTAGTTAGAATTAGGTCACACTGGATTAGGGTGAGTCCTTAATCCAACAGGTCTGGTGTCCTTACAAATAGACAAATACACAGAAGGAACATGGCCACATGGAGATACAGACACACCAAAACATCATATTGAGATGTGGGCAAAGATTGGAGAGACACTTCTCCAAGTCAAGGAACATCTGGGACTACCCAGAAACTGTAAGAGGCAGAGAAAGGTCCTTCCCTGTAGGCTTTAGAGGAACATGGCCCTGCCAACATCTTGATCTTGGATTTCCAGCCTCCAGCATGTGAGACAAGTTTCTGGGTTTTTTTGGAGACAGAGTCTCACTCTTGTCACCCAGGCTGGAGTGCAGTGGCATGAACTTGGCTCACTGCAACCTCCTCCCAGGATCAAGGTATTGTCCTGCCTCAGCCTCCCGAGTAGCTGGGATGACAGGGGCCCGCCACCACGCCAGCTCATTTTTGTATTTTTTACTAGAGAAGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAAGTGATCCACCCGCCTTGGCCTCCCAAAGTGCTAGGATTACAGGTGTGAGCCACTGCGCCTGGCAAGTTTCTGTTGCCTTAAGCCACTCTTTCTGTGGTAATTTGTTATCATGGCCCTAAGAAATGACTAGAGAGAGAAAGCAAATCCCTTTGTTTCTGCATTTACTGAAACAGATGAATAGATTTCTAGCTCCCTTGGGGTCTGAACTTTTAAAAGAGAGATTTCTTATACATATGATAATCATGATATTGT", "63M3D56M") ) << ss.str().c_str();
00195
00196 EXPECT_TRUE( verifyAlign("ATATTGTTTTTTTCAATGCATATCAAAACAATGTTTACAATATACTACAGCCTAAGTGTGCAATAGCATTATGTGTAGAAATGCACATACCATAATTAGTTTTTTTTTTTGAAAAAACT","GTTCCAAAAGATTATATTTGTTAGGTTAGAGAATTTTAACTTATTTATATAATGGAGATTTTCTAATACTGAGAATACCTTAATTCTTATTGTAAGCCTACTTAACAGTGACAAAATGTTATTATAACGTGGTATTGAAATTAATATGATAGTATTTTATATGGATATTTGCATATGCAATTGACATATATTGTATATACAATATATAACTGTGTATTATATATTATATTTATATAATGTTATATTGTATATGAATATATTTGAATTATATGTATATACATATATATAGGCATTCATCAGAAATATTGCAGGTTTGGTTTCAGACGACTATAATAAAGTGAATATTGCAATAAAGCGAATCACAAGAAATTATTGTTTTTTTCAATGCATATCAAAACAATGTTTACAATATACTACAGCCTAAGTGTGCAATAGCATTATGTGTAGAAATGCACATACCATAATTAGTTTTTTTTTTGAAAAAACTGTTAATGATTATCTGAGCCTTCAGTGAGTTGTAATCTTTTCATGGTGGAGGATCATACCTCTACGTTGATGTCAGCTGACTGATCAGGGTAGTAGTTGCTGAAGGCTTGGGTGGCTGTGGCAATTTCTTAAAATAGGATAACAATGGCATTTACCACATTAATTGACTCCTTCTTTCACAAAAGATTTCTCTGTCTCATGCAATGCTGTTTGACAGCATTTTCCCCACAGTAGAATTTCTTTAAAAATTGG", "109M249D10M") ) << ss.str().c_str();
00197
00198 EXPECT_TRUE( verifyAlign("CCAGACTATCTCAAGCAATCAACAGATTTAATGTAAGGAGTGTCAAAATCTGAATGATGCTTTTTGCAGAAATAGAAAATCCCTTTCTAATATTTTTATATTTTTGAC","TTATCGAGGCTGGCGGATTTTGTGAGGCCAGGAGTTCAAGACCAGCCTGGCCAACATGGCAAAACTCTGTCTCTACTAAAAATACAAAAGTTAGCTGGGCATAGTGGCACATGTCTATAGTTCTAGCTATGTGGGAGGCTGAGACACGAGAATCGCTTGAACCCAGGAGGTGGAGGTTGTGGTGAGCCGAGCTCACGCCATTGCTCTCCAGCCTGGGCAACAGAGCAAGACTGTCTCAAAAACAAAAACAAAAAACACAAAAACTACAAGACTTTTATGAAATAACTTAAGGAAGATATAAATAAATGGAAAGATATCCCATGTTCCTGACTTGGAAGACTTAATTTTGTTAAGATGTCCATACTATCTCAAGCAATCAACAGATTTAATGTAAGGAGTGTCAAAATCTGAATGATGCTTTTTGCAGAAATAGAAAATCCCTTTCTAATATTTTTATGTAATCTCAAGGGACCCCAAATAGCCAAAAGAATCCTGAAAAAGTAGAATAAAGCTGGAGGACTCATGATTCCTGATTTGAAAACTTACTACCAGATACAATAATCAAAACAGTTCCGTGCTTGTCATAAAGACAAACATATAGACCAATGGAACAGAATAGAGATTACAGGGACAAATCCTCATATATATGGTCAAATGATTTTTGACCAGTGCCAAGATCATTCATGGGTGAAAAGACAATCTTTTCAATAAAAGAT", "99M200D9M") ) << ss.str().c_str();
00199
00200
00201
00202 EXPECT_FALSE( verifyAlign("ATGAGGTCAGGAGATGGAGACCATCCTGGCTAACATGGTGAAACCCCATCTCTAAAAAAAGTGTAACAGAGGTGCATACTCAAAACTACAAAAGTCTCGTGAAAGGAA","CAAGAAAAAGAAATAAAATACATTTTAGTAGGAAAGGAAGAAGTTAAATTGTCTCCATTTGGTGACAACATGAGCTTATATGCAGAAAACCTAAAGACTCTACCAAAAAAACTGCTGGAACTGATAAATGAATTCGGTGAAGTCCTAGGGTATAAAATCAATGTACAAAATAAGTGGTGTTTCTATATTCTAATAAATTATTCAAAAGGGAAATTAAGAAATCAATCCCATTTTCAATAGCAACAACAAAAAAAATGACAATGCCAAAGTATAAATTTAACCAAGAAGCTACAAGAGTCTGGGCGCAGTGGCTTATGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCATGAGGTCAGGAGATGGAGACCATCCTGGCTAACATGGTGAAACCCCATCTCTACTAAAAAAAAATAATAATAATAATAATAATAATAATAATTAGCCGGGCGTGGTGGTAGGCATCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTAGGAGAATGGCATGAACCTGGGAGGTGGAGCTTGCAGTGAGCAGAGATCACACCCCTGCACTCCAGCCTGGGTGACAGGGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAGTGTAACAGAGTTGCATACTGAAAACTATAAAATTCTGATGAAAGAAAATGAAGCAACAAATAATTAATATAATAAAAAAGTCCATACTATCCAAAAT", "54M200D54M") ) << ss.str().c_str();
00203
00204 }
00205
00206 #if 0
00207
00208
00209
00210
00211
00212
00213 EXPECT_TRUE( verifyAlign("GAATCAATACGCTCGGGATGCAGCGCCTAGCCGTTGGTTTGAGAATGGTTCTCTAGAGTTATCTTCACCCTCTACCTTGTGTGGCACTATTTCTTCTATGACCTTGAC","TGGCTCAAGACCTGACCTTGTGCACGTCTTGGATGCCAGTTCTATTCCCCTCACAGGCCATATGAATCCTGTCCTTTCTGCCTCAAAATGCCCATCCAGAGCCTCTACATTGATTAGCTTTTCCCTCCCTTCCAGAAAAGTTCAAAGGCTACCTCCTCCTTGAAGCCTTCACAAATACCTTAATCTAACTGTTTATAACCCTCTGCCATCTTAGCACTGTGGAAAATACATAAACTTGGGGTTAGAACATCATCAGTTTTAATGTAAGCACCATCCTTTCTAGCTGTACAGCCCTCCTGAGCCTTAGTTCTTACATCTTGAAGATGGAACCAGCTCAACAAGCATAGGGATGTAGCAAGAATCAAGACACTGTAGATGCAGCACCTGGCCGGTGGTAAGAGCTTGGTTATCACAAGTTATCTTCACCCTCTACCTTGTGTGGCACTATTTCTTCTATGACCTTGACTGCTCTCTGCTCTGATCTGGAAGTTCGCTGGGAAAAGGTGTCCCCTTTTTTATTACCTACCGGGAGAACCATGAGTGATGCTCTACTTGTAGTATATATACCCTGAGATGATTATTCTTAAAGACTAGTTCTCATGACTTGAGAGTTTGCTCTGTGTTAGGTACCATTCTAACACTGGATGTTGACTATGTATGTTATTTAATACTTCCATCAACCCCATAATGTAGGGAGAATCATTATGCCCATTTTA", "108M") ) << ss.str().c_str();
00214 #endif
00215
00216
00217 TEST(GreedyTupleTestAligner, EquivalentAlignment) {
00218 std::stringstream ss(std::stringstream::out);
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229 EXPECT_FALSE( verifyAlign("TTTTCTTTTCAAAAATTTAAAAGTGACATACAAAATTATATGTGTATGTACAACAAAAGCTTAACTATAACACCTTGTTACATACTTTGGAATTGAAAGGCAGGAATG","CAGCACCCTAATTCACTATGCCCTAAGCTTCAAGGGCTTCAGAGTAAGCTCTCAGTGGAGTCTGATTGGAATCCCTCTTCGCCAGCTTGTGAGGTATGGGGCTAGGTTCCACAATATTCCCTTTGAGGGAGTAGATCTTCCAGCCTTCTGGGGCATGCTCTGAAAGTCCTCTTTGCAGAAGTAGCTCTTTAAAATCATATTCTCTTTCCAATTTGACCTCTTTTTTTATCCTTGTTCTGTCCATGCTGTCCAAAGCATCTTGGACTAAGTTTTGACTTTTTTTTTAAGTGCTGCATTTCCATTTGACATTTTACCTTTGTAAATTTCTATTTTTTTACCTTTGTGACTTATTAAAATATTTTCTTTTCAAAAATTTAAAAGTGACATACAAAATTATATGTGTATGTACAACAAAAGCTTAACTATAACACCTTGTTACATACTTTGCTATCCAGGCCACTGATCCTTTCTTACATAGTAAGTCAGCTATAGTTCATTAGCTTACAGTTTTTAGATACAAGTCTTAATCCATCCCTTCTCCTTTTGTATTCTTTACTTTCTGCAATATTTAAGACTTTTTGCGTTCTGACTAAAAGAAACCACCTGAAATTGGCATATGCAACTGTTCATGAATGAGAACTCGCATGGAATTGAAAGGCAGGAATGCAGCTTGACCTTAGAATGGATTTGATCCAGGAACTAGAAGGTGGGTAGGA", "87M200D21M") ) << ss.str().c_str();
00230
00231
00232 EXPECT_FALSE( verifyAlign("AACAGTTGAGAGGTACTAAAATTGAGTTTTCTTGAAAAATATATTTAATCTAAAGTACTGAAAATTTGGGGGAAAATGCTTAAGGTCATATTCCTTTTTTGAAAAGAT","TCATCTTTCTCCCATACTGGCTGTTTCCTGCCCTCAAACACTGGACTCCAAGTTCTTCAGCTTGTGGACTCTTGGACCTACAACCAGTGGTCTGCCAGGGCCCTTTGGGCCTTCGGCCACAGACTGATGGCTACACTGTCGGCTCCCCTACTTTTGAGGTTTTGTGTCTTGGACTGGCTTTCTTGCTCCTCAGCTTGCAGACAGCCTACTGTAGGACTTCACTTTGTGACTATTTGAGTCAATACTCCTTAATAAACACCCTTTCATATATACATATATCCTATTAGTCCTGTTCCTCTAGAGAACCCTAATACAGTGTTGTACATTGAAATAAATATAATTATTCTGGTTTTGGTTGAACAGTTGAGAGGTACTAAAATTGAGTTTTCTTGAAAAATATATTTAATCTAAAGTACTGAAAATTTGGGGGAAAATGCTTCTGTAAATCCTAAGTTATTATTTCTTCAACTATATTCTGTAGTTAATTTCTCCAGCAATTCTTAATTTCAGCACAAATTAGCCACTGTTTGAATTAGGAATACTGAATCGTCTCCATTGCAGTGCAGTTAATAAGTCATTTCTTGATGAAGTAGTCCATGTAGGACTTGAAATCTTGTCTTTTTCATGATACATTATCATAAGGTCATATTCCTTTTTTGAAAAGATTGATGATACTATTCTGAAAGACACTAGTAGAGTTAGGCTTGGTTTTATGA", "80M200D28M") ) << ss.str().c_str();
00233
00234 }
00235