libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2011 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include <getopt.h> 00019 #include "Generic.h" 00020 #include <stdio.h> 00021 #include "ReferenceSequence.h" 00022 #include "UnitTest.h" 00023 00024 #include <assert.h> 00025 #include <sstream> 00026 00027 00028 class ReferenceSequenceTest : public UnitTest 00029 { 00030 public: 00031 ReferenceSequenceTest(const char *title) : UnitTest(title) {;} 00032 void test1(); 00033 void test2(); 00034 void test3(); 00035 void humanGenomeTest1(); 00036 00037 void test() { 00038 test1(); 00039 test2(); 00040 test3(); 00041 humanGenomeTest1(); 00042 } 00043 }; 00044 00045 void ReferenceSequenceTest::test1(void) 00046 { 00047 std::string sequence("ACTGACTGACTGACTGACTGACTGACTGACTGACTGACTG"); 00048 std::string word; 00049 00050 word="ACTG"; 00051 check(m_failures, ++m_testNum, "Test wordMatch with std::string", true, 00052 Sequence::wordMatch(sequence, 4, word)); 00053 00054 std::stringstream output; 00055 00056 Sequence::printNearbyWords(output, sequence, 8, word, 4); 00057 00058 std::string expect("\ 00059 word 'ACTG' found -4 away from position 8.\n\ 00060 word 'ACTG' found 0 away from position 8.\n\ 00061 "); 00062 00063 check(m_failures, ++m_testNum, "Test printNearbyWords with std::string", expect, output.str()); 00064 00065 00066 Sequence::getString(sequence, 4, 4, word); 00067 00068 check(m_failures, ++m_testNum, "Test getString with std::string", "ACTG", word); 00069 00070 Sequence::getHighLightedString(sequence, 0, 12, word, 4, 8); 00071 check(m_failures, ++m_testNum, "Test getHighLightedStribng with std::string", "ACTGactgACTG",word); 00072 00073 #if 0 00074 // busted test - don't know why 00075 output.clear(); 00076 output.str(std::string()); 00077 // Sequence::printBaseContext(std::cout, sequence, 8, 4); 00078 Sequence::printBaseContext(output, sequence, 8, 4); 00079 expect="\ 00080 index: 8\n\ 00081 ACTGACTGA\n\ 00082 ^\n\ 00083 "; 00084 check(m_failures, ++m_testNum, "Test printBaseContext with std::string", expect, output.str()); 00085 #endif 00086 std::string result; 00087 std::string read("ACTGZZZZACTG"); 00088 expect = " ^^^^ "; 00089 Sequence::getMismatchHatString(sequence, 4, result, read); 00090 check(m_failures, ++m_testNum, "Test getMismatchHatString with std::string", expect, result); 00091 00092 00093 read="ACTG"; 00094 std::string quality(""); 00095 size_t location = Sequence::simpleLocalAligner(sequence, 0, read, quality, 12); 00096 check(m_failures, ++m_testNum, "Test simpleLocalAligner with std::string", (size_t) 0, location); 00097 00098 read="ACNG"; 00099 int misMatches = Sequence::getMismatchCount(sequence, 0, read); 00100 check(m_failures, ++m_testNum, "Test getMismatchCount with std::string", 1, misMatches); 00101 00102 read="ACNG"; 00103 quality="$$$$"; 00104 int sumQ = Sequence::getSumQ(sequence, 0, read, quality); 00105 check(m_failures, ++m_testNum, "Test getSumQ with std::string", 3, sumQ); 00106 } 00107 00108 void ReferenceSequenceTest::test2(void) 00109 { 00110 PackedSequenceData sequence; 00111 std::string word; 00112 00113 sequence.push_back('A'); 00114 sequence.push_back('C'); 00115 sequence.push_back('T'); 00116 sequence.push_back('G'); 00117 00118 sequence.push_back('A'); 00119 sequence.push_back('C'); 00120 sequence.push_back('T'); 00121 sequence.push_back('G'); 00122 00123 sequence.push_back('A'); 00124 sequence.push_back('C'); 00125 sequence.push_back('T'); 00126 sequence.push_back('G'); 00127 00128 sequence.push_back('A'); 00129 sequence.push_back('C'); 00130 sequence.push_back('T'); 00131 sequence.push_back('G'); 00132 00133 Sequence::getString(sequence, 4, 4, word); 00134 00135 check(m_failures, ++m_testNum, "Test getString with PackedSequenceData", "ACTG", word); 00136 00137 std::cout << "test2 sequence utilization is " << sequence.getUtilization() * 100 << "% - expect around 6.25%" << std::endl; 00138 00139 } 00140 00141 void ReferenceSequenceTest::test3(void) 00142 { 00143 std::vector<PackedSequenceData> chromosomeSequence; 00144 std::vector<std::string> chromosomeNames; 00145 00146 bool result = loadFastaFile("../phiX.fa", chromosomeSequence, chromosomeNames); 00147 00148 if(result) { 00149 std::cout << "../phiX.fa not found - skipping these tests." << std::endl; 00150 return; 00151 } 00152 00153 std::cout << "phiX reference utilization is " << chromosomeSequence[0].getUtilization() * 100 << "% - expect around 96.8%" << std::endl; 00154 00155 00156 00157 check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", (size_t) 1, chromosomeNames.size()); 00158 check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", (size_t) 1, chromosomeSequence.size()); 00159 check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", "1", chromosomeNames[0]); 00160 00161 std::string word; 00162 00163 Sequence::getString(chromosomeSequence[0], 60, 10, word); 00164 00165 check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", "AAATTATCTT", word); 00166 00167 } 00168 00169 void ReferenceSequenceTest::humanGenomeTest1(void) 00170 { 00171 std::vector<PackedSequenceData> chromosomeSequence; 00172 std::vector<std::string> chromosomeNames; 00173 00174 #define HUMAN_GENOME "/data/local/ref/karma.ref/human.g1k.v37.fa" 00175 bool result = loadFastaFile(HUMAN_GENOME, chromosomeSequence, chromosomeNames); 00176 00177 if(result) { 00178 std::cout << HUMAN_GENOME << " not found - skipping these tests." << std::endl; 00179 return; 00180 } 00181 00182 } 00183 00184 int main(int argc, char **argv) 00185 { 00186 ReferenceSequenceTest test("ReferenceSequenceTest"); 00187 00188 test.test(); 00189 00190 std::cout << test; 00191 00192 exit(test.getFailureCount()); 00193 }