libStatGen Software  1
ReferenceSequenceTest.cpp
00001 /*
00002  *  Copyright (C) 2011  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <getopt.h>
00019 #include "Generic.h"
00020 #include <stdio.h>
00021 #include "ReferenceSequence.h"
00022 #include "UnitTest.h"
00023 
00024 #include <assert.h>
00025 #include <sstream>
00026 
00027 
00028 class ReferenceSequenceTest : public UnitTest
00029 {
00030 public:
00031     ReferenceSequenceTest(const char *title) : UnitTest(title) {;}
00032     void test1();
00033     void test2();
00034     void test3();
00035     void humanGenomeTest1();
00036 
00037     void test() {
00038         test1();
00039         test2();
00040         test3();
00041         humanGenomeTest1();
00042     }
00043 };
00044 
00045 void ReferenceSequenceTest::test1(void)
00046 {
00047     std::string sequence("ACTGACTGACTGACTGACTGACTGACTGACTGACTGACTG");
00048     std::string word;
00049 
00050     word="ACTG";
00051     check(m_failures, ++m_testNum, "Test wordMatch with std::string", true,
00052             Sequence::wordMatch(sequence, 4, word));
00053 
00054     std::stringstream output;
00055 
00056     Sequence::printNearbyWords(output, sequence, 8, word, 4);
00057 
00058     std::string expect("\
00059 word 'ACTG' found -4 away from position 8.\n\
00060 word 'ACTG' found 0 away from position 8.\n\
00061 ");
00062 
00063     check(m_failures, ++m_testNum, "Test printNearbyWords with std::string", expect, output.str());
00064 
00065 
00066     Sequence::getString(sequence, 4, 4, word);
00067 
00068     check(m_failures, ++m_testNum, "Test getString with std::string", "ACTG", word);
00069 
00070     Sequence::getHighLightedString(sequence, 0, 12, word, 4, 8);
00071     check(m_failures, ++m_testNum, "Test getHighLightedStribng with std::string", "ACTGactgACTG",word);
00072 
00073 #if 0
00074     // busted test - don't know why
00075     output.clear();
00076     output.str(std::string());
00077 //    Sequence::printBaseContext(std::cout, sequence, 8, 4);
00078     Sequence::printBaseContext(output, sequence, 8, 4);
00079     expect="\
00080 index: 8\n\
00081 ACTGACTGA\n\
00082     ^\n\
00083 ";
00084     check(m_failures, ++m_testNum, "Test printBaseContext with std::string", expect, output.str());
00085 #endif
00086     std::string result;
00087     std::string   read("ACTGZZZZACTG");
00088               expect = "    ^^^^    ";
00089     Sequence::getMismatchHatString(sequence, 4, result, read);
00090     check(m_failures, ++m_testNum, "Test getMismatchHatString with std::string", expect, result);
00091 
00092 
00093     read="ACTG";
00094     std::string quality("");
00095     size_t location = Sequence::simpleLocalAligner(sequence, 0, read, quality, 12);
00096     check(m_failures, ++m_testNum, "Test simpleLocalAligner with std::string", (size_t) 0, location);
00097 
00098     read="ACNG";
00099     int misMatches = Sequence::getMismatchCount(sequence, 0, read);
00100     check(m_failures, ++m_testNum, "Test getMismatchCount with std::string", 1, misMatches);
00101 
00102     read="ACNG";
00103     quality="$$$$";
00104     int sumQ = Sequence::getSumQ(sequence, 0, read, quality);
00105     check(m_failures, ++m_testNum, "Test getSumQ with std::string", 3, sumQ);
00106 }
00107 
00108 void ReferenceSequenceTest::test2(void)
00109 {
00110     PackedSequenceData sequence;
00111     std::string word;
00112 
00113     sequence.push_back('A');
00114     sequence.push_back('C');
00115     sequence.push_back('T');
00116     sequence.push_back('G');
00117 
00118     sequence.push_back('A');
00119     sequence.push_back('C');
00120     sequence.push_back('T');
00121     sequence.push_back('G');
00122 
00123     sequence.push_back('A');
00124     sequence.push_back('C');
00125     sequence.push_back('T');
00126     sequence.push_back('G');
00127 
00128     sequence.push_back('A');
00129     sequence.push_back('C');
00130     sequence.push_back('T');
00131     sequence.push_back('G');
00132 
00133     Sequence::getString(sequence, 4, 4, word);
00134 
00135     check(m_failures, ++m_testNum, "Test getString with PackedSequenceData", "ACTG", word);
00136     
00137     std::cout << "test2 sequence utilization is " << sequence.getUtilization() * 100 << "% - expect around 6.25%" << std::endl;
00138 
00139 }
00140 
00141 void ReferenceSequenceTest::test3(void)
00142 {
00143     std::vector<PackedSequenceData> chromosomeSequence;
00144     std::vector<std::string> chromosomeNames;
00145 
00146     bool result = loadFastaFile("../phiX.fa", chromosomeSequence, chromosomeNames);
00147 
00148     if(result) {
00149         std::cout << "../phiX.fa not found - skipping these tests." << std::endl;
00150         return;
00151     }
00152 
00153     std::cout << "phiX reference utilization is " << chromosomeSequence[0].getUtilization() * 100 << "% - expect around 96.8%" << std::endl;
00154 
00155 
00156 
00157     check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", (size_t) 1, chromosomeNames.size());
00158     check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", (size_t) 1, chromosomeSequence.size());
00159     check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", "1", chromosomeNames[0]);
00160 
00161     std::string word;
00162 
00163     Sequence::getString(chromosomeSequence[0], 60, 10, word);
00164 
00165     check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", "AAATTATCTT", word);
00166 
00167 }
00168 
00169 void ReferenceSequenceTest::humanGenomeTest1(void)
00170 {
00171     std::vector<PackedSequenceData> chromosomeSequence;
00172     std::vector<std::string> chromosomeNames;
00173 
00174 #define HUMAN_GENOME "/data/local/ref/karma.ref/human.g1k.v37.fa"
00175     bool result = loadFastaFile(HUMAN_GENOME, chromosomeSequence, chromosomeNames);
00176 
00177     if(result) {
00178         std::cout << HUMAN_GENOME << " not found - skipping these tests." << std::endl;
00179         return;
00180     }
00181 
00182 }
00183 
00184 int main(int argc, char **argv)
00185 {
00186     ReferenceSequenceTest test("ReferenceSequenceTest");
00187 
00188     test.test();
00189 
00190     std::cout << test;
00191 
00192     exit(test.getFailureCount());
00193 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends