00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <gtest/gtest.h>
00019
00020 #include "GenomeSequence.h"
00021
00022 TEST(GenomeSequenceTest, staticLookupTest)
00023 {
00024 GenomeSequence s;
00025
00026 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'A']], 'A');
00027 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'a']], 'A');
00028 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'T']], 'T');
00029 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 't']], 'T');
00030 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'C']], 'C');
00031 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'c']], 'C');
00032 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'G']], 'G');
00033 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'g']], 'G');
00034 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'N']], 'N');
00035 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'n']], 'N');
00036 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'M']], 'M');
00037 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'm']], 'M');
00038
00039 EXPECT_EQ(GenomeSequence::base2int[(int) 'N'], 4);
00040 EXPECT_EQ(GenomeSequence::base2int[(int) 'n'], 4);
00041 EXPECT_EQ(GenomeSequence::base2int[(int) 'A'], 0);
00042 EXPECT_EQ(GenomeSequence::base2int[(int) 'a'], 0);
00043 EXPECT_EQ(GenomeSequence::base2int[(int) 'T'], 3);
00044 EXPECT_EQ(GenomeSequence::base2int[(int) 't'], 3);
00045 EXPECT_EQ(GenomeSequence::base2int[(int) 'C'], 1);
00046 EXPECT_EQ(GenomeSequence::base2int[(int) 'c'], 1);
00047 EXPECT_EQ(GenomeSequence::base2int[(int) 'G'], 2);
00048 EXPECT_EQ(GenomeSequence::base2int[(int) 'g'], 2);
00049 }
00050
00051 TEST(GenomeSequenceTest, testBaseSpaceReference)
00052 {
00053 GenomeSequence s;
00054 int exitCode = system("rm -f ../../testdata/phiX-bs.umfa");
00055 EXPECT_EQ(exitCode, 0);
00056
00057 s.setReferenceName("../../testdata/phiX.fa");
00058 bool rc = s.create(false);
00059 EXPECT_EQ(rc, false);
00060 EXPECT_EQ(s[0], 'G');
00061 EXPECT_EQ(s[1], 'A');
00062 EXPECT_EQ(s[2], 'G');
00063 EXPECT_EQ(s[s.getNumberBases()-3], 'G');
00064 EXPECT_EQ(s[s.getNumberBases()-2], 'C');
00065 EXPECT_EQ(s[s.getNumberBases()-1], 'A');
00066 EXPECT_EQ(s[s.getNumberBases()], 'N');
00067
00068 s.close();
00069 }
00070
00071 TEST(GenomeSequenceTest, testColorSpaceReference)
00072 {
00073 GenomeSequence s;
00074 int exitCode = system("rm -f ../../testdata/phiX-cs.umfa");
00075 EXPECT_EQ(exitCode, 0);
00076
00077 s.setReferenceName("../../testdata/phiX.fa");
00078 bool rc = s.create(true);
00079
00080
00081
00082
00083
00084 EXPECT_EQ(rc, false);
00085 EXPECT_EQ(s[0], 'N');
00086 EXPECT_EQ(s[1], '2');
00087 EXPECT_EQ(s[2], '2');
00088 EXPECT_EQ(s[s.getNumberBases()-3], '1');
00089 EXPECT_EQ(s[s.getNumberBases()-2], '3');
00090 EXPECT_EQ(s[s.getNumberBases()-1], '1');
00091 EXPECT_EQ(s[s.getNumberBases()], 'N');
00092
00093 s.close();
00094 }
00095
00096 #if 0
00097 void simplestExample(void)
00098 {
00099 GenomeSequence reference;
00100 genomeIndex_t index;
00101
00102
00103
00104
00105
00106
00107
00108 if (reference.open()) {
00109 perror("GenomeSequence::open");
00110 exit(1);
00111 }
00112
00113
00114 index = 1000000000;
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124 std::cout << "base[" << index << "] = " << reference[index] << std::endl;
00125
00126
00127
00128
00129
00130 int chr = reference.getChromosome(index);
00131 genomeIndex_t chrIndex = index - reference.getChromosomeStart(chr) + 1;
00132
00133 std::cout << "genome index " << index << " corresponds to chromosome " << chr << " position " << chrIndex << std::endl;
00134
00135
00136
00137
00138
00139 const char *chromosomeName = "5";
00140 chr = reference.getChromosome(chromosomeName);
00141 chrIndex = 100000;
00142
00143 index = reference.getChromosomeStart(chr) + chrIndex - 1;
00144
00145 std::cout << "Chromosome '" << chromosomeName << "' position " << chrIndex << " corresponds to genome index position " << index << std::endl;
00146
00147 reference.close();
00148 }
00149
00150 void testGenomeSequence(void)
00151 {
00152 GenomeSequence reference;
00153
00154 #if 0
00155 std::string referenceName = "someotherreference";
00156 if (reference.setFastaName(referenceName)) {
00157 std::cerr << "failed to open reference file "
00158 << referenceName
00159 << std::endl;
00160 exit(1);
00161 }
00162 #endif
00163
00164 std::cerr << "open and prefetch the reference genome: ";
00165
00166
00167 if (reference.open()) {
00168 exit(1);
00169 }
00170 std::cerr << "done!" << std::endl;
00171
00172
00173
00174
00175 genomeIndex_t genomeIndex;
00176 unsigned int chromosomeIndex;
00177 unsigned int chromosome;
00178 std::string chromosomeName;
00179
00180
00181
00182
00183
00184 chromosomeName = "2";
00185 chromosomeIndex = 1234567;
00186
00187 genomeIndex = reference.getGenomePosition(chromosomeName.c_str(), chromosomeIndex);
00188 assert(genomeIndex!=INVALID_GENOME_INDEX);
00189 std::cout << "Chromosome " << chromosomeName << ", index ";
00190 std::cout << chromosomeIndex << " contains base " << reference[genomeIndex];
00191 std::cout << " at genome index position " << genomeIndex << std::endl;
00192
00193
00194
00195
00196
00197
00198
00199 chromosome = reference.getChromosome(genomeIndex);
00200 unsigned int newChromosomeIndex;
00201
00202 newChromosomeIndex = genomeIndex - reference.getChromosomeStart(chromosome) + 1;
00203
00204 assert(chromosomeIndex == newChromosomeIndex);
00205
00206
00207
00208 PackedRead pr;
00209
00210 pr.set("ATCGATCG", 0);
00211 assert(pr.size()==8);
00212 assert(pr[0]==GenomeSequence::base2int[(int) 'A']);
00213 assert(pr[1]==GenomeSequence::base2int[(int) 'T']);
00214 assert(pr[2]==GenomeSequence::base2int[(int) 'C']);
00215 assert(pr[3]==GenomeSequence::base2int[(int) 'G']);
00216 pr.set("ATCGATCG", 1);
00217 assert(pr.size()==9);
00218 pr.set("", 0);
00219 assert(pr.size()==0);
00220 pr.set("", 1);
00221 assert(pr.size()==1);
00222 pr.set("", 2);
00223 assert(pr.size()==2);
00224 pr.set("", 3);
00225 assert(pr.size()==3);
00226 assert(pr[0]==GenomeSequence::base2int[(int) 'N']);
00227 assert(pr[1]==GenomeSequence::base2int[(int) 'N']);
00228 assert(pr[2]==GenomeSequence::base2int[(int) 'N']);
00229 pr.set("C", 1);
00230 assert(pr.size()==2);
00231 assert(pr[0]==GenomeSequence::base2int[(int) 'N']);
00232 assert(pr[1]==GenomeSequence::base2int[(int) 'C']);
00233
00234 }
00235
00236 #endif