00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <gtest/gtest.h>
00019
00020 #include "GenomeSequence.h"
00021
00022 const char* RM_BS_REFERENCE = "rm -f ../../../src/karma/test/phiX-bs.umfa";
00023 const char* RM_CS_REFERENCE = "rm -f ../../../src/karma/test/phiX-cs.umfa";
00024 const char* REFERENCE_NAME = "../../../src/karma/test/phiX.fa";
00025
00026 TEST(GenomeSequenceTest, staticLookupTest)
00027 {
00028 GenomeSequence s;
00029
00030 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'A']], 'A');
00031 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'a']], 'A');
00032 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'T']], 'T');
00033 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 't']], 'T');
00034 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'C']], 'C');
00035 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'c']], 'C');
00036 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'G']], 'G');
00037 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'g']], 'G');
00038 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'N']], 'N');
00039 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'n']], 'N');
00040 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'M']], 'M');
00041 EXPECT_EQ(GenomeSequence::int2base[GenomeSequence::base2int[(int) 'm']], 'M');
00042
00043 EXPECT_EQ(GenomeSequence::base2int[(int) 'N'], 4);
00044 EXPECT_EQ(GenomeSequence::base2int[(int) 'n'], 4);
00045 EXPECT_EQ(GenomeSequence::base2int[(int) 'A'], 0);
00046 EXPECT_EQ(GenomeSequence::base2int[(int) 'a'], 0);
00047 EXPECT_EQ(GenomeSequence::base2int[(int) 'T'], 3);
00048 EXPECT_EQ(GenomeSequence::base2int[(int) 't'], 3);
00049 EXPECT_EQ(GenomeSequence::base2int[(int) 'C'], 1);
00050 EXPECT_EQ(GenomeSequence::base2int[(int) 'c'], 1);
00051 EXPECT_EQ(GenomeSequence::base2int[(int) 'G'], 2);
00052 EXPECT_EQ(GenomeSequence::base2int[(int) 'g'], 2);
00053 }
00054
00055
00056 TEST(GenomeSequenceTest, testBaseSpaceReference)
00057 {
00058 GenomeSequence s;
00059 int exitCode = system(RM_BS_REFERENCE);
00060 EXPECT_EQ(exitCode, 0);
00061
00062 s.setReferenceName(REFERENCE_NAME);
00063 bool rc = s.create(false);
00064 EXPECT_EQ(rc, false);
00065 EXPECT_EQ(s[0], 'G');
00066 EXPECT_EQ(s[1], 'A');
00067 EXPECT_EQ(s[2], 'G');
00068 EXPECT_EQ(s[s.getNumberBases()-3], 'G');
00069 EXPECT_EQ(s[s.getNumberBases()-2], 'C');
00070 EXPECT_EQ(s[s.getNumberBases()-1], 'A');
00071 EXPECT_EQ(s[s.getNumberBases()], 'N');
00072
00073 s.close();
00074 }
00075
00076 TEST(GenomeSequenceTest, testColorSpaceReference)
00077 {
00078 GenomeSequence s;
00079 int exitCode = system(RM_CS_REFERENCE);
00080 EXPECT_EQ(exitCode, 0);
00081
00082 s.setReferenceName(REFERENCE_NAME);
00083 bool rc = s.create(true);
00084
00085
00086
00087
00088
00089 EXPECT_EQ(rc, false);
00090 EXPECT_EQ(s[0], 'N');
00091 EXPECT_EQ(s[1], '2');
00092 EXPECT_EQ(s[2], '2');
00093 EXPECT_EQ(s[s.getNumberBases()-3], '1');
00094 EXPECT_EQ(s[s.getNumberBases()-2], '3');
00095 EXPECT_EQ(s[s.getNumberBases()-1], '1');
00096 EXPECT_EQ(s[s.getNumberBases()], 'N');
00097
00098 s.close();
00099 }
00100
00101 #if 0
00102 void simplestExample(void)
00103 {
00104 GenomeSequence reference;
00105 genomeIndex_t index;
00106
00107
00108
00109
00110
00111
00112
00113 if (reference.open()) {
00114 perror("GenomeSequence::open");
00115 exit(1);
00116 }
00117
00118
00119 index = 1000000000;
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129 std::cout << "base[" << index << "] = " << reference[index] << std::endl;
00130
00131
00132
00133
00134
00135 int chr = reference.getChromosome(index);
00136 genomeIndex_t chrIndex = index - reference.getChromosomeStart(chr) + 1;
00137
00138 std::cout << "genome index " << index << " corresponds to chromosome " << chr << " position " << chrIndex << std::endl;
00139
00140
00141
00142
00143
00144 const char *chromosomeName = "5";
00145 chr = reference.getChromosome(chromosomeName);
00146 chrIndex = 100000;
00147
00148 index = reference.getChromosomeStart(chr) + chrIndex - 1;
00149
00150 std::cout << "Chromosome '" << chromosomeName << "' position " << chrIndex << " corresponds to genome index position " << index << std::endl;
00151
00152 reference.close();
00153 }
00154
00155 void testGenomeSequence(void)
00156 {
00157 GenomeSequence reference;
00158
00159 #if 0
00160 std::string referenceName = "someotherreference";
00161 if (reference.setFastaName(referenceName)) {
00162 std::cerr << "failed to open reference file "
00163 << referenceName
00164 << std::endl;
00165 exit(1);
00166 }
00167 #endif
00168
00169 std::cerr << "open and prefetch the reference genome: ";
00170
00171
00172 if (reference.open()) {
00173 exit(1);
00174 }
00175 std::cerr << "done!" << std::endl;
00176
00177
00178
00179
00180 genomeIndex_t genomeIndex;
00181 unsigned int chromosomeIndex;
00182 unsigned int chromosome;
00183 std::string chromosomeName;
00184
00185
00186
00187
00188
00189 chromosomeName = "2";
00190 chromosomeIndex = 1234567;
00191
00192 genomeIndex = reference.getGenomePosition(chromosomeName.c_str(), chromosomeIndex);
00193 assert(genomeIndex!=INVALID_GENOME_INDEX);
00194 std::cout << "Chromosome " << chromosomeName << ", index ";
00195 std::cout << chromosomeIndex << " contains base " << reference[genomeIndex];
00196 std::cout << " at genome index position " << genomeIndex << std::endl;
00197
00198
00199
00200
00201
00202
00203
00204 chromosome = reference.getChromosome(genomeIndex);
00205 unsigned int newChromosomeIndex;
00206
00207 newChromosomeIndex = genomeIndex - reference.getChromosomeStart(chromosome) + 1;
00208
00209 assert(chromosomeIndex == newChromosomeIndex);
00210
00211
00212
00213 PackedRead pr;
00214
00215 pr.set("ATCGATCG", 0);
00216 assert(pr.size()==8);
00217 assert(pr[0]==GenomeSequence::base2int[(int) 'A']);
00218 assert(pr[1]==GenomeSequence::base2int[(int) 'T']);
00219 assert(pr[2]==GenomeSequence::base2int[(int) 'C']);
00220 assert(pr[3]==GenomeSequence::base2int[(int) 'G']);
00221 pr.set("ATCGATCG", 1);
00222 assert(pr.size()==9);
00223 pr.set("", 0);
00224 assert(pr.size()==0);
00225 pr.set("", 1);
00226 assert(pr.size()==1);
00227 pr.set("", 2);
00228 assert(pr.size()==2);
00229 pr.set("", 3);
00230 assert(pr.size()==3);
00231 assert(pr[0]==GenomeSequence::base2int[(int) 'N']);
00232 assert(pr[1]==GenomeSequence::base2int[(int) 'N']);
00233 assert(pr[2]==GenomeSequence::base2int[(int) 'N']);
00234 pr.set("C", 1);
00235 assert(pr.size()==2);
00236 assert(pr[0]==GenomeSequence::base2int[(int) 'N']);
00237 assert(pr[1]==GenomeSequence::base2int[(int) 'C']);
00238
00239 }
00240
00241 #endif