GenomeSequenceHelpers.h
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef _GENOME_SEQUENCE_HELPERS_H
00019 #define _GENOME_SEQUENCE_HELPERS_H
00020
00021 #if !defined(MD5_DIGEST_LENGTH)
00022 #define MD5_DIGEST_LENGTH 16
00023 #endif
00024
00025 #include "MemoryMapArray.h"
00026
00027 #include <stdint.h>
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 struct ChromosomeInfo
00041 {
00042 static const int MAX_GENOME_INFO_STRING=128;
00043
00044 void constructorClear()
00045 {
00046 memset(this,0, sizeof(*this));
00047 }
00048 void setChromosomeName(const char *n)
00049 {
00050 strncpy(name, n, sizeof(name)-1);
00051 }
00052 genomeIndex_t start;
00053 genomeIndex_t size;
00054 char md5[2*MD5_DIGEST_LENGTH + 1];
00055 char name[MAX_GENOME_INFO_STRING];
00056 char assemblyID[MAX_GENOME_INFO_STRING];
00057 char uri[MAX_GENOME_INFO_STRING];
00058 char species[MAX_GENOME_INFO_STRING];
00059
00060
00061 void setAssemblyID(const char *newID)
00062 {
00063 strncpy(assemblyID, newID, sizeof(assemblyID) - 1);
00064 }
00065 void setSpecies(const char *newSpecies)
00066 {
00067 strncpy(species, newSpecies, sizeof(newSpecies));
00068 }
00069 void setURI(const char *newURI)
00070 {
00071 strncpy(uri, newURI, sizeof(uri));
00072 }
00073 };
00074
00075 class genomeSequenceMmapHeader : public MemoryMapArrayHeader
00076 {
00077 friend class GenomeSequence;
00078 friend std::ostream &operator << (std::ostream &, genomeSequenceMmapHeader &);
00079 private:
00080 uint32_t _chromosomeCount;
00081 bool _colorSpace;
00082
00083 ChromosomeInfo _chromosomes[0];
00084
00085 public:
00086
00087
00088
00089
00090
00091 static size_t getHeaderSize(int chromosomeCount)
00092 {
00093 return sizeof(genomeSequenceMmapHeader) + sizeof(ChromosomeInfo[1]) * chromosomeCount;
00094 }
00095
00096
00097
00098
00099 };
00100
00101 std::ostream &operator << (std::ostream &stream, genomeSequenceMmapHeader &h);
00102
00103
00104
00105
00106
00107
00108
00109 inline uint8_t genomeSequenceAccess(void *base, genomeIndex_t index)
00110 {
00111 if ((index&1)==0)
00112 {
00113 return ((uint8_t *) base)[index>>1] & 0xf;
00114 }
00115 else
00116 {
00117 return (((uint8_t *) base)[index>>1] & 0xf0) >> 4;
00118 }
00119 };
00120 inline void genomeSequenceSet(void *base, genomeIndex_t index, uint8_t v)
00121 {
00122 if ((index&1)==0)
00123 {
00124 ((uint8_t *) base)[index>>1] = (((uint8_t *) base)[index>>1] & 0xf0) | v;
00125 }
00126 else
00127 {
00128 ((uint8_t *) base)[index>>1] = (((uint8_t *) base)[index>>1] & 0x0f) | v<<4;
00129 }
00130 }
00131
00132 inline size_t mmapGenomeSequenceElementCount2Bytes(genomeIndex_t i)
00133 {
00134 return sizeof(uint8_t) * i / 2;
00135 }
00136
00137 class PackedRead
00138 {
00139 void set(int index, int val)
00140 {
00141 packedBases[index>>1] =
00142 (packedBases[index>>1]
00143 & ~(7<<((index&0x01)<<2)))
00144 | ((val&0x0f)<<((index&0x1)<<2));
00145 }
00146 public:
00147 std::vector<uint8_t> packedBases;
00148 uint32_t length;
00149 int size()
00150 {
00151 return length;
00152 }
00153 void clear()
00154 {
00155 packedBases.clear();
00156 length=0;
00157 }
00158 void set(const char *rhs, int padWithNCount = 0);
00159 uint8_t operator [](int index)
00160 {
00161 return (packedBases[index>>1] >> ((index&0x1)<<2)) & 0xf;
00162 }
00163 };
00164
00165 #endif