SamRecord.h
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef __SAM_RECORD_H__
00019 #define __SAM_RECORD_H__
00020
00021 #include <stdint.h>
00022
00023 #include "SamStatus.h"
00024 #include "LongHash.h"
00025 #include "MathVector.h"
00026 #include "StringArray.h"
00027 #include "IntArray.h"
00028 #include "SamFileHeader.h"
00029 #include "CigarRoller.h"
00030
/// Fixed-size leading portion of one alignment record, followed by the first
/// byte of its variable-length data.
///
/// NOTE(review): the field order and widths look like the fixed section of a
/// BAM alignment record (block size, reference id, position, packed
/// bin/mapq/name-length word, packed flag/cigar-count word, read length, mate
/// reference id, mate position, insert size) -- confirm against the SAM/BAM
/// specification.  Bit-field storage order is implementation-defined in C++,
/// so matching an on-disk layout depends on the compiler/ABI in use.
struct bamRecordStruct
{
    int32_t myBlockSize;
    int32_t myReferenceID;
    int32_t myPosition;

    // Three bit-fields sharing one 32-bit unit (8 + 8 + 16 bits).
    uint32_t myReadNameLength : 8;
    uint32_t myMapQuality : 8;
    uint32_t myBin : 16;

    // Two bit-fields sharing one 32-bit unit (16 + 16 bits).
    uint32_t myCigarLength : 16;
    uint32_t myFlag : 16;

    int32_t myReadLength;
    int32_t myMateReferenceID;
    int32_t myMatePosition;
    int32_t myInsertSize;

    // One-element placeholder array marking where the data after the fixed
    // fields begins.
    // NOTE(review): presumably the owner over-allocates the struct so that
    // myData can be indexed past element 0 -- verify in the implementation.
    char myData[1];
};
00045
00046 class SamRecord
00047 {
00048 public:
00049
00050
00051 SamRecord();
00052
00053
00054
00055 SamRecord(ErrorHandler::HandlingType errorHandlingType);
00056
00057 ~SamRecord();
00058
00059
00060 void resetRecord();
00061
00062 void resetTagIter();
00063
00064
00065
00066
00067 bool isValid(SamFileHeader& header);
00068
00069
00070
00071
00072
00073
00074 bool setReadName(const char* readName);
00075 bool setFlag(uint16_t flag);
00076 bool setReferenceName(SamFileHeader& header,
00077 const char* referenceName);
00078 bool set1BasedPosition(int32_t position);
00079 bool set0BasedPosition(int32_t position);
00080 bool setMapQuality(uint8_t mapQuality);
00081 bool setCigar(const char* cigar);
00082 bool setCigar(const Cigar& cigar);
00083 bool setMateReferenceName(SamFileHeader& header,
00084 const char* mateReferenceName);
00085 bool set1BasedMatePosition(int32_t matePosition);
00086 bool set0BasedMatePosition(int32_t matePosition);
00087 bool setInsertSize(int32_t insertSize);
00088 bool setSequence(const char* seq);
00089 bool setQuality(const char* quality);
00090
00091
00092 SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader& header);
00093
00094
00095 SamStatus::Status setBuffer(const char* fromBuffer, uint32_t fromBufferSize,
00096 SamFileHeader& header);
00097
00098
00099
00100
00101 bool addTag(const char* tag, char vtype, const char* value);
00102
00103
00104
00105 const void* getRecordBuffer();
00106 SamStatus::Status writeRecordBuffer(IFILE filePtr);
00107 int32_t getBlockSize();
00108 const char* getReferenceName();
00109 int32_t getReferenceID();
00110 int32_t get1BasedPosition();
00111 int32_t get0BasedPosition();
00112 uint8_t getReadNameLength();
00113 uint8_t getMapQuality();
00114 uint16_t getBin();
00115 uint16_t getCigarLength();
00116 uint16_t getFlag();
00117 int32_t getReadLength();
00118
00119
00120
00121 const char* getMateReferenceName();
00122
00123
00124
00125
00126 const char* getMateReferenceNameOrEqual();
00127 int32_t getMateReferenceID();
00128 int32_t get1BasedMatePosition();
00129 int32_t get0BasedMatePosition();
00130 int32_t getInsertSize();
00131
00132
00133 int32_t get0BasedAlignmentEnd();
00134 int32_t get1BasedAlignmentEnd();
00135
00136
00137 int32_t getAlignmentLength();
00138
00139
00140 int32_t get0BasedUnclippedStart();
00141 int32_t get1BasedUnclippedStart();
00142
00143 int32_t get0BasedUnclippedEnd();
00144 int32_t get1BasedUnclippedEnd();
00145
00146 const char* getReadName();
00147 const char* getCigar();
00148 const char* getSequence();
00149 const char* getQuality();
00150
00151
00152
00153 char getSequence(int index);
00154
00155
00156
00157 char getQuality(int index);
00158
00159
00160 Cigar* getCigarInfo();
00161
00162 uint32_t getTagLength();
00163
00164
00165
00166
00167 bool getNextSamTag(char* tag, char& vtype, void** value);
00168
00169
00170 bool getFields(bamRecordStruct& recStruct, String& readName,
00171 String& cigar, String& sequence, String& quality);
00172
00173
00174 bool isIntegerType(char vtype) const;
00175 bool isDoubleType(char vtype) const;
00176 bool isCharType(char vtype) const;
00177 bool isStringType(char vtype) const;
00178
00179
00180 void clearTags();
00181
00182
00183
00184 const SamStatus& getStatus();
00185
00186
00187 String & getString(const char * tag);
00188 int & getInteger(const char * tag);
00189 double & getDouble(const char * tag);
00190
00191
00192
00193
00194
00195 bool checkString(const char * tag) { return checkTag(tag, 'Z'); }
00196 bool checkInteger(const char * tag) { return checkTag(tag, 'i'); }
00197 bool checkDouble(const char * tag) { return checkTag(tag, 'f'); }
00198 bool checkTag(const char * tag, char type);
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212 uint32_t getNumOverlaps(int32_t start, int32_t end);
00213
00214
00215 private:
00216 static int MAKEKEY(char ch1, char ch2, char type)
00217 { return (type << 16) + (ch2 << 8) + ch1; }
00218
00219
00220
00221
00222
00223 bool allocateRecordStructure(int size);
00224
00225
00226 void* getStringPtr(int offset);
00227 void* getIntegerPtr(int offset);
00228 void* getDoublePtr(int offset);
00229
00230
00231
00232 bool fixBuffer();
00233
00234
00235
00236
00237
00238 void setSequenceAndQualityFromBuffer();
00239
00240
00241
00242
00243 bool parseCigar();
00244
00245
00246
00247 bool parseCigarBinary();
00248
00249
00250
00251 bool parseCigarString();
00252
00253
00254
00255 bool setTagsFromBuffer();
00256
00257
00258
00259 bool setTagsInBuffer();
00260
00261 void setVariablesForNewBuffer(SamFileHeader& header);
00262
00263 void getVtype(int key, char& vtype) const;
00264 void getTag(int key, char* tag) const;
00265
00266 String & getString(int offset);
00267 int & getInteger(int offset);
00268 double & getDouble(int offset);
00269
00270 static const int DEFAULT_BLOCK_SIZE = 40;
00271 static const int DEFAULT_BIN = 4680;
00272 static const int DEFAULT_READ_NAME_LENGTH = 8;
00273 static const char* DEFAULT_READ_NAME;
00274 static const char* FIELD_ABSENT_STRING;
00275
00276 bamRecordStruct * myRecordPtr;
00277 int allocatedSize;
00278
00279
00280
00281 uint32_t* myCigarTempBuffer;
00282
00283
00284 int myCigarTempBufferAllocatedSize;
00285
00286
00287 int myCigarTempBufferLength;
00288
00289
00290
00291
00292
00293 bool myIsBufferSynced;
00294
00295
00296 bool myNeedToSetTagsFromBuffer;
00297
00298
00299
00300
00301 bool myNeedToSetTagsInBuffer;
00302
00303 int myTagBufferSize;
00304 int myLastTagIndex;
00305
00306 String myReadName;
00307 String myReferenceName;
00308 String myMateReferenceName;
00309 String myCigar;
00310 String mySequence;
00311 String myQuality;
00312
00313
00314 int32_t myAlignmentLength;
00315
00316 int32_t myUnclippedStartOffset;
00317 int32_t myUnclippedEndOffset;
00318
00319 CigarRoller myCigarRoller;
00320
00321 LongHash<int> extras;
00322 StringArray strings;
00323 IntArray integers;
00324 Vector doubles;
00325
00326
00327
00328
00329 bool myIsReadNameBufferValid;
00330 bool myIsCigarBufferValid;
00331 bool myIsSequenceBufferValid;
00332 bool myIsQualityBufferValid;
00333 bool myIsTagsBufferValid;
00334 bool myIsBinValid;
00335
00336 SamStatus myStatus;
00337
00338 String NOT_FOUND_TAG_STRING;
00339 int NOT_FOUND_TAG_INT;
00340 double NOT_FOUND_TAG_DOUBLE;
00341 };
00342
00343 #endif