SamRecord.cpp

00001 /*
00002  *  Copyright (C) 2010-2011  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021 
00022 #include "bam.h"
00023 
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026 
00027 #include "BaseUtilities.h"
00028 #include "SamQuerySeqWithRefHelper.h"
00029 
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032 
00033 
00034 SamRecord::SamRecord()
00035     : myStatus(),
00036       myRefPtr(NULL),
00037       mySequenceTranslation(NONE)
00038 {
00039     int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00040 
00041     myRecordPtr = 
00042         (bamRecordStruct *) malloc(defaultAllocSize);
00043 
00044     myCigarTempBuffer = NULL;
00045     myCigarTempBufferAllocatedSize = 0;
00046 
00047     allocatedSize = defaultAllocSize;
00048 
00049     resetRecord();
00050 }
00051 
00052 
00053 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00054     : myStatus(errorHandlingType),
00055       myRefPtr(NULL),
00056       mySequenceTranslation(NONE)
00057 {
00058     int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00059 
00060     myRecordPtr = 
00061         (bamRecordStruct *) malloc(defaultAllocSize);
00062 
00063     myCigarTempBuffer = NULL;
00064     myCigarTempBufferAllocatedSize = 0;
00065 
00066     allocatedSize = defaultAllocSize;
00067 
00068     resetRecord();
00069 }
00070 
00071 
00072 SamRecord::~SamRecord()
00073 {
00074     resetRecord();
00075 
00076     if(myRecordPtr != NULL)
00077     {
00078         free(myRecordPtr);
00079         myRecordPtr = NULL;
00080     }
00081     if(myCigarTempBuffer != NULL)
00082     {
00083         free(myCigarTempBuffer);
00084         myCigarTempBuffer = NULL;
00085         myCigarTempBufferAllocatedSize = 0;
00086     }
00087 }
00088 
00089 
00090 // Resets the fields of the record to a default value.
00091 void SamRecord::resetRecord()
00092 {
00093     myIsBufferSynced = true;
00094 
00095     myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00096     myRecordPtr->myReferenceID = -1;
00097     myRecordPtr->myPosition = -1;
00098     myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00099     myRecordPtr->myMapQuality = 0;
00100     myRecordPtr->myBin = DEFAULT_BIN;
00101     myRecordPtr->myCigarLength = 0;
00102     myRecordPtr->myFlag = 0;
00103     myRecordPtr->myReadLength = 0;
00104     myRecordPtr->myMateReferenceID = -1;
00105     myRecordPtr->myMatePosition = -1;
00106     myRecordPtr->myInsertSize = 0;
00107    
00108     // Set the sam values for the variable length fields.
00109     // TODO - one way to speed this up might be to not set to "*" and just
00110     // clear them, and write out a '*' for SAM if it is empty.
00111     myReadName = DEFAULT_READ_NAME;
00112     myReferenceName = "*";
00113     myMateReferenceName = "*";
00114     myCigar = "*";
00115     mySequence = "*";
00116     mySeqWithEq.clear();
00117     mySeqWithoutEq.clear();
00118     myQuality = "*";
00119     myNeedToSetTagsFromBuffer = false;
00120     myNeedToSetTagsInBuffer = false;
00121 
00122     // Initialize the calculated alignment info to the uncalculated value.
00123     myAlignmentLength = -1;
00124     myUnclippedStartOffset = -1;
00125     myUnclippedEndOffset = -1;
00126 
00127     clearTags();
00128 
00129     // Set the bam values for the variable length fields.
00130     // Only the read name needs to be set, the others are a length of 0.
00131     // Set the read name.  The min size of myRecordPtr includes the size for
00132     // the default read name.
00133     memcpy(&(myRecordPtr->myData), myReadName.c_str(), 
00134            myRecordPtr->myReadNameLength);
00135 
00136     // Set that the variable length buffer fields are valid.
00137     myIsReadNameBufferValid = true;
00138     myIsCigarBufferValid = true;
00139     myIsSequenceBufferValid = true;
00140     myBufferSequenceTranslation = NONE;
00141     myIsQualityBufferValid = true;
00142     myIsTagsBufferValid = true;
00143     myIsBinValid = true;
00144 
00145     myCigarTempBufferLength = -1;
00146 
00147     myStatus = SamStatus::SUCCESS;
00148 
00149     NOT_FOUND_TAG_STRING = "";
00150     NOT_FOUND_TAG_INT = -1;
00151     NOT_FOUND_TAG_DOUBLE = -1;
00152 }
00153 
00154 
00155 // Returns whether or not the record is valid.
00156 // Header is needed to perform some validation against it.
00157 bool SamRecord::isValid(SamFileHeader& header)
00158 {
00159     myStatus = SamStatus::SUCCESS;
00160     SamValidationErrors invalidSamErrors;
00161     if(!SamValidator::isValid(header, *this, invalidSamErrors))
00162     {
00163         // The record is not valid.
00164         std::string errorMessage = "";
00165         invalidSamErrors.getErrorString(errorMessage);
00166         myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00167         return(false);
00168     }
00169     // The record is valid.
00170     return(true);
00171 }
00172 
00173 
00174 void SamRecord::setReference(GenomeSequence* reference)
00175 {
00176     myRefPtr = reference;
00177 }
00178 
00179 
00180 // Set the type of sequence translation to use when getting
00181 // the sequence.  The default type (if this method is never called) is
00182 // NONE (the sequence is left as-is).  This is used 
00183 void SamRecord::setSequenceTranslation(SequenceTranslation translation)
00184 {
00185     mySequenceTranslation = translation;
00186 }
00187 
00188 
00189 bool SamRecord::setReadName(const char* readName) 
00190 {
00191     myReadName = readName;
00192     myIsBufferSynced = false;
00193     myIsReadNameBufferValid = false;
00194     myStatus = SamStatus::SUCCESS;
00195 
00196     // The read name must at least have some length, otherwise this is a parsing
00197     // error.
00198     if(myReadName.Length() == 0)
00199     {
00200         // Invalid - reset ReadName return false.
00201         myReadName = DEFAULT_READ_NAME;
00202         myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00203         myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00204         return(false);
00205     }
00206 
00207     return true;
00208 }
00209 
00210 
00211 bool SamRecord::setFlag(uint16_t flag)
00212 {
00213     myStatus = SamStatus::SUCCESS;
00214     myRecordPtr->myFlag = flag;
00215     return true;
00216 }
00217 
00218 
00219 bool SamRecord::setReferenceName(SamFileHeader& header,
00220                                  const char* referenceName) 
00221 {
00222     myStatus = SamStatus::SUCCESS;
00223 
00224     myReferenceName = referenceName;
00225     // If the reference ID does not already exist, add it (pass true)
00226     myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
00227 
00228     return true;
00229 }
00230 
00231 
00232 bool SamRecord::set1BasedPosition(int32_t position) 
00233 {
00234     return(set0BasedPosition(position - 1));
00235 }
00236 
00237 
00238 bool SamRecord::set0BasedPosition(int32_t position)
00239 {
00240     myStatus = SamStatus::SUCCESS;
00241     myRecordPtr->myPosition = position;
00242     myIsBinValid = false;
00243     return true;
00244 }
00245 
00246 
00247 bool SamRecord::setMapQuality(uint8_t mapQuality)
00248 {
00249     myStatus = SamStatus::SUCCESS;
00250     myRecordPtr->myMapQuality = mapQuality;
00251     return true;
00252 }
00253 
00254 
00255 bool SamRecord::setCigar(const char* cigar)
00256 {
00257     myStatus = SamStatus::SUCCESS;
00258     myCigar = cigar;
00259  
00260     myIsBufferSynced = false;
00261     myIsCigarBufferValid = false;
00262     myCigarTempBufferLength = -1;
00263     myIsBinValid = false;
00264 
00265     // Initialize the calculated alignment info to the uncalculated value.
00266     myAlignmentLength = -1;
00267     myUnclippedStartOffset = -1;
00268     myUnclippedEndOffset = -1;
00269 
00270     return true;
00271 }
00272 
00273 
00274 bool SamRecord::setCigar(const Cigar& cigar)
00275 {
00276     myStatus = SamStatus::SUCCESS;
00277     cigar.getCigarString(myCigar);
00278  
00279     myIsBufferSynced = false;
00280     myIsCigarBufferValid = false;
00281     myCigarTempBufferLength = -1;
00282     myIsBinValid = false;
00283 
00284     // Initialize the calculated alignment info to the uncalculated value.
00285     myAlignmentLength = -1;
00286     myUnclippedStartOffset = -1;
00287     myUnclippedEndOffset = -1;
00288 
00289     return true;
00290 }
00291 
00292 
00293 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00294                                      const char* mateReferenceName) 
00295 {
00296     myStatus = SamStatus::SUCCESS;
00297     // Set the mate reference, if it is "=", set it to be equal
00298     // to myReferenceName.  This assumes that myReferenceName has already
00299     // been called.
00300     if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00301     {
00302         myMateReferenceName = myReferenceName;
00303     }
00304     else
00305     {
00306         myMateReferenceName = mateReferenceName;
00307     }
00308 
00309     // Set the Mate Reference ID.
00310     // If the reference ID does not already exist, add it (pass true)
00311     myRecordPtr->myMateReferenceID = 
00312         header.getReferenceID(myMateReferenceName, true);
00313 
00314     return true;
00315 }
00316 
00317 
00318 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00319 {
00320     return(set0BasedMatePosition(matePosition - 1));
00321 }
00322 
00323 
00324 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00325 {
00326     myStatus = SamStatus::SUCCESS;
00327     myRecordPtr->myMatePosition = matePosition;
00328     return true;
00329 }
00330 
00331 
00332 bool SamRecord::setInsertSize(int32_t insertSize)
00333 {
00334     myStatus = SamStatus::SUCCESS;
00335     myRecordPtr->myInsertSize = insertSize;
00336     return true;
00337 }
00338 
00339 
00340 bool SamRecord::setSequence(const char* seq) 
00341 {
00342     myStatus = SamStatus::SUCCESS;
00343     mySequence = seq;
00344     mySeqWithEq.clear();
00345     mySeqWithoutEq.clear();
00346    
00347     myIsBufferSynced = false;
00348     myIsSequenceBufferValid = false;
00349     return true;
00350 }
00351 
00352 
00353 bool SamRecord::setQuality(const char* quality) 
00354 {
00355     myStatus = SamStatus::SUCCESS;
00356     myQuality = quality;
00357     myIsBufferSynced = false;
00358     myIsQualityBufferValid = false;
00359     return true;
00360 }
00361 
00362 
00363 //Shift indels to the left
00364 bool SamRecord::shiftIndelsLeft()
00365 {
00366     // Check to see whether or not the Cigar has already been
00367     // set - this is determined by checking if alignment length
00368     // is set since alignment length and the cigar are set
00369     // at the same time.
00370     if(myAlignmentLength == -1)
00371     {
00372         // Not been set, so calculate it.
00373         parseCigar();
00374     }
00375     
00376     // Track whether or not there was a shift.
00377     bool shifted = false;
00378 
00379     // Cigar is set, so now myCigarRoller can be used.
00380     // Track where in the read we are.
00381     uint32_t currentPos = 0;
00382 
00383     // Since the loop starts at 1 because the first operation can't be shifted,
00384     // increment the currentPos past the first operation.
00385     if(Cigar::foundInQuery(myCigarRoller[0]))
00386     {
00387         // This op was found in the read, increment the current position.
00388         currentPos += myCigarRoller[0].count;
00389     }
00390    
00391     int numOps = myCigarRoller.size();
00392     
00393     // Loop through the cigar operations from the 2nd operation since
00394     // the first operation is already on the end and can't shift.
00395     for(int currentOp = 1; currentOp < numOps; currentOp++)
00396     {
00397         if(myCigarRoller[currentOp].operation == Cigar::insert)
00398         {
00399             // For now, only shift a max of 1 operation.
00400             int prevOpIndex = currentOp-1;
00401             // Track the next op for seeing if it is the same as the
00402             // previous for merging reasons.
00403             int nextOpIndex = currentOp+1;
00404             if(nextOpIndex == numOps)
00405             {
00406                 // There is no next op, so set it equal to the current one.
00407                 nextOpIndex = currentOp;
00408             }
00409             // The start of the previous operation, so we know when we hit it
00410             // so we don't shift past it.
00411             uint32_t prevOpStart = 
00412                 currentPos - myCigarRoller[prevOpIndex].count;
00413 
00414             // We can only shift if the previous operation
00415             if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
00416             {
00417                 // TODO - shift past pads
00418                 // An insert is in the read, so increment the position.
00419                 currentPos += myCigarRoller[currentOp].count;                 
00420                 // Not a match/mismatch, so can't shift into it.
00421                 continue;
00422             }
00423                     
00424             // It is a match or mismatch, so check to see if we can
00425             // shift into it.
00426 
00427             // The end of the insert is calculated by adding the size
00428             // of this insert minus 1 to the start of the insert.
00429             uint32_t insertEndPos = 
00430                 currentPos + myCigarRoller[currentOp].count - 1;
00431                 
00432             // The insert starts at the current position.
00433             uint32_t insertStartPos = currentPos;
00434                 
00435             // Loop as long as the position before the insert start
00436             // matches the last character in the insert. If they match,
00437             // the insert can be shifted one index left because the
00438             // implied reference will not change.  If they do not match,
00439             // we can't shift because the implied reference would change.
00440             // Stop loop when insertStartPos = prevOpStart, because we 
00441             // don't want to move past that.
00442             while((insertStartPos > prevOpStart) && 
00443                   (getSequence(insertEndPos,BASES) == 
00444                    getSequence(insertStartPos - 1, BASES)))
00445             {
00446                 // We can shift, so move the insert start & end one left.
00447                 --insertEndPos;
00448                 --insertStartPos;
00449             }
00450 
00451             // Determine if a shift has occurred.
00452             int shiftLen = currentPos - insertStartPos;
00453             if(shiftLen > 0)
00454             {
00455                 // Shift occured, so adjust the cigar if the cigar will
00456                 // not become more operations.
00457                 // If the next operation is the same as the previous or
00458                 // if the insert and the previous operation switch positions
00459                 // then the cigar has the same number of operations.
00460                 // If the next operation is different, and the shift splits
00461                 // the previous operation in 2, then the cigar would
00462                 // become longer, so we do not want to shift.
00463                 if(myCigarRoller[nextOpIndex].operation == 
00464                    myCigarRoller[prevOpIndex].operation)
00465                 {
00466                     // The operations are the same, so merge them by adding
00467                     // the length of the shift to the next operation.
00468                     myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
00469                     myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
00470 
00471                     // If the previous op length is 0, just remove that
00472                     // operation.
00473                     if(myCigarRoller[prevOpIndex].count == 0)
00474                     {
00475                         myCigarRoller.Remove(prevOpIndex);
00476                     }
00477                     shifted = true;
00478                 } 
00479                 else
00480                 {
00481                     // Can only shift if the insert shifts past the
00482                     // entire previous operation, otherwise an operation
00483                     // would need to be added.
00484                     if(insertStartPos == prevOpStart)
00485                     { 
00486                         // Swap the positions of the insert and the
00487                         // previous operation.
00488                         myCigarRoller.Update(currentOp,
00489                                              myCigarRoller[prevOpIndex].operation,
00490                                              myCigarRoller[prevOpIndex].count);
00491                         // Size of the previous op is the entire
00492                         // shift length.
00493                         myCigarRoller.Update(prevOpIndex, 
00494                                              Cigar::insert,
00495                                              shiftLen);
00496                         shifted = true;
00497                     }
00498                 }
00499             }
00500             // An insert is in the read, so increment the position.
00501             currentPos += myCigarRoller[currentOp].count;                 
00502         }
00503         else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
00504         {
00505             // This op was found in the read, increment the current position.
00506             currentPos += myCigarRoller[currentOp].count;
00507         }
00508     }
00509     if(shifted)
00510     {
00511         // TODO - setCigar is currently inefficient because later the cigar
00512         // roller will be recalculated, but for now it will work.
00513         setCigar(myCigarRoller);
00514     }
00515     return(shifted);
00516 }
00517 
00518 
00519 // Set the BAM record from the passeed in buffer of the specified size.
00520 // Note: The size includes the block size.
00521 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00522                                        uint32_t fromBufferSize,
00523                                        SamFileHeader& header)
00524 {
00525     myStatus = SamStatus::SUCCESS;
00526     if((fromBuffer == NULL) || (fromBufferSize == 0))
00527     {
00528         // Buffer is empty.
00529         myStatus.setStatus(SamStatus::FAIL_PARSE,
00530                            "Cannot parse an empty file.");
00531         return(SamStatus::FAIL_PARSE);
00532     }
00533 
00534     // Clear the record.   
00535     resetRecord();
00536 
00537     // allocate space for the record size.
00538     if(!allocateRecordStructure(fromBufferSize))
00539     {
00540         // Failed to allocate space.
00541         return(SamStatus::FAIL_MEM);
00542     }
00543    
00544     memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00545 
00546     setVariablesForNewBuffer(header);
00547 
00548     // Return the status of the record.
00549     return(SamStatus::SUCCESS);
00550 }
00551 
00552 
00553 // Read the BAM record from a file.
00554 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr, 
00555                                                SamFileHeader& header)
00556 {
00557     myStatus = SamStatus::SUCCESS;
00558     if((filePtr == NULL) || (filePtr->isOpen() == false))
00559     {
00560         // File is not open, return failure.
00561         myStatus.setStatus(SamStatus::FAIL_ORDER, 
00562                            "Can't read from an unopened file.");
00563         return(SamStatus::FAIL_ORDER);
00564     }
00565 
00566     // Clear the record.
00567     resetRecord();
00568 
00569     // read the record size.
00570     int numBytes = 
00571         ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00572 
00573     // Check to see if the end of the file was hit and no bytes were read.
00574     if(ifeof(filePtr) && (numBytes == 0))
00575     {
00576         // End of file, nothing was read, no more records.
00577         myStatus.setStatus(SamStatus::NO_MORE_RECS,
00578                            "No more records left to read.");
00579         return(SamStatus::NO_MORE_RECS);
00580     }
00581     
00582     if(numBytes != sizeof(int32_t))
00583     {
00584         // Failed to read the entire block size.  Either the end of the file
00585         // was reached early or there was an error.
00586         if(ifeof(filePtr))
00587         {
00588             // Error: end of the file reached prior to reading the rest of the
00589             // record.
00590             myStatus.setStatus(SamStatus::FAIL_PARSE, 
00591                                "EOF reached in the middle of a record.");
00592             return(SamStatus::FAIL_PARSE);
00593         }
00594         else
00595         {
00596             // Error reading.
00597             myStatus.setStatus(SamStatus::FAIL_IO, 
00598                                "Failed to read the record size.");
00599             return(SamStatus::FAIL_IO);
00600         }
00601     }
00602 
00603     // allocate space for the record size.
00604     if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00605     {
00606         // Failed to allocate space.
00607         // Status is set by allocateRecordStructure.
00608         return(SamStatus::FAIL_MEM);
00609     }
00610 
00611     // Read the rest of the alignment block, starting at the reference id.
00612     if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00613        != (unsigned int)myRecordPtr->myBlockSize)
00614     {
00615         // Error reading the record.  Reset it and return failure.
00616         resetRecord();
00617         myStatus.setStatus(SamStatus::FAIL_IO,
00618                            "Failed to read the record");
00619         return(SamStatus::FAIL_IO);
00620     }
00621 
00622     setVariablesForNewBuffer(header);
00623 
00624     // Return the status of the record.
00625     return(SamStatus::SUCCESS);
00626 }
00627 
00628 
00629 // Add the specified tag to the record.
00630 // Returns true if the tag was successfully added, false otherwise.
00631 bool SamRecord::addIntTag(const char* tag, int32_t value)
00632 {
00633     myStatus = SamStatus::SUCCESS;
00634     int key = 0;
00635     int index = 0;
00636     char bamvtype;
00637 
00638     int tagBufferSize = 0;
00639 
00640     // First check to see if the tags need to be synced to the buffer.
00641     if(myNeedToSetTagsFromBuffer)
00642     {
00643         if(!setTagsFromBuffer())
00644         {
00645             // Failed to read tags from the buffer, so cannot add new ones.
00646             return(false);
00647         }
00648     }
00649 
00650     // Ints come in as int.  But it can be represented in fewer bits.
00651     // So determine a more specific type that is in line with the
00652     // types for BAM files.
00653     // First check to see if it is a negative.
00654     if(value < 0)
00655     {
00656         // The int is negative, so it will need to use a signed type.
00657         // See if it is greater than the min value for a char.
00658         if(value > std::numeric_limits<char>::min())
00659         {
00660             // It can be stored in a signed char.
00661             bamvtype = 'c';
00662             tagBufferSize += 4;
00663         }
00664         else if(value > std::numeric_limits<short>::min())
00665         {
00666             // It fits in a signed short.
00667             bamvtype = 's';
00668             tagBufferSize += 5;
00669         }
00670         else
00671         {
00672             // Just store it as a signed int.
00673             bamvtype = 'i';
00674             tagBufferSize += 7;
00675         }
00676     }
00677     else
00678     {
00679         // It is positive, so an unsigned type can be used.
00680         if(value < std::numeric_limits<unsigned char>::max())
00681         {
00682             // It is under the max of an unsigned char.
00683             bamvtype = 'C';
00684             tagBufferSize += 4;
00685         }
00686         else if(value < std::numeric_limits<unsigned short>::max())
00687         {
00688             // It is under the max of an unsigned short.
00689             bamvtype = 'S';
00690             tagBufferSize += 5;
00691         }
00692         else
00693         {
00694             // Just store it as an unsigned int.
00695             bamvtype = 'I';
00696             tagBufferSize += 7;
00697         }
00698     }
00699 
00700     // Check to see if the tag is already there.
00701     key = MAKEKEY(tag[0], tag[1], bamvtype);
00702     unsigned int hashIndex = extras.Find(key);
00703     if(hashIndex != LH_NOTFOUND)
00704     {
00705         // Tag was already found.
00706         index = extras[hashIndex];
00707 
00708         // First check to see if the value changed.
00709         if((integers[index] == value) && (intType[index] == bamvtype))
00710         {
00711             // The value has not changed, so do nothing.
00712             return(true);
00713         }
00714         else
00715         {
00716             // Not the same value, so adjust the settings.
00717             // Subtract the size of the previous tag from tagBufferSize to get
00718             // the adjusted size.
00719             switch(intType[index])
00720             {
00721                 case 'c':
00722                 case 'C':
00723                     tagBufferSize -= 4;
00724                     break;
00725                 case 's':
00726                 case 'S':
00727                     tagBufferSize -= 5;
00728                     break;
00729                 case 'i':
00730                 case 'I':
00731                     tagBufferSize -= 7;
00732                     break;
00733                 default:
00734                 myStatus.setStatus(SamStatus::INVALID, 
00735                                    "unknown tag inttype type found.\n");
00736                 return(false);              
00737             }
00738             
00739             // Update the integer value and type.
00740             integers[index] = value;
00741             intType[index] = bamvtype;
00742         }
00743     }
00744     else
00745     {
00746         // Tag is not already there, so add it.
00747         index = integers.Length();
00748         
00749         integers.Push(value);
00750         intType.push_back(bamvtype);
00751 
00752         extras.Add(key, index);
00753     }
00754 
00755     // The buffer tags are now out of sync.
00756     myNeedToSetTagsInBuffer = true;
00757     myIsTagsBufferValid = false;
00758     myIsBufferSynced = false;
00759     myTagBufferSize += tagBufferSize;
00760 
00761     return(true);
00762 }
00763 
00764 
00765 // Add the specified tag to the record, replacing it if it is already there and
00766 // is different from the previous value.
00767 // Returns true if the tag was successfully added (or was already there), false otherwise.
00768 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00769 {
00770     if(vtype == 'i')
00771     {
00772         // integer type.  Call addIntTag to handle it.
00773         int intVal = atoi(valuePtr);
00774         return(addIntTag(tag, intVal));
00775     }
00776 
00777     // Non-int type.
00778     myStatus = SamStatus::SUCCESS;
00779     bool status = true; // default to successful.
00780     int key = 0;
00781     int index = 0;
00782 
00783     int tagBufferSize = 0;
00784 
00785     // First check to see if the tags need to be synced to the buffer.
00786     if(myNeedToSetTagsFromBuffer)
00787     {
00788         if(!setTagsFromBuffer())
00789         {
00790             // Failed to read tags from the buffer, so cannot add new ones.
00791             return(false);
00792         }
00793     }
00794 
00795     // First check to see if the tag is already there.
00796     key = MAKEKEY(tag[0], tag[1], vtype);
00797     unsigned int hashIndex = extras.Find(key);
00798     if(hashIndex != LH_NOTFOUND)
00799     {
00800         // The key was found in the hash, so get the lookup index.
00801         index = extras[hashIndex];
00802 
00803         // Adjust the currently pointed to value to the new setting.
00804         switch (vtype)
00805         {
00806             case 'A' :
00807                 // First check to see if the value changed.
00808                 if(integers[index] == (const int)*(valuePtr))
00809                 {
00810                     // The value has not changed, so do nothing.
00811                     return(true);
00812                 }
00813                 else
00814                 {
00815                     // Tag buffer size doesn't change between different 'A' entries.
00816                     integers[index] = (const int)*(valuePtr);
00817                     intType[index] = vtype;
00818                 }
00819                 break;
00820             case 'Z' :
00821                 // First check to see if the value changed.
00822                 if(strings[index] == valuePtr)
00823                 {
00824                     // The value has not changed, so do nothing.
00825                     return(true);
00826                 }
00827                 else
00828                 {
00829                     // Adjust the tagBufferSize by removing the size of the old string.
00830                     tagBufferSize -= strings[index].Length();
00831                     strings[index] = valuePtr;
00832                     // Adjust the tagBufferSize by adding the size of the new string.
00833                     tagBufferSize += strings[index].Length();
00834                 }
00835                 break;
00836             case 'f' :
00837                 // First check to see if the value changed.
00838                 if(doubles[index] == atof(valuePtr))
00839                 {
00840                     // The value has not changed, so do nothing.
00841                     return(true);
00842                 }
00843                 else
00844                 {
00845                     // Tag buffer size doesn't change between different 'f' entries.
00846                     doubles[index] = atof(valuePtr);
00847                 }
00848                 break;
00849             default :
00850                 fprintf(stderr,
00851                         "samFile::ReadSAM() - Unknown custom field of type %c\n",
00852                         vtype);
00853                 myStatus.setStatus(SamStatus::FAIL_PARSE, 
00854                                    "Unknown custom field in a tag");
00855                 status = false;
00856                 break;
00857         }
00858     }
00859     else
00860     {
00861         // The key was found not found in the hash, so add it.
00862         switch (vtype)
00863         {
00864             case 'A' :
00865                 index = integers.Length();
00866                 integers.Push((const int)*(valuePtr));
00867                 intType.push_back(vtype);
00868                 tagBufferSize += 4;
00869                 break;
00870             case 'Z' :
00871                 index = strings.Length();
00872                 strings.Push(valuePtr);
00873                 tagBufferSize += 4 + strings.Last().Length();
00874                 break;
00875             case 'f' :
00876                 index = doubles.Length();
00877                 doubles.Push(atof(valuePtr));
00878                 tagBufferSize += 7;
00879                 break;
00880             default :
00881                 fprintf(stderr,
00882                         "samFile::ReadSAM() - Unknown custom field of type %c\n",
00883                         vtype);
00884                 myStatus.setStatus(SamStatus::FAIL_PARSE, 
00885                                    "Unknown custom field in a tag");
00886                 status = false;
00887                 break;
00888         }
00889         if(status)
00890         {
00891             // If successful, add the key to extras.
00892             extras.Add(key, index);
00893         }
00894     }
00895 
00896     // Only add the tag if it has so far been successfully processed.
00897     if(status)
00898     {
00899         // The buffer tags are now out of sync.
00900         myNeedToSetTagsInBuffer = true;
00901         myIsTagsBufferValid = false;
00902         myIsBufferSynced = false;
00903         myTagBufferSize += tagBufferSize;
00904     }
00905     return(status);
00906 }
00907 
00908 
00909 void SamRecord::clearTags()
00910 {
00911     if(extras.Entries() != 0)
00912     {
00913         extras.Clear();
00914     }
00915     strings.Clear();
00916     integers.Clear();
00917     intType.clear();
00918     doubles.Clear();
00919     myTagBufferSize = 0;
00920     resetTagIter();
00921 }
00922 
00923 
00924 bool SamRecord::rmTag(const char* tag, char type)
00925 {
00926     // Check the length of tag.
00927     if(strlen(tag) != 2)
00928     {
00929         // Tag is the wrong length.
00930         myStatus.setStatus(SamStatus::INVALID, 
00931                            "rmTag called with tag that is not 2 characters\n");
00932         return(false);
00933     }
00934 
00935     myStatus = SamStatus::SUCCESS;
00936     if(myNeedToSetTagsFromBuffer)
00937     {
00938         if(!setTagsFromBuffer())
00939         {
00940             // Failed to read the tags from the buffer, so cannot
00941             // get tags.
00942             return(false);
00943         }
00944     }
00945 
00946     // Construct the key.
00947     int key = MAKEKEY(tag[0], tag[1], type);
00948     // Look to see if the key exsists in the hash.
00949     int offset = extras.Find(key);
00950 
00951     if(offset < 0)
00952     {
00953         // Not found, so return true, successfully removed since
00954         // it is not in tag.
00955         return(true);
00956     }
00957 
00958     // Offset is set, so the key was found.
00959     // First if it is an integer, determine the actual type of the int.
00960     char vtype;
00961     getTypeFromKey(key, vtype);
00962     if(vtype == 'i')
00963     {
00964         vtype = getIntegerType(offset);
00965     }
00966 
00967     // Offset is set, so recalculate the buffer size without this entry.
00968     // Do NOT remove from strings, integers, or doubles because then
00969     // extras would need to be updated for all entries with the new indexes
00970     // into those variables.
00971     int rmBuffSize = 0;
00972     switch(vtype)
00973     {
00974         case 'A':
00975         case 'c':
00976         case 'C':
00977             rmBuffSize = 4;
00978             break;
00979         case 's':
00980         case 'S':
00981             rmBuffSize = 5;
00982             break;
00983         case 'i':
00984         case 'I':
00985             rmBuffSize = 7;
00986             break;
00987         case 'f':
00988             rmBuffSize = 7;
00989             break;
00990         case 'Z':
00991             rmBuffSize = 4 + getString(offset).Length();
00992             break;
00993         default:
00994             myStatus.setStatus(SamStatus::INVALID, 
00995                                "rmTag called with unknown type.\n");
00996             return(false);
00997             break;
00998     };
00999 
01000     // The buffer tags are now out of sync.
01001     myNeedToSetTagsInBuffer = true;
01002     myIsTagsBufferValid = false;
01003     myIsBufferSynced = false;
01004     myTagBufferSize -= rmBuffSize;
01005 
01006     // Remove from the hash.
01007     extras.Delete(offset);
01008     return(true);
01009 }
01010 
01011 
01012 bool SamRecord::rmTags(const char* tags)
01013 {
01014     const char* currentTagPtr = tags;
01015 
01016     myStatus = SamStatus::SUCCESS;
01017     if(myNeedToSetTagsFromBuffer)
01018     {
01019         if(!setTagsFromBuffer())
01020         {
01021             // Failed to read the tags from the buffer, so cannot
01022             // get tags.
01023             return(false);
01024         }
01025     }
01026     
01027     bool returnStatus = true;
01028 
01029     int rmBuffSize = 0;
01030     while(*currentTagPtr != '\0')
01031     {
01032 
01033         // Tags are formatted as: XY:Z
01034         // Where X is [A-Za-z], Y is [A-Za-z], and
01035         // Z is A,i,f,Z,H (cCsSI are also excepted)
01036         if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
01037            (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
01038         {
01039             myStatus.setStatus(SamStatus::INVALID, 
01040                                "rmTags called with improperly formatted tags.\n");
01041             returnStatus = false;
01042             break;
01043         }
01044 
01045         // Construct the key.
01046         int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1], 
01047                           currentTagPtr[3]);
01048         // Look to see if the key exsists in the hash.
01049         int offset = extras.Find(key);
01050 
01051         if(offset >= 0)
01052         {
01053             // Offset is set, so the key was found.
01054             // First if it is an integer, determine the actual type of the int.
01055             char vtype;
01056             getTypeFromKey(key, vtype);
01057             if(vtype == 'i')
01058             {
01059                 vtype = getIntegerType(offset);
01060             }
01061             
01062             // Offset is set, so recalculate the buffer size without this entry.
01063             // Do NOT remove from strings, integers, or doubles because then
01064             // extras would need to be updated for all entries with the new indexes
01065             // into those variables.
01066             switch(vtype)
01067             {
01068                 case 'A':
01069                 case 'c':
01070                 case 'C':
01071                     rmBuffSize += 4;
01072                     break;
01073                 case 's':
01074                 case 'S':
01075                     rmBuffSize += 5;
01076                     break;
01077                 case 'i':
01078                 case 'I':
01079                     rmBuffSize += 7;
01080                     break;
01081                 case 'f':
01082                     rmBuffSize += 7;
01083                     break;
01084                 case 'Z':
01085                     rmBuffSize += 4 + getString(offset).Length();
01086                     break;
01087                 default:
01088                     myStatus.setStatus(SamStatus::INVALID, 
01089                                        "rmTag called with unknown type.\n");
01090                     returnStatus = false;
01091                     break;
01092             };
01093             
01094             // Remove from the hash.
01095             extras.Delete(offset);
01096         }
01097         // Increment to the next tag.
01098         if(currentTagPtr[4] == ';')
01099         {
01100             // Increment once more.
01101             currentTagPtr += 5;
01102         }
01103         else if(currentTagPtr[4] != '\0')
01104         {
01105             // Invalid tag format. 
01106             myStatus.setStatus(SamStatus::INVALID, 
01107                                "rmTags called with improperly formatted tags.\n");
01108             returnStatus = false;
01109             break;
01110         }
01111         else
01112         {
01113             // Last Tag.
01114             currentTagPtr += 4;
01115         }
01116     }
01117 
01118     // The buffer tags are now out of sync.
01119     myNeedToSetTagsInBuffer = true;
01120     myIsTagsBufferValid = false;
01121     myIsBufferSynced = false;
01122     myTagBufferSize -= rmBuffSize;
01123     
01124 
01125     return(returnStatus);
01126 }
01127 
01128 
01129 // Get methods for record fields.
01130 const void* SamRecord::getRecordBuffer()
01131 {
01132     return(getRecordBuffer(mySequenceTranslation));
01133 }
01134 
01135 
01136 // Get methods for record fields.
01137 const void* SamRecord::getRecordBuffer(SequenceTranslation translation) 
01138 {
01139     myStatus = SamStatus::SUCCESS;
01140     bool status = true;
01141     // If the buffer is not synced or the sequence in the buffer is not
01142     // properly translated, fix the buffer.
01143     if((myIsBufferSynced == false) ||
01144        (myBufferSequenceTranslation != translation))
01145     {
01146         status &= fixBuffer(translation);
01147     }
01148     // If the buffer is synced, check to see if the tags need to be synced.
01149     if(myNeedToSetTagsInBuffer)
01150     {
01151         status &= setTagsInBuffer();
01152     }
01153     if(!status)
01154     {
01155         return(NULL);
01156     }
01157     return (const void *)myRecordPtr;
01158 }
01159 
01160 
01161 // Write the record as a buffer into the file using the class's 
01162 // sequence translation setting.
01163 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
01164 {
01165     return(writeRecordBuffer(filePtr, mySequenceTranslation));
01166 }
01167 
01168 
01169 // Write the record as a buffer into the file using the specified translation.
01170 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr, 
01171                                                SequenceTranslation translation)
01172 {
01173     myStatus = SamStatus::SUCCESS;
01174     if((filePtr == NULL) || (filePtr->isOpen() == false))
01175     {
01176         // File is not open, return failure.
01177         myStatus.setStatus(SamStatus::FAIL_ORDER,
01178                            "Can't write to an unopened file.");
01179         return(SamStatus::FAIL_ORDER);
01180     }
01181 
01182     if((myIsBufferSynced == false) ||
01183        (myBufferSequenceTranslation != translation))
01184     {
01185         if(!fixBuffer(translation))
01186         {
01187             return(myStatus.getStatus());
01188         }
01189     }
01190 
01191     // Write the record.
01192     unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
01193     unsigned int numBytesWritten = 
01194         ifwrite(filePtr, myRecordPtr, numBytesToWrite);
01195 
01196     // Return status based on if the correct number of bytes were written.
01197     if(numBytesToWrite == numBytesWritten)
01198     {
01199         return(SamStatus::SUCCESS);
01200     }
01201     // The correct number of bytes were not written.
01202     myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
01203     return(SamStatus::FAIL_IO);
01204 }
01205 
01206 
01207 int32_t SamRecord::getBlockSize() 
01208 {
01209     myStatus = SamStatus::SUCCESS;
01210     // If the buffer isn't synced, sync the buffer to determine the
01211     // block size.
01212     if(myIsBufferSynced == false)
01213     {
01214         // Since this just returns the block size, the translation of
01215         // the sequence does not matter, so just use the currently set
01216         // value.
01217         fixBuffer(myBufferSequenceTranslation);
01218     }
01219     return myRecordPtr->myBlockSize;
01220 }
01221 
01222 
01223 // This method returns the reference name.
01224 const char* SamRecord::getReferenceName()
01225 {
01226     myStatus = SamStatus::SUCCESS;
01227     return myReferenceName.c_str();
01228 }
01229 
01230 
01231 int32_t SamRecord::getReferenceID()
01232 {
01233     myStatus = SamStatus::SUCCESS;
01234     return myRecordPtr->myReferenceID;
01235 }
01236 
01237 
01238 int32_t SamRecord::get1BasedPosition()
01239 {
01240     myStatus = SamStatus::SUCCESS;
01241     return (myRecordPtr->myPosition + 1);
01242 }
01243 
01244 
01245 int32_t SamRecord::get0BasedPosition()
01246 {
01247     myStatus = SamStatus::SUCCESS;
01248     return myRecordPtr->myPosition;
01249 }
01250 
01251 
01252 uint8_t SamRecord::getReadNameLength() 
01253 {
01254     myStatus = SamStatus::SUCCESS;
01255     // If the buffer is valid, return the size from there, otherwise get the 
01256     // size from the string length + 1 (ending null).
01257     if(myIsReadNameBufferValid)
01258     {
01259         return(myRecordPtr->myReadNameLength);
01260     }
01261    
01262     return(myReadName.Length() + 1);
01263 }
01264 
01265 
01266 uint8_t SamRecord::getMapQuality()
01267 {
01268     myStatus = SamStatus::SUCCESS;
01269     return myRecordPtr->myMapQuality;
01270 }
01271 
01272 
01273 uint16_t SamRecord::getBin()
01274 {
01275     myStatus = SamStatus::SUCCESS;
01276     if(!myIsBinValid)
01277     {
01278         // The bin that is set in the record is not valid, so
01279         // reset it.
01280         myRecordPtr->myBin = 
01281             bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());      
01282         myIsBinValid = true;
01283     }
01284     return(myRecordPtr->myBin);
01285 }
01286 
01287 
01288 uint16_t SamRecord::getCigarLength()
01289 {
01290     myStatus = SamStatus::SUCCESS;
01291     // If the cigar buffer is valid
01292     // then get the length from there.
01293     if(myIsCigarBufferValid)
01294     {
01295         return myRecordPtr->myCigarLength;      
01296     }
01297 
01298     if(myCigarTempBufferLength == -1)
01299     {
01300         // The cigar buffer is not valid and the cigar temp buffer is not set,
01301         // so parse the string.
01302         parseCigarString();
01303     }
01304    
01305     // The temp buffer is now set, so return the size.
01306     return(myCigarTempBufferLength);
01307 }
01308 
01309 
01310 uint16_t SamRecord::getFlag()
01311 {
01312     myStatus = SamStatus::SUCCESS;
01313     return myRecordPtr->myFlag;
01314 }
01315 
01316 
01317 int32_t SamRecord::getReadLength() 
01318 {
01319     myStatus = SamStatus::SUCCESS;
01320     if(myIsSequenceBufferValid == false)
01321     {
01322         // If the sequence is "*", then return 0.
01323         if((mySequence.Length() == 1) && (mySequence[0] == '*'))
01324         {
01325             return(0);
01326         }
01327         // Do not add 1 since it is not null terminated.
01328         return(mySequence.Length());
01329     }
01330     return(myRecordPtr->myReadLength);
01331 }
01332 
01333 
01334 // This method returns the mate reference name.  If it is equal to the
01335 // reference name, it still returns the reference name.
01336 const char* SamRecord::getMateReferenceName()
01337 {
01338     myStatus = SamStatus::SUCCESS;
01339     return myMateReferenceName.c_str();
01340 }
01341 
01342 
01343 // This method returns the mate reference name.  If it is equal to the
01344 // reference name, it returns "=", unless they are both "*" in which case
01345 // "*" is returned.
01346 const char* SamRecord::getMateReferenceNameOrEqual()
01347 {
01348     myStatus = SamStatus::SUCCESS;
01349     if(myMateReferenceName == "*")
01350     {
01351         return(myMateReferenceName);
01352     }
01353     if(myMateReferenceName == getReferenceName())
01354     {
01355         return(FIELD_ABSENT_STRING);
01356     }
01357     else
01358     {
01359         return(myMateReferenceName);
01360     }
01361 }
01362 
01363 
01364 int32_t SamRecord::getMateReferenceID()
01365 {
01366     myStatus = SamStatus::SUCCESS;
01367     return myRecordPtr->myMateReferenceID;
01368 }
01369 
01370 
01371 int32_t SamRecord::get1BasedMatePosition()
01372 {
01373     myStatus = SamStatus::SUCCESS;
01374     return (myRecordPtr->myMatePosition + 1);
01375 }
01376 
01377 
01378 int32_t SamRecord::get0BasedMatePosition()
01379 {
01380     myStatus = SamStatus::SUCCESS;
01381     return myRecordPtr->myMatePosition;
01382 }
01383 
01384 
01385 int32_t SamRecord::getInsertSize()
01386 {
01387     myStatus = SamStatus::SUCCESS;
01388     return myRecordPtr->myInsertSize;
01389 }
01390 
01391 
01392 // Returns the inclusive rightmost position of the clipped sequence.
01393 int32_t SamRecord::get0BasedAlignmentEnd()
01394 {
01395     myStatus = SamStatus::SUCCESS;
01396     if(myAlignmentLength == -1)
01397     {
01398         // Alignment end has not been set, so calculate it.
01399         parseCigar();
01400     }
01401     // If alignment length > 0, subtract 1 from it to get the end.
01402     if(myAlignmentLength == 0)
01403     {
01404         // Length is 0, just return the start position.
01405         return(myRecordPtr->myPosition);
01406     }
01407     return(myRecordPtr->myPosition + myAlignmentLength - 1);
01408 }
01409 
01410 
01411 // Returns the inclusive rightmost position of the clipped sequence.
01412 int32_t SamRecord::get1BasedAlignmentEnd()
01413 {
01414     return(get0BasedAlignmentEnd() + 1);
01415 }
01416 
01417    
01418 // Return the length of the alignment.
01419 int32_t SamRecord::getAlignmentLength()
01420 {
01421     myStatus = SamStatus::SUCCESS;
01422     if(myAlignmentLength == -1)
01423     {
01424         // Alignment end has not been set, so calculate it.
01425         parseCigar();
01426     }
01427     // Return the alignment length.
01428     return(myAlignmentLength);
01429 }
01430 
01431 // Returns the inclusive left-most position adjust for clipped bases.
01432 int32_t SamRecord::get0BasedUnclippedStart()
01433 {
01434     myStatus = SamStatus::SUCCESS;
01435     if(myUnclippedStartOffset == -1)
01436     {
01437         // Unclipped has not yet been calculated, so parse the cigar to get it
01438         parseCigar();
01439     }
01440     return(myRecordPtr->myPosition - myUnclippedStartOffset);
01441 }
01442 
01443 
01444 // Returns the inclusive left-most position adjust for clipped bases.
01445 int32_t SamRecord::get1BasedUnclippedStart()
01446 {
01447     return(get0BasedUnclippedStart() + 1);
01448 }
01449 
01450 
01451 // Returns the inclusive right-most position adjust for clipped bases.
01452 int32_t SamRecord::get0BasedUnclippedEnd()
01453 {
01454     // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the 
01455     // cigar has not yet been parsed, so no need to check it here.
01456     return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
01457 }
01458 
01459 
01460 // Returns the inclusive right-most position adjust for clipped bases.
01461 int32_t SamRecord::get1BasedUnclippedEnd()
01462 {
01463     return(get0BasedUnclippedEnd() + 1);
01464 }
01465 
01466 
01467 // Get the read name.
01468 const char* SamRecord::getReadName()
01469 {
01470     myStatus = SamStatus::SUCCESS;
01471     if(myReadName.Length() == 0)
01472     {
01473         // 0 Length, means that it is in the buffer, but has not yet
01474         // been synced to the string, so do the sync.
01475         myReadName = (char*)&(myRecordPtr->myData);
01476     }
01477     return myReadName.c_str();
01478 }
01479 
01480 
01481 const char* SamRecord::getCigar()
01482 {
01483     myStatus = SamStatus::SUCCESS;
01484     if(myCigar.Length() == 0)
01485     {
01486         // 0 Length, means that it is in the buffer, but has not yet
01487         // been synced to the string, so do the sync.
01488         parseCigarBinary();
01489     }
01490     return myCigar.c_str();
01491 }
01492 
01493 
01494 const char* SamRecord::getSequence()
01495 {
01496     return(getSequence(mySequenceTranslation));
01497 }
01498 
01499 
01500 const char* SamRecord::getSequence(SequenceTranslation translation)
01501 {
01502     myStatus = SamStatus::SUCCESS;
01503     if(mySequence.Length() == 0)
01504     {
01505         // 0 Length, means that it is in the buffer, but has not yet
01506         // been synced to the string, so do the sync.
01507         setSequenceAndQualityFromBuffer();
01508     }
01509 
01510     // Determine if translation needs to be done.
01511     if((translation == NONE) || (myRefPtr == NULL))
01512     {
01513         return mySequence.c_str();
01514     }
01515     else if(translation == EQUAL)
01516     {
01517         if(mySeqWithEq.length() == 0)
01518         {
01519             // Check to see if the sequence is defined.
01520             if(mySequence == "*")
01521             {
01522                 // Sequence is undefined, so no translation necessary.
01523                 mySeqWithEq = '*';
01524             }
01525             else
01526             {
01527                 // Sequence defined, so translate it.
01528                 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 
01529                                                   myRecordPtr->myPosition,
01530                                                   *(getCigarInfo()),
01531                                                   getReferenceName(),
01532                                                   *myRefPtr,
01533                                                   mySeqWithEq);
01534             }
01535         }
01536         return(mySeqWithEq.c_str());
01537     }
01538     else
01539     {
01540         // translation == BASES
01541         if(mySeqWithoutEq.length() == 0)
01542         {
01543             if(mySequence == "*")
01544             {
01545                 // Sequence is undefined, so no translation necessary.
01546                 mySeqWithoutEq = '*';
01547             }
01548             else
01549             {
01550                 // Sequence defined, so translate it.
01551                 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 
01552                                                      myRecordPtr->myPosition,
01553                                                      *(getCigarInfo()),
01554                                                      getReferenceName(),
01555                                                      *myRefPtr,
01556                                                      mySeqWithoutEq);
01557             }
01558         }
01559         return(mySeqWithoutEq.c_str());
01560     }
01561 }
01562 
01563 
01564 const char* SamRecord::getQuality() 
01565 {
01566     myStatus = SamStatus::SUCCESS;
01567     if(myQuality.Length() == 0)
01568     {
01569         // 0 Length, means that it is in the buffer, but has not yet
01570         // been synced to the string, so do the sync.
01571         setSequenceAndQualityFromBuffer();      
01572     }
01573     return myQuality.c_str();
01574 }
01575 
01576 
01577 char SamRecord::getSequence(int index)
01578 {
01579     return(getSequence(index, mySequenceTranslation));
01580 }
01581 
01582 
01583 char SamRecord::getSequence(int index, SequenceTranslation translation)
01584 {
01585     static const char * asciiBases = "=AC.G...T......N";
01586 
01587     // Determine the read length.
01588     int32_t readLen = getReadLength();
01589 
01590     // If the read length is 0, this method should not be called.
01591     if(readLen == 0)
01592     {
01593         String exceptionString = "SamRecord::getSequence(";
01594         exceptionString += index;
01595         exceptionString += ") is not allowed since sequence = '*'";
01596         throw std::runtime_error(exceptionString.c_str());
01597     }
01598     else if((index < 0) || (index >= readLen))
01599     {
01600         // Only get here if the index was out of range, so thow an exception.
01601         String exceptionString = "SamRecord::getSequence(";
01602         exceptionString += index;
01603         exceptionString += ") is out of range. Index must be between 0 and ";
01604         exceptionString += (readLen - 1);
01605         throw std::runtime_error(exceptionString.c_str());
01606     }
01607 
01608     // Determine if translation needs to be done.
01609     if((translation == NONE) || (myRefPtr == NULL))
01610     {
01611         // No translation needs to be done.
01612         if(mySequence.Length() == 0)
01613         {
01614             // Parse BAM sequence.
01615             // TODO - maybe store this pointer - and use that to track when
01616             // valid?
01617             unsigned char * packedSequence = 
01618                 (unsigned char *)myRecordPtr->myData + 
01619                 myRecordPtr->myReadNameLength +
01620                 myRecordPtr->myCigarLength * sizeof(int);
01621             
01622             return(index & 1 ?
01623                    asciiBases[packedSequence[index / 2] & 0xF] :
01624                    asciiBases[packedSequence[index / 2] >> 4]);
01625         }
01626         // Already have string.
01627         return(mySequence[index]);
01628     }
01629     else
01630     {
01631         // Need to translate the sequence either to have '=' or to not
01632         // have it.
01633         // First check to see if the sequence has been set.
01634         if(mySequence.Length() == 0)
01635         {
01636             // 0 Length, means that it is in the buffer, but has not yet
01637             // been synced to the string, so do the sync.
01638             setSequenceAndQualityFromBuffer();
01639         }
01640 
01641         // Check the type of translation.
01642         if(translation == EQUAL)
01643         {
01644             // Check whether or not the string has already been 
01645             // retrieved that has the '=' in it.
01646             if(mySeqWithEq.length() == 0)
01647             {
01648                 // The string with '=' has not yet been determined,
01649                 // so get the string.
01650                 // Check to see if the sequence is defined.
01651                 if(mySequence == "*")
01652                 {
01653                     // Sequence is undefined, so no translation necessary.
01654                     mySeqWithEq = '*';
01655                 }
01656                 else
01657                 {
01658                     // Sequence defined, so translate it.
01659                     SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 
01660                                                       myRecordPtr->myPosition, 
01661                                                       *(getCigarInfo()),
01662                                                       getReferenceName(),
01663                                                       *myRefPtr,
01664                                                       mySeqWithEq);
01665                 }
01666             }
01667             // Sequence is set, so return it.
01668             return(mySeqWithEq[index]);
01669         }
01670         else
01671         {
01672             // translation == BASES
01673             // Check whether or not the string has already been 
01674             // retrieved that does not have the '=' in it.
01675             if(mySeqWithoutEq.length() == 0)
01676             {
01677                 // The string with '=' has not yet been determined,
01678                 // so get the string.
01679                 // Check to see if the sequence is defined.
01680                 if(mySequence == "*")
01681                 {
01682                     // Sequence is undefined, so no translation necessary.
01683                     mySeqWithoutEq = '*';
01684                 }
01685                 else
01686                 {
01687                     // Sequence defined, so translate it.
01688                     // The string without '=' has not yet been determined,
01689                     // so get the string.
01690                     SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 
01691                                                          myRecordPtr->myPosition, 
01692                                                          *(getCigarInfo()),
01693                                                          getReferenceName(),
01694                                                          *myRefPtr,
01695                                                          mySeqWithoutEq);
01696                 }
01697             }
01698             // Sequence is set, so return it.
01699             return(mySeqWithoutEq[index]);
01700         }
01701     }
01702 }
01703 
01704 
01705 char SamRecord::getQuality(int index)
01706 {
01707     // Determine the read length.
01708     int32_t readLen = getReadLength();
01709 
01710     // If the read length is 0, return ' ' whose ascii code is below
01711     // the minimum ascii code for qualities.
01712     if(readLen == 0)
01713     {
01714         return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01715     }
01716     else if((index < 0) || (index >= readLen))
01717     {
01718         // Only get here if the index was out of range, so thow an exception.
01719         String exceptionString = "SamRecord::getQuality(";
01720         exceptionString += index;
01721         exceptionString += ") is out of range. Index must be between 0 and ";
01722         exceptionString += (readLen - 1);
01723         throw std::runtime_error(exceptionString.c_str());
01724     }
01725 
01726     if(myQuality.Length() == 0)
01727     {
01728         // Parse BAM Quality.
01729         unsigned char * packedQuality = 
01730             (unsigned char *)myRecordPtr->myData +
01731             myRecordPtr->myReadNameLength + 
01732             myRecordPtr->myCigarLength * sizeof(int) + 
01733             (myRecordPtr->myReadLength + 1) / 2;
01734         return(packedQuality[index] + 33);
01735     }
01736     else
01737     {
01738         // Already have string.
01739         if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01740         {
01741             // Return the unknown quality character.
01742             return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01743         }
01744         else
01745         {
01746             return(myQuality[index]);
01747         }
01748     }
01749 }
01750 
01751    
01752 Cigar* SamRecord::getCigarInfo()
01753 {
01754     // Check to see whether or not the Cigar has already been
01755     // set - this is determined by checking if alignment length
01756     // is set since alignment length and the cigar are set
01757     // at the same time.
01758     if(myAlignmentLength == -1)
01759     {
01760         // Not been set, so calculate it.
01761         parseCigar();
01762     }
01763     return(&myCigarRoller);
01764 }
01765 
01766 
01767 // Return the number of bases in this read that overlap the passed in
01768 // region.  (start & end are 0-based)
01769 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
01770 {
01771     // Determine whether or not the cigar has been parsed, which sets up
01772     // the cigar roller.  This is determined by checking the alignment length.
01773     if(myAlignmentLength == -1)
01774     {
01775         parseCigar();
01776     }
01777     return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
01778 }
01779 
01780 
01781 // Returns the values of all fields except the tags.
01782 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName, 
01783                           String& cigar, String& sequence, String& quality)
01784 {
01785     return(getFields(recStruct, readName, cigar, sequence, quality,
01786                      mySequenceTranslation));
01787 }
01788 
01789 
01790 // Returns the values of all fields except the tags.
01791 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName, 
01792                           String& cigar, String& sequence, String& quality,
01793                           SequenceTranslation translation)
01794 {
01795     myStatus = SamStatus::SUCCESS;
01796     if(myIsBufferSynced == false)
01797     {
01798         if(!fixBuffer(translation))
01799         {
01800             // failed to set the buffer, return false.
01801             return(false);
01802         }
01803     }
01804     memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01805 
01806     readName = getReadName();
01807     // Check the status.
01808     if(myStatus != SamStatus::SUCCESS)
01809     {
01810         // Failed to set the fields, return false.
01811         return(false);
01812     }
01813     cigar = getCigar();
01814     // Check the status.
01815     if(myStatus != SamStatus::SUCCESS)
01816     {
01817         // Failed to set the fields, return false.
01818         return(false);
01819     }
01820     sequence = getSequence(translation);
01821     // Check the status.
01822     if(myStatus != SamStatus::SUCCESS)
01823     {
01824         // Failed to set the fields, return false.
01825         return(false);
01826     }
01827     quality = getQuality();
01828     // Check the status.
01829     if(myStatus != SamStatus::SUCCESS)
01830     {
01831         // Failed to set the fields, return false.
01832         return(false);
01833     }
01834     return(true);
01835 }
01836 
01837 
01838 // Returns the reference pointer.
01839 GenomeSequence* SamRecord::getReference()
01840 {
01841     return(myRefPtr);
01842 }
01843 
01844 
01845 uint32_t SamRecord::getTagLength()
01846 {
01847     myStatus = SamStatus::SUCCESS;
01848     if(myNeedToSetTagsFromBuffer)
01849     {
01850         // Tags are only set in the buffer, so the size of the tags is 
01851         // the length of the record minus the starting location of the tags.
01852         unsigned char * tagStart = 
01853             (unsigned char *)myRecordPtr->myData 
01854             + myRecordPtr->myReadNameLength 
01855             + myRecordPtr->myCigarLength * sizeof(int)
01856             + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01857       
01858         // The non-tags take up from the start of the record to the tag start.
01859         // Do not include the block size part of the record since it is not
01860         // included in the size.
01861         uint32_t nonTagSize = 
01862             tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01863         // Tags take up the size of the block minus the non-tag section.
01864         uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01865         return(tagSize);
01866     }
01867 
01868     // Tags are stored outside the buffer, so myTagBufferSize is set.
01869     return(myTagBufferSize);
01870 }
01871 
01872 
01873 // Returns true if there is another tag and sets tag and vtype to the
01874 // appropriate values, and returns a pointer to the value.
01875 // Sets the Status to SUCCESS when a tag is successfully returned or
01876 // when there are no more tags.  Otherwise the status is set to describe
01877 // why it failed (parsing, etc).
01878 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01879 {
01880     myStatus = SamStatus::SUCCESS;
01881     if(myNeedToSetTagsFromBuffer)
01882     {
01883         if(!setTagsFromBuffer())
01884         {
01885             // Failed to read the tags from the buffer, so cannot
01886             // get tags.
01887             return(false);
01888         }
01889     }
01890 
01891     // Increment the tag index to start looking at the next tag.
01892     // At the beginning, it is set to -1.
01893     myLastTagIndex++;
01894     int maxTagIndex = extras.Capacity();
01895     if(myLastTagIndex >= maxTagIndex)
01896     {
01897         // Hit the end of the tags, return false, no more tags.
01898         // Status is still success since this is not an error, 
01899         // it is just the end of the list.
01900         return(false);
01901     }
01902 
01903     bool tagFound = false;
01904     // Loop until a tag is found or the end of extras is hit.
01905     while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01906     {
01907         if(extras.SlotInUse(myLastTagIndex))
01908         {
01909             // Found a slot to use.
01910             int key = extras.GetKey(myLastTagIndex);
01911             getTag(key, tag);
01912             getTypeFromKey(key, vtype);
01913             tagFound = true;
01914             // Get the value associated with the key based on the vtype.
01915             switch (vtype)
01916             {
01917                 case 'f' :
01918                     *value = getDoublePtr(myLastTagIndex);
01919                     break;
01920                 case 'i' :
01921                     *value = getIntegerPtr(myLastTagIndex, vtype);
01922                     if(vtype != 'A')
01923                     {
01924                         // Convert all int types to 'i'
01925                         vtype = 'i';
01926                     }
01927                     break;
01928                 case 'Z' :
01929                     *value = getStringPtr(myLastTagIndex);
01930                     break;
01931                 default:
01932                     myStatus.setStatus(SamStatus::FAIL_PARSE,
01933                                        "Unknown tag type");
01934                     tagFound = false;
01935                     break;
01936             }
01937         }
01938         if(!tagFound)
01939         {
01940             // Increment the index since a tag was not found.
01941             myLastTagIndex++;
01942         }
01943     }
01944     return(tagFound);
01945 }
01946 
01947 
01948 // Reset the tag iterator to the beginning of the tags.
01949 void SamRecord::resetTagIter()
01950 {
01951     myLastTagIndex = -1;
01952 }
01953 
01954 
01955 bool SamRecord::isIntegerType(char vtype) const
01956 {
01957     if((vtype == 'c') || (vtype == 'C') ||
01958        (vtype == 's') || (vtype == 'S') ||
01959        (vtype == 'i') || (vtype == 'I'))
01960     {
01961         return(true);
01962     }
01963     return(false);
01964 }
01965 
01966 
01967 bool SamRecord::isDoubleType(char vtype) const
01968 {
01969     if(vtype == 'f')
01970     {
01971         return(true);
01972     }
01973     return(false);
01974 }
01975 
01976 
01977 bool SamRecord::isCharType(char vtype) const
01978 {
01979     if(vtype == 'A')
01980     {
01981         return(true);
01982     }
01983     return(false);
01984 }
01985 
01986 
01987 bool SamRecord::isStringType(char vtype) const
01988 {
01989     if(vtype == 'Z')
01990     {
01991         return(true);
01992     }
01993     return(false);
01994 }
01995 
01996 
01997 bool SamRecord::getTagsString(const char* tags, String& returnString, char delim)
01998 {
01999     const char* currentTagPtr = tags;
02000 
02001     returnString.Clear();
02002     myStatus = SamStatus::SUCCESS;
02003     if(myNeedToSetTagsFromBuffer)
02004     {
02005         if(!setTagsFromBuffer())
02006         {
02007             // Failed to read the tags from the buffer, so cannot
02008             // get tags.
02009             return(false);
02010         }
02011     }
02012     
02013     bool returnStatus = true;
02014 
02015     while(*currentTagPtr != '\0')
02016     {
02017         // Tags are formatted as: XY:Z
02018         // Where X is [A-Za-z], Y is [A-Za-z], and
02019         // Z is A,i,f,Z,H (cCsSI are also excepted)
02020         if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
02021            (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
02022         {
02023             myStatus.setStatus(SamStatus::INVALID, 
02024                                "getTagsString called with improperly formatted tags.\n");
02025             returnStatus = false;
02026             break;
02027         }
02028 
02029         // Construct the key.
02030         int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1], 
02031                           currentTagPtr[3]);
02032         // Look to see if the key exsists in the hash.
02033         int offset = extras.Find(key);
02034 
02035         if(offset >= 0)
02036         {
02037             // Offset is set, so the key was found.
02038             if(!returnString.IsEmpty())
02039             {
02040                 returnString += delim;
02041             }
02042             returnString += currentTagPtr[0];
02043             returnString += currentTagPtr[1];
02044             returnString += ':';
02045             returnString += currentTagPtr[3];
02046             returnString += ':';
02047 
02048             // First if it is an integer, determine the actual type of the int.
02049             char vtype;
02050             getTypeFromKey(key, vtype);
02051 
02052             
02053             // Offset is set, so recalculate the buffer size without this entry.
02054             // Do NOT remove from strings, integers, or doubles because then
02055             // extras would need to be updated for all entries with the new indexes
02056             // into those variables.
02057             switch(vtype)
02058             {
02059                 case 'i':
02060                     returnString += *(int*)getIntegerPtr(offset, vtype);
02061                     break;
02062                 case 'f':
02063                     returnString += *(double*)getDoublePtr(offset);
02064                     break;
02065                 case 'Z':
02066                     returnString += *(String*)getStringPtr(offset);
02067                     break;
02068                 default:
02069                     myStatus.setStatus(SamStatus::INVALID, 
02070                                        "rmTag called with unknown type.\n");
02071                     returnStatus = false;
02072                     break;
02073             };
02074         }
02075         // Increment to the next tag.
02076         if(currentTagPtr[4] == ';')
02077         {
02078             // Increment once more.
02079             currentTagPtr += 5;
02080         }
02081         else if(currentTagPtr[4] != '\0')
02082         {
02083             // Invalid tag format. 
02084             myStatus.setStatus(SamStatus::INVALID, 
02085                                "rmTags called with improperly formatted tags.\n");
02086             returnStatus = false;
02087             break;
02088         }
02089         else
02090         {
02091             // Last Tag.
02092             currentTagPtr += 4;
02093         }
02094     }
02095     return(returnStatus);
02096 }
02097 
02098 
02099 String* SamRecord::getStringTag(const char * tag)
02100 {
02101     // Parse the buffer if necessary.
02102     if(myNeedToSetTagsFromBuffer)
02103     {
02104         if(!setTagsFromBuffer())
02105         {
02106             // Failed to read the tags from the buffer, so cannot
02107             // get tags.  setTagsFromBuffer set the errors,
02108             // so just return null.
02109             return(NULL);
02110         }
02111     }
02112     
02113     int key = MAKEKEY(tag[0], tag[1], 'Z');
02114     int offset = extras.Find(key);
02115 
02116     int value;
02117     if (offset < 0)
02118     {
02119         // Tag not found.
02120         return(NULL);
02121     }
02122 
02123     // Offset is valid, so return the tag.
02124     value = extras[offset];
02125     return(&(strings[value]));
02126 }
02127 
02128 
02129 int* SamRecord::getIntegerTag(const char * tag)
02130 {
02131     // Init to success.
02132     myStatus = SamStatus::SUCCESS;
02133     // Parse the buffer if necessary.
02134     if(myNeedToSetTagsFromBuffer)
02135     {
02136         if(!setTagsFromBuffer())
02137         {
02138             // Failed to read the tags from the buffer, so cannot
02139             // get tags.  setTagsFromBuffer set the errors,
02140             // so just return null.
02141             return(NULL);
02142         }
02143     }
02144     
02145     int key = MAKEKEY(tag[0], tag[1], 'i');
02146     int offset = extras.Find(key);
02147 
02148     int value;
02149     if (offset < 0)
02150     {
02151         // Failed to find the tag.
02152         return(NULL);
02153     }
02154     else
02155         value = extras[offset];
02156 
02157     return(&(integers[value]));
02158 }
02159 
02160 
02161 double* SamRecord::getDoubleTag(const char * tag)
02162 {
02163     // Init to success.
02164     myStatus = SamStatus::SUCCESS;
02165     // Parse the buffer if necessary.
02166     if(myNeedToSetTagsFromBuffer)
02167     {
02168         if(!setTagsFromBuffer())
02169         {
02170             // Failed to read the tags from the buffer, so cannot
02171             // get tags.  setTagsFromBuffer set the errors,
02172             // so just return null.
02173             return(NULL);
02174         }
02175     }
02176     
02177     int key = MAKEKEY(tag[0], tag[1], 'f');
02178     int offset = extras.Find(key);
02179 
02180     int value;
02181     if (offset < 0)
02182     {
02183         // Failed to find the tag.
02184         return(NULL);
02185     }
02186     else
02187         value = extras[offset];
02188 
02189     return(&(doubles[value]));
02190 }
02191 
02192 
02193 String & SamRecord::getString(const char * tag)
02194 {
02195     // Init to success.
02196     myStatus = SamStatus::SUCCESS;
02197     // Parse the buffer if necessary.
02198     if(myNeedToSetTagsFromBuffer)
02199     {
02200         if(!setTagsFromBuffer())
02201         {
02202             // Failed to read the tags from the buffer, so cannot
02203             // get tags.
02204             // TODO - what do we want to do on failure?            
02205         }
02206     }
02207     
02208     int key = MAKEKEY(tag[0], tag[1], 'Z');
02209     int offset = extras.Find(key);
02210 
02211     int value;
02212     if (offset < 0)
02213     {
02214         // TODO - what do we want to do on failure?
02215         return(NOT_FOUND_TAG_STRING);
02216     }
02217     else
02218         value = extras[offset];
02219 
02220     return strings[value];
02221 }
02222 
02223 int & SamRecord::getInteger(const char * tag)
02224 {
02225     // Init to success.
02226     myStatus = SamStatus::SUCCESS;
02227     // Parse the buffer if necessary.
02228     if(myNeedToSetTagsFromBuffer)
02229     {
02230         if(!setTagsFromBuffer())
02231         {
02232             // Failed to read the tags from the buffer, so cannot
02233             // get tags.  setTagsFromBuffer set the error.
02234             // TODO - what do we want to do on failure?
02235         }
02236     }
02237     
02238     int key = MAKEKEY(tag[0], tag[1], 'i');
02239     int offset = extras.Find(key);
02240 
02241     int value;
02242     if (offset < 0)
02243     {
02244         // TODO - what do we want to do on failure?
02245         return NOT_FOUND_TAG_INT;
02246     }
02247     else
02248         value = extras[offset];
02249 
02250     return integers[value];
02251 }
02252 
02253 
02254 double & SamRecord::getDouble(const char * tag)
02255 {
02256     // Init to success.
02257     myStatus = SamStatus::SUCCESS;
02258     // Parse the buffer if necessary.
02259     if(myNeedToSetTagsFromBuffer)
02260     {
02261         if(!setTagsFromBuffer())
02262         {
02263             // Failed to read the tags from the buffer, so cannot
02264             // get tags.  setTagsFromBuffer set the error.
02265             // TODO - what do we want to do on failure?
02266         }
02267     }
02268     
02269     int key = MAKEKEY(tag[0], tag[1], 'f');
02270     int offset = extras.Find(key);
02271 
02272     int value;
02273     if (offset < 0)
02274     {
02275         // TODO - what do we want to do on failure?
02276         return NOT_FOUND_TAG_DOUBLE;
02277     }
02278     else
02279         value = extras[offset];
02280 
02281     return doubles[value];
02282 }
02283 
02284 
02285 bool SamRecord::checkTag(const char * tag, char type)
02286 {
02287     // Init to success.
02288     myStatus = SamStatus::SUCCESS;
02289     // Parse the buffer if necessary.
02290     if(myNeedToSetTagsFromBuffer)
02291     {
02292         if(!setTagsFromBuffer())
02293         {
02294             // Failed to read the tags from the buffer, so cannot
02295             // get tags.  setTagsFromBuffer set the error.
02296             return("");
02297         }
02298     }
02299     
02300     int key = MAKEKEY(tag[0], tag[1], type);
02301 
02302     return (extras.Find(key) != LH_NOTFOUND);
02303 }
02304 
02305 
02306 // Return the error after a failed SamRecord call.
02307 const SamStatus& SamRecord::getStatus()
02308 {
02309     return(myStatus);
02310 }
02311 
02312 
02313 // Allocate space for the record - does a realloc.  
02314 // The passed in size is the size of the entire record including the
02315 // block size field.
02316 bool SamRecord::allocateRecordStructure(int size)
02317 {
02318     if (allocatedSize < size)
02319     {
02320         bamRecordStruct* tmpRecordPtr = 
02321             (bamRecordStruct *)realloc(myRecordPtr, size);
02322         if(tmpRecordPtr == NULL)
02323         {
02324             // FAILED to allocate memory
02325             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02326             myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
02327             return(false);
02328         }
02329         // Successfully allocated memory, so set myRecordPtr.
02330         myRecordPtr = tmpRecordPtr;
02331         allocatedSize = size;
02332     }
02333     return(true);
02334 }
02335 
02336 
02337 // Index is the index into the strings array.
02338 void* SamRecord::getStringPtr(int index)
02339 {
02340     int value = extras[index];
02341 
02342     return &(strings[value]);
02343 }
02344 
02345 void* SamRecord::getIntegerPtr(int offset, char& type)
02346 {
02347     int value = extras[offset];
02348 
02349     type = intType[value];
02350 
02351     return &(integers[value]);
02352 }
02353 
02354 void* SamRecord::getDoublePtr(int offset)
02355 {
02356     int value = extras[offset];
02357 
02358     return &(doubles[value]);
02359 }
02360 
02361 
02362 // Fixes the buffer to match the variable length fields if they are set.
02363 bool SamRecord::fixBuffer(SequenceTranslation translation)
02364 {
02365     // Check to see if the buffer is already synced.
02366     if(myIsBufferSynced &&
02367        (myBufferSequenceTranslation == translation))
02368     {
02369         // Already synced, nothing to do.
02370         return(true);
02371     }
02372    
02373     // Set the bin if necessary.
02374     if(!myIsBinValid)
02375     {
02376         // The bin that is set in the record is not valid, so
02377         // reset it.
02378         myRecordPtr->myBin = 
02379             bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());      
02380         myIsBinValid = true;
02381     }
02382 
02383     // Not synced.
02384     bool status = true;
02385 
02386     // First determine the size the buffer needs to be.
02387     uint8_t newReadNameLen = getReadNameLength();
02388     uint16_t newCigarLen = getCigarLength();
02389     int32_t newReadLen = getReadLength();
02390     uint32_t newTagLen = getTagLength();
02391     uint32_t bamSequenceLen = (newReadLen+1)/2;
02392 
02393     // The buffer size extends from the start of the record to data
02394     // plus the length of the variable fields,
02395     // Multiply the cigar length by 4 since it is the number of uint32_t fields.
02396     int newBufferSize = 
02397         ((unsigned char*)(&(myRecordPtr->myData)) - 
02398          (unsigned char*)myRecordPtr) +
02399         newReadNameLen + ((newCigarLen)*4) +
02400         newReadLen + bamSequenceLen + newTagLen;
02401    
02402     if(!allocateRecordStructure(newBufferSize))
02403     {
02404         // Failed to allocate space.
02405         return(false);
02406     }
02407 
02408     // Now that space has been added to the buffer, check to see what if
02409     // any fields need to be extracted from the buffer prior to starting to
02410     // overwrite it.  Fields need to be extracted from the buffer if the 
02411     // buffer is valid for the field and a previous variable length field has
02412     // changed length.
02413     bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
02414     bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
02415     bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
02416 
02417     // If the tags are still stored in the buffer and any other fields changed
02418     // lengths, they need to be extracted.
02419     if(myIsTagsBufferValid &&
02420        (readNameLenChange | cigarLenChange | readLenChange))
02421     {
02422         status &= setTagsFromBuffer();
02423         // The tag buffer will not be valid once the other fields
02424         // are written, so set it to not valid.
02425         myIsTagsBufferValid = false;
02426     }
02427 
02428     // If the sequence or quality strings are still stored in the buffer, and
02429     // any of the previous fields have changed length, extract it from the
02430     // current buffer.
02431     if((myIsQualityBufferValid | myIsSequenceBufferValid) && 
02432        (readNameLenChange | cigarLenChange | readLenChange))
02433     {
02434         setSequenceAndQualityFromBuffer();
02435         // The quality and sequence buffers will not be valid once the other
02436         // fields are written, so set them to not valid.
02437         myIsQualityBufferValid = false;
02438         myIsSequenceBufferValid = false;
02439     }
02440 
02441     // If the cigar is still stored in the buffer, and any of the
02442     // previous fields have changed length, extract it from the current buffer.
02443     if((myIsCigarBufferValid) && 
02444        (readNameLenChange))
02445     {
02446         status &= parseCigarBinary();
02447         myIsCigarBufferValid = false;
02448     }
02449 
02450     // Set each value in the buffer if it is not already valid.
02451     if(!myIsReadNameBufferValid)
02452     {
02453         memcpy(&(myRecordPtr->myData), myReadName.c_str(), 
02454                newReadNameLen);
02455    
02456         // Set the new ReadNameLength.
02457         myRecordPtr->myReadNameLength = newReadNameLen;
02458         myIsReadNameBufferValid = true;
02459     }
02460 
02461     unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02462         myRecordPtr->myReadNameLength;
02463    
02464     // Set the Cigar.  Need to reformat from the string to 
02465     unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02466       
02467     if(!myIsCigarBufferValid)
02468     {
02469         // The cigar was already parsed when it was set, so just copy
02470         // data from the temporary buffer.
02471         myRecordPtr->myCigarLength = newCigarLen;
02472         memcpy(packedCigar, myCigarTempBuffer, 
02473                myRecordPtr->myCigarLength * sizeof(uint32_t));
02474       
02475         myIsCigarBufferValid = true;
02476     }
02477 
02478     unsigned char * packedSequence = readNameEnds + 
02479         myRecordPtr->myCigarLength * sizeof(int);
02480     unsigned char * packedQuality = packedSequence + bamSequenceLen;
02481    
02482     if(!myIsSequenceBufferValid || !myIsQualityBufferValid || 
02483        (myBufferSequenceTranslation != translation))
02484     {
02485         myRecordPtr->myReadLength = newReadLen;
02486         // Determine if the quality needs to be set and is just a * and needs to
02487         // be set to 0xFF.
02488         bool noQuality = false;
02489         if((myQuality.Length() == 1) && (myQuality[0] == '*'))
02490         {
02491             noQuality = true;
02492         }
02493       
02494         const char* translatedSeq = NULL;
02495         // If the sequence is not valid in the buffer or it is not
02496         // properly translated, get the properly translated sequence
02497         // that needs to be put into the buffer.
02498         if((!myIsSequenceBufferValid) ||
02499            (translation != myBufferSequenceTranslation))
02500         {
02501             translatedSeq = getSequence(translation);
02502         }
02503 
02504         for (int i = 0; i < myRecordPtr->myReadLength; i++) 
02505         {
02506             if((!myIsSequenceBufferValid) ||
02507                (translation != myBufferSequenceTranslation))
02508             {
02509                 // Sequence buffer is not valid, so set the sequence.
02510                 int seqVal = 0;
02511                 switch(translatedSeq[i])
02512                 {
02513                     case '=':
02514                         seqVal = 0;
02515                         break;
02516                     case 'A':
02517                     case 'a':
02518                         seqVal = 1;
02519                         break;
02520                     case 'C':
02521                     case 'c':
02522                         seqVal = 2;
02523                         break;
02524                     case 'G':
02525                     case 'g':
02526                         seqVal = 4;
02527                         break;
02528                     case 'T':
02529                     case 't':
02530                         seqVal = 8;
02531                         break;
02532                     case 'N':
02533                     case 'n':
02534                     case '.':
02535                         seqVal = 15;
02536                         break;
02537                     default:
02538                         myStatus.addError(SamStatus::FAIL_PARSE,
02539                                           "Unknown Sequence character found.");
02540                         status = false;
02541                         break;
02542                 };
02543             
02544                 if(i & 1)
02545                 {
02546                     // Odd number i's go in the lower 4 bits, so OR in the
02547                     // lower bits
02548                     packedSequence[i/2] |= seqVal;
02549                 }
02550                 else
02551                 {
02552                     // Even i's go in the upper 4 bits and are always set first.
02553                     packedSequence[i/2] = seqVal << 4;
02554                 }
02555             }
02556 
02557             if(!myIsQualityBufferValid)
02558             {
02559                 // Set the quality.
02560                 if((noQuality) || (myQuality.Length() <= i))
02561                 {
02562                     // No quality or the quality is smaller than the sequence,
02563                     // so set it to 0xFF
02564                     packedQuality[i] = 0xFF;
02565                 }
02566                 else
02567                 {
02568                     // Copy the quality string.
02569                     packedQuality[i] = myQuality[i] - 33;
02570                 }
02571             }
02572         }
02573         myIsQualityBufferValid = true;
02574         myIsSequenceBufferValid = true;
02575         myBufferSequenceTranslation = translation;
02576     }
02577 
02578     if(!myIsTagsBufferValid)
02579     {
02580         status &= setTagsInBuffer();
02581     }
02582 
02583     // Set the lengths in the buffer.
02584     myRecordPtr->myReadNameLength = newReadNameLen;
02585     myRecordPtr->myCigarLength = newCigarLen;
02586     myRecordPtr->myReadLength = newReadLen;
02587 
02588     // Set the buffer block size to the size of the buffer minus the
02589     // first field.
02590     myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
02591 
02592     if(status)
02593     {
02594         myIsBufferSynced = true;
02595     }
02596 
02597     return(status);
02598 }
02599 
02600 
02601 // Sets the Sequence and Quality strings from the buffer.
02602 // They are done together in one method because they require the same
02603 // loop, so might as well be done at the same time.
02604 void SamRecord::setSequenceAndQualityFromBuffer()
02605 {
02606     // NOTE: If the sequence buffer is not valid, do not set the sequence
02607     // string from the buffer.
02608     // NOTE: If the quality buffer is not valid, do not set the quality string
02609     // from the buffer.
02610 
02611     // Extract the sequence if the buffer is valid and the string's length is 0.
02612     bool extractSequence = false;
02613     if(myIsSequenceBufferValid && (mySequence.Length() == 0))
02614     {
02615         extractSequence = true;
02616     }
02617 
02618     // Extract the quality if the buffer is valid and the string's length is 0.
02619     bool extractQuality = false;
02620     if(myIsQualityBufferValid && (myQuality.Length() == 0))
02621     {
02622         extractQuality = true;
02623     }
02624 
02625     // If neither the quality nor the sequence need to be extracted,
02626     // just return.
02627     if(!extractSequence && !extractQuality)
02628     {
02629         return;
02630     }
02631 
02632     // Set the sequence and quality strings..
02633     if(extractSequence)
02634     {
02635         mySequence.SetLength(myRecordPtr->myReadLength);
02636     }
02637     if(extractQuality)
02638     {
02639         myQuality.SetLength(myRecordPtr->myReadLength);
02640     }
02641    
02642     unsigned char * readNameEnds = 
02643         (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02644     unsigned char * packedSequence = 
02645         readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02646     unsigned char * packedQuality = 
02647         packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02648 
02649     const char * asciiBases = "=AC.G...T......N";
02650 
02651     // Flag to see if the quality is specified - the quality contains a value
02652     // other than 0xFF.  If all values are 0xFF, then there is no quality.
02653     bool qualitySpecified = false;
02654 
02655     for (int i = 0; i < myRecordPtr->myReadLength; i++)
02656     {
02657         if(extractSequence)
02658         {
02659             mySequence[i] = i & 1 ?
02660                 asciiBases[packedSequence[i / 2] & 0xF] :
02661                 asciiBases[packedSequence[i / 2] >> 4];
02662         }
02663 
02664         if(extractQuality)
02665         {
02666             if(packedQuality[i] != 0xFF)
02667             {
02668                 // Quality is specified, so mark the flag.
02669                 qualitySpecified = true;
02670             }
02671 
02672             myQuality[i] = packedQuality[i] + 33;
02673         }
02674     }
02675 
02676     // If the read length is 0, then set the sequence and quality to '*'
02677     if(myRecordPtr->myReadLength == 0)
02678     {
02679         if(extractSequence)
02680         {
02681             mySequence = "*";
02682         }
02683         if(extractQuality)
02684         {
02685             myQuality = "*";
02686         }
02687     }
02688     else if(extractQuality && !qualitySpecified)
02689     {
02690         // No quality was specified, so set it to "*"
02691         myQuality = "*";
02692     }
02693 }
02694 
02695 
02696 // Parse the cigar to calculate the alignment/unclipped end.
02697 bool SamRecord::parseCigar()
02698 {
02699     // Determine if the cigar string or cigar binary needs to be parsed.
02700     if(myCigar.Length() == 0)
02701     {
02702         // The cigar string is not yet set, so parse the binary.
02703         return(parseCigarBinary());
02704     }
02705     return(parseCigarString());
02706 }
02707 
02708 // Parse the cigar to calculate the alignment/unclipped end.
02709 bool SamRecord::parseCigarBinary()
02710 {
02711     // Only need to parse if the string is not already set.
02712     // The length of the cigar string is set to zero when the 
02713     // record is read from a file into the buffer.
02714     if(myCigar.Length() != 0)
02715     {
02716         // Already parsed.
02717         return(true);
02718     }
02719 
02720     unsigned char * readNameEnds = 
02721         (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02722    
02723     unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02724 
02725     myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
02726     
02727     myCigarRoller.getCigarString(myCigar);
02728 
02729     myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02730 
02731     myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02732     myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02733 
02734     // if the cigar length is 0, then set the cigar string to "*"
02735     if(myRecordPtr->myCigarLength == 0)
02736     {
02737         myCigar = "*";
02738         return(true);
02739     }
02740 
02741     // Copy the cigar into a temporary buffer.
02742     int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
02743     if(newBufferSize > myCigarTempBufferAllocatedSize)
02744     {
02745         uint32_t* tempBufferPtr = 
02746             (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02747         if(tempBufferPtr == NULL)
02748         {
02749             // Failed to allocate memory.
02750             // Do not parse, just return.
02751             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02752             myStatus.addError(SamStatus::FAIL_MEM,
02753                               "Failed to Allocate Memory.");
02754             return(false);
02755         }
02756         myCigarTempBuffer = tempBufferPtr;
02757         myCigarTempBufferAllocatedSize = newBufferSize;
02758     }
02759 
02760     memcpy(myCigarTempBuffer, packedCigar, 
02761            myRecordPtr->myCigarLength * sizeof(uint32_t));
02762 
02763     // Set the length of the temp buffer.
02764     myCigarTempBufferLength = myRecordPtr->myCigarLength;
02765 
02766     return(true);
02767 }
02768 
02769 // Parse the cigar string to calculate the cigar length and alignment end.
02770 bool SamRecord::parseCigarString()
02771 {
02772     myCigarTempBufferLength = 0;
02773     if(myCigar == "*")
02774     {
02775         // Cigar is empty, so initialize the variables.
02776         myAlignmentLength = 0;
02777         myUnclippedStartOffset = 0;
02778         myUnclippedEndOffset = 0;
02779         myCigarRoller.clear();
02780         return(true);
02781     }
02782 
02783     myCigarRoller.Set(myCigar);
02784     
02785     myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02786     
02787     myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02788     myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02789 
02790     // Check to see if the Temporary Cigar Buffer is large enough to contain
02791     // this cigar.  If we make it the size of the length of the cigar string,
02792     // it will be more than large enough.
02793     int newBufferSize = myCigar.Length() * sizeof(uint32_t);
02794     if(newBufferSize > myCigarTempBufferAllocatedSize)
02795     {
02796         uint32_t* tempBufferPtr = 
02797             (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02798         if(tempBufferPtr == NULL)
02799         {
02800             // Failed to allocate memory.
02801             // Do not parse, just return.
02802             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02803             myStatus.addError(SamStatus::FAIL_MEM,
02804                               "Failed to Allocate Memory.");
02805             return(false);
02806         }
02807         myCigarTempBuffer = tempBufferPtr;
02808         myCigarTempBufferAllocatedSize = newBufferSize;
02809     }
02810 
02811     // Track if there were any errors.
02812     bool status = true;
02813 
02814     // Track the index into the cigar string that is being parsed.
02815     char *cigarOp;
02816     const char* cigarEntryStart = myCigar.c_str();
02817     int opLen = 0;
02818     int op = 0;
02819 
02820     unsigned int * packedCigar = myCigarTempBuffer;
02821     // TODO - maybe one day make a cigar list... or maybe make a 
02822     // reference cigar string for ease of lookup....
02823     const char* endCigarString = cigarEntryStart + myCigar.Length();
02824     while(cigarEntryStart < endCigarString)
02825     {
02826         bool validCigarEntry = true;
02827         // Get the opLen from the string.  cigarOp will then point to 
02828         // the operation.
02829         opLen = strtol(cigarEntryStart, &cigarOp, 10);
02830         // Switch on the type of operation.
02831         switch(*cigarOp)
02832         {
02833             case('M'):
02834                 op = 0;
02835                 break;
02836             case('I'):
02837                 // Insert into the reference position, so do not increment the
02838                 // reference end position.
02839                 op = 1;
02840                 break;
02841             case('D'):
02842                 op = 2;
02843                 break;
02844             case('N'):
02845                 op = 3;
02846                 break;
02847             case('S'):
02848                 op = 4;
02849                 break;
02850             case('H'):
02851                 op = 5;
02852                 break;
02853             case('P'):
02854                 op = 6;
02855                 break;
02856             default:
02857                 fprintf(stderr, "ERROR parsing cigar\n");
02858                 validCigarEntry = false;
02859                 status = false;
02860                 myStatus.addError(SamStatus::FAIL_PARSE,
02861                                   "Unknown operation found when parsing the Cigar.");
02862                 break;
02863         };
02864         if(validCigarEntry)
02865         {
02866             // Increment the cigar length.
02867             ++myCigarTempBufferLength;
02868             *packedCigar = (opLen << 4) | op;
02869             packedCigar++;
02870         }
02871         // The next Entry starts at one past the cigar op, so set the start.
02872         cigarEntryStart = ++cigarOp;
02873     }
02874 
02875     // Check clipLength to adjust the end position.
02876     return(status);
02877 }
02878 
02879 
02880 bool SamRecord::setTagsFromBuffer()
02881 {
02882     // If the tags do not need to be set from the buffer, return true.
02883     if(myNeedToSetTagsFromBuffer == false)
02884     {
02885         // Already been set from the buffer.
02886         return(true);
02887     }
02888 
02889     // Mark false, as they are being set now.
02890     myNeedToSetTagsFromBuffer = false;
02891 
02892     unsigned char * readNameEnds = 
02893         (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02894     unsigned char * packedSequence = 
02895         readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02896     unsigned char * packedQuality = 
02897         packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02898 
02899     unsigned char * extraPtr = packedQuality + myRecordPtr->myReadLength;
02900 
02901     // Default to success, will be changed to false on failure.
02902     bool status = true;
02903 
02904     // Clear any previously set tags.
02905     clearTags();
02906     while (myRecordPtr->myBlockSize + 4 - 
02907            (extraPtr - (unsigned char *)myRecordPtr) > 0)
02908     {
02909         int key;
02910         int value;
02911         void * content = extraPtr + 3;
02912         int tagBufferSize = 0;
02913 
02914         key = MAKEKEY(extraPtr[0], extraPtr[1], extraPtr[2]);
02915       
02916         switch (extraPtr[2])
02917         {
02918             case 'A' :
02919                 value = integers.Length();
02920                 integers.Push(* (char *) content);
02921                 intType.push_back(extraPtr[2]);
02922                 extraPtr += 4;
02923                 tagBufferSize += 4;
02924                 break;
02925             case 'c' :
02926                 value = integers.Length();
02927                 integers.Push(* (char *) content);
02928                 intType.push_back(extraPtr[2]);
02929                 extraPtr += 4;
02930                 tagBufferSize += 4;
02931                 break;
02932             case 'C' :
02933                 value = integers.Length();
02934                 integers.Push(* (unsigned char *) content);
02935                 intType.push_back(extraPtr[2]);
02936                 extraPtr += 4;
02937                 tagBufferSize += 4;
02938                 break;
02939             case 's' :
02940                 value = integers.Length();
02941                 integers.Push(* (short *) content);
02942                 intType.push_back(extraPtr[2]);
02943                 extraPtr += 5;
02944                 tagBufferSize += 5;
02945                 break;
02946             case 'S' :
02947                 value = integers.Length();
02948                 integers.Push(* (unsigned short *) content);
02949                 intType.push_back(extraPtr[2]);
02950                 extraPtr += 5;
02951                 tagBufferSize += 5;
02952                 break;
02953             case 'i' :
02954                 value = integers.Length();
02955                 integers.Push(* (int *) content);
02956                 intType.push_back(extraPtr[2]);
02957                 extraPtr += 7;
02958                 tagBufferSize += 7;
02959                 break;
02960             case 'I' :
02961                 value = integers.Length();
02962                 integers.Push((int) * (unsigned int *) content);
02963                 intType.push_back(extraPtr[2]);
02964                 extraPtr += 7;
02965                 tagBufferSize += 7;
02966                 break;
02967             case 'Z' :
02968                 value = strings.Length();
02969                 strings.Push((const char *) content);
02970                 extraPtr += 4 + strings.Last().Length();
02971                 tagBufferSize += 4 + strings.Last().Length();
02972                 break;
02973             case 'f' :
02974                 value = doubles.Length();
02975                 doubles.Push(* (float *) content);
02976                 fprintf(stderr, "\n\nFLOAT!!!\n\n");
02977                 extraPtr += 7;
02978                 tagBufferSize += 7;
02979                 break;
02980             default :
02981                 fprintf(stderr, 
02982                         "parsing BAM - Unknown custom field of type %c%c:%c\n",
02983                         extraPtr[0], extraPtr[1], extraPtr[2]);
02984                 // Failed on read.
02985                 // Increment extraPtr just by the size of the 3 known fields
02986                 extraPtr += 3;
02987                 myStatus.addError(SamStatus::FAIL_PARSE,
02988                                   "Unknown tag type.");
02989                 status = false;
02990         }
02991 
02992         // Only add the tag if it has so far been successfully processed.
02993         if(status)
02994         {
02995             extras.Add(key, value);
02996             myTagBufferSize += tagBufferSize;
02997         }
02998     }
02999     return(status);
03000 }
03001 
03002 
03003 bool SamRecord::setTagsInBuffer()
03004 {
03005     // The buffer size extends from the start of the record to data
03006     // plus the length of the variable fields,
03007     // Multiply the cigar length by 4 since it is the number of uint32_t fields.
03008     int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
03009     int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) - 
03010                          (unsigned char*)myRecordPtr) +  
03011         myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
03012         myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
03013 
03014     // Make sure the buffer is big enough.
03015     if(!allocateRecordStructure(newBufferSize))
03016     {
03017         // Failed to allocate space.
03018         return(false);
03019     }
03020 
03021     unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
03022         myRecordPtr->myReadNameLength;
03023     unsigned char * packedSequence = readNameEnds + 
03024         myRecordPtr->myCigarLength * sizeof(int);
03025     unsigned char * packedQuality = 
03026         packedSequence + bamSequenceLength;
03027    
03028     char * extraPtr = (char*)packedQuality + myRecordPtr->myReadLength;
03029 
03030     bool status = true;
03031 
03032     // Set the tags in the buffer.
03033     if (extras.Entries())
03034     {
03035         for (int i = 0; i < extras.Capacity(); i++)
03036         {
03037             if (extras.SlotInUse(i))
03038             {
03039                 int key = extras.GetKey(i);
03040                 getTag(key, extraPtr);
03041                 extraPtr += 2;
03042                 char vtype;
03043                 getTypeFromKey(key, vtype);
03044  
03045                 if(vtype == 'i')
03046                 {
03047                     vtype = getIntegerType(i);
03048                 }
03049 
03050                 extraPtr[0] = vtype;
03051 
03052                 // increment the pointer to where the value is.
03053                 extraPtr += 1;
03054 
03055                 switch (vtype)
03056                 {
03057                     case 'A' :
03058                         *(char*)extraPtr = (char)getInteger(i);
03059                         // sprintf(extraPtr, "%d", getInteger(i));
03060                         extraPtr += 1;
03061                         break;
03062                     case 'c' :
03063                         *(int8_t*)extraPtr = (int8_t)getInteger(i);
03064                         // sprintf(extraPtr, "%.4d", getInteger(i));
03065                         extraPtr += 1;
03066                         break;
03067                     case 'C' :
03068                         *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
03069                         // sprintf(extraPtr, "%.4d", getInteger(i));
03070                         extraPtr += 1;
03071                         break;
03072                     case 's' :
03073                         *(int16_t*)extraPtr = (int16_t)getInteger(i);
03074                         // sprintf(extraPtr, "%.4d", getInteger(i));
03075                         extraPtr += 2;
03076                         break;
03077                     case 'S' :
03078                         *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
03079                         // sprintf(extraPtr, "%.4d", getInteger(i));
03080                         extraPtr += 2;
03081                         break;
03082                     case 'i' :
03083                         *(int32_t*)extraPtr = (int32_t)getInteger(i);
03084                         // sprintf(extraPtr, "%.4d", getInteger(i));
03085                         extraPtr += 4;
03086                         break;
03087                     case 'I' :
03088                         *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
03089                         // sprintf(extraPtr, "%.4d", getInteger(i));
03090                         extraPtr += 4;
03091                         break;
03092                     case 'Z' :
03093                         sprintf(extraPtr, "%s", getString(i).c_str());
03094                         extraPtr += getString(i).Length() + 1;
03095                         break;
03096                     case 'f' :
03097                         // TODO, figure out if %f is good enough.
03098                         sprintf(extraPtr, "%f", getDouble(i));
03099                         extraPtr += 4;
03100                         break;
03101                     default :
03102                         myStatus.addError(SamStatus::FAIL_PARSE,
03103                                           "Unknown tag type.");
03104                         status = false;
03105                         break;
03106                 }
03107             }
03108         }
03109     }
03110 
03111     // Validate that the extra pointer is at the end of the allocated buffer.
03112     // If not then there was a problem.
03113     if(extraPtr != (char*)myRecordPtr + newBufferSize)
03114     {
03115         fprintf(stderr, "ERROR updating the buffer.  Incorrect size.");
03116         myStatus.addError(SamStatus::FAIL_PARSE,
03117                           "ERROR updating the buffer.  Incorrect size.");
03118         status = false;
03119     }
03120 
03121 
03122     // The buffer tags are now in sync.
03123     myNeedToSetTagsInBuffer = false;
03124     myIsTagsBufferValid = true;
03125     return(status);
03126 }
03127 
03128 
03129 // Reset the variables for a newly set buffer.  The buffer must be set first
03130 // since this looks up the reference ids in the buffer to set the reference
03131 // names.
03132 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
03133 {
03134     // Lookup the reference name & mate reference name associated with this
03135     // record.
03136     myReferenceName = 
03137         header.getReferenceLabel(myRecordPtr->myReferenceID);
03138     myMateReferenceName = 
03139         header.getReferenceLabel(myRecordPtr->myMateReferenceID);      
03140 
03141     // Clear the SAM Strings that are now not in-sync with the buffer.
03142     myReadName.SetLength(0);
03143     myCigar.SetLength(0);
03144     mySequence.SetLength(0);
03145     mySeqWithEq.clear();
03146     mySeqWithoutEq.clear();
03147     myQuality.SetLength(0);
03148     myNeedToSetTagsFromBuffer = true;
03149     myNeedToSetTagsInBuffer = false;
03150 
03151     //Set that the buffer is valid.
03152     myIsBufferSynced = true;
03153     // Set that the variable length buffer fields are valid.
03154     myIsReadNameBufferValid = true;
03155     myIsCigarBufferValid = true;
03156     myIsSequenceBufferValid = true;
03157     myBufferSequenceTranslation = NONE;
03158     myIsQualityBufferValid = true;
03159     myIsTagsBufferValid = true;
03160     myIsBinValid = true;
03161 }
03162 
03163 
03164 // Extract the vtype from the key.
03165 void SamRecord::getTypeFromKey(int key, char& type) const
03166 {
03167     // Extract the type from the key.
03168     type = (key >> 16) & 0xFF;
03169 }
03170 
03171 
03172 // Extract the tag from the key.
03173 void SamRecord::getTag(int key, char* tag) const
03174 {
03175     // Extract the tag from the key.
03176     tag[0] = key & 0xFF;
03177     tag[1] = (key >> 8) & 0xFF;
03178     tag[2] = 0;
03179 }
03180 
03181 
03182 // Index is the index into the strings array.
03183 String & SamRecord::getString(int index)
03184 {
03185     int value = extras[index];
03186 
03187     return strings[value];
03188 }
03189 
03190 int & SamRecord::getInteger(int offset)
03191 {
03192     int value = extras[offset];
03193 
03194     return integers[value];
03195 }
03196 
03197 char & SamRecord::getIntegerType(int offset)
03198 {
03199     int value = extras[offset];
03200 
03201     return intType[value];
03202 }
03203 
03204 double & SamRecord::getDouble(int offset)
03205 {
03206     int value = extras[offset];
03207 
03208     return doubles[value];
03209 }
03210 
03211 
Generated on Tue Sep 6 17:51:59 2011 for libStatGen Software by  doxygen 1.6.3