SamRecord.cpp

00001 /*
00002  *  Copyright (C) 2010-2012  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021 
00022 #include "bam.h"
00023 
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026 
00027 #include "BaseUtilities.h"
00028 #include "SamQuerySeqWithRefHelper.h"
00029 
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032 
00033 
00034 SamRecord::SamRecord()
00035     : myStatus(),
00036       myRefPtr(NULL),
00037       mySequenceTranslation(NONE)
00038 {
00039     int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00040 
00041     myRecordPtr = 
00042         (bamRecordStruct *) malloc(defaultAllocSize);
00043 
00044     myCigarTempBuffer = NULL;
00045     myCigarTempBufferAllocatedSize = 0;
00046 
00047     allocatedSize = defaultAllocSize;
00048 
00049     resetRecord();
00050 }
00051 
00052 
00053 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00054     : myStatus(errorHandlingType),
00055       myRefPtr(NULL),
00056       mySequenceTranslation(NONE)
00057 {
00058     int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00059 
00060     myRecordPtr = 
00061         (bamRecordStruct *) malloc(defaultAllocSize);
00062 
00063     myCigarTempBuffer = NULL;
00064     myCigarTempBufferAllocatedSize = 0;
00065 
00066     allocatedSize = defaultAllocSize;
00067 
00068     resetRecord();
00069 }
00070 
00071 
00072 SamRecord::~SamRecord()
00073 {
00074     resetRecord();
00075 
00076     if(myRecordPtr != NULL)
00077     {
00078         free(myRecordPtr);
00079         myRecordPtr = NULL;
00080     }
00081     if(myCigarTempBuffer != NULL)
00082     {
00083         free(myCigarTempBuffer);
00084         myCigarTempBuffer = NULL;
00085         myCigarTempBufferAllocatedSize = 0;
00086     }
00087 }
00088 
00089 
00090 // Resets the fields of the record to a default value.
00091 void SamRecord::resetRecord()
00092 {
00093     myIsBufferSynced = true;
00094 
00095     myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00096     myRecordPtr->myReferenceID = -1;
00097     myRecordPtr->myPosition = -1;
00098     myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00099     myRecordPtr->myMapQuality = 0;
00100     myRecordPtr->myBin = DEFAULT_BIN;
00101     myRecordPtr->myCigarLength = 0;
00102     myRecordPtr->myFlag = 0;
00103     myRecordPtr->myReadLength = 0;
00104     myRecordPtr->myMateReferenceID = -1;
00105     myRecordPtr->myMatePosition = -1;
00106     myRecordPtr->myInsertSize = 0;
00107    
00108     // Set the sam values for the variable length fields.
00109     // TODO - one way to speed this up might be to not set to "*" and just
00110     // clear them, and write out a '*' for SAM if it is empty.
00111     myReadName = DEFAULT_READ_NAME;
00112     myReferenceName = "*";
00113     myMateReferenceName = "*";
00114     myCigar = "*";
00115     mySequence = "*";
00116     mySeqWithEq.clear();
00117     mySeqWithoutEq.clear();
00118     myQuality = "*";
00119     myNeedToSetTagsFromBuffer = false;
00120     myNeedToSetTagsInBuffer = false;
00121 
00122     // Initialize the calculated alignment info to the uncalculated value.
00123     myAlignmentLength = -1;
00124     myUnclippedStartOffset = -1;
00125     myUnclippedEndOffset = -1;
00126 
00127     clearTags();
00128 
00129     // Set the bam values for the variable length fields.
00130     // Only the read name needs to be set, the others are a length of 0.
00131     // Set the read name.  The min size of myRecordPtr includes the size for
00132     // the default read name.
00133     memcpy(&(myRecordPtr->myData), myReadName.c_str(), 
00134            myRecordPtr->myReadNameLength);
00135 
00136     // Set that the variable length buffer fields are valid.
00137     myIsReadNameBufferValid = true;
00138     myIsCigarBufferValid = true;
00139     myPackedSequence = 
00140         (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
00141         myRecordPtr->myCigarLength * sizeof(int);
00142     myIsSequenceBufferValid = true;
00143     myBufferSequenceTranslation = NONE;
00144 
00145     myPackedQuality = myPackedSequence;
00146     myIsQualityBufferValid = true;
00147     myIsTagsBufferValid = true;
00148     myIsBinValid = true;
00149 
00150     myCigarTempBufferLength = -1;
00151 
00152     myStatus = SamStatus::SUCCESS;
00153 
00154     NOT_FOUND_TAG_STRING = "";
00155     NOT_FOUND_TAG_INT = -1;
00156     NOT_FOUND_TAG_DOUBLE = -1;
00157 }
00158 
00159 
00160 // Returns whether or not the record is valid.
00161 // Header is needed to perform some validation against it.
00162 bool SamRecord::isValid(SamFileHeader& header)
00163 {
00164     myStatus = SamStatus::SUCCESS;
00165     SamValidationErrors invalidSamErrors;
00166     if(!SamValidator::isValid(header, *this, invalidSamErrors))
00167     {
00168         // The record is not valid.
00169         std::string errorMessage = "";
00170         invalidSamErrors.getErrorString(errorMessage);
00171         myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00172         return(false);
00173     }
00174     // The record is valid.
00175     return(true);
00176 }
00177 
00178 
00179 void SamRecord::setReference(GenomeSequence* reference)
00180 {
00181     myRefPtr = reference;
00182 }
00183 
00184 
00185 // Set the type of sequence translation to use when getting
00186 // the sequence.  The default type (if this method is never called) is
00187 // NONE (the sequence is left as-is).  This is used 
00188 void SamRecord::setSequenceTranslation(SequenceTranslation translation)
00189 {
00190     mySequenceTranslation = translation;
00191 }
00192 
00193 
00194 bool SamRecord::setReadName(const char* readName) 
00195 {
00196     myReadName = readName;
00197     myIsBufferSynced = false;
00198     myIsReadNameBufferValid = false;
00199     myStatus = SamStatus::SUCCESS;
00200 
00201     // The read name must at least have some length, otherwise this is a parsing
00202     // error.
00203     if(myReadName.Length() == 0)
00204     {
00205         // Invalid - reset ReadName return false.
00206         myReadName = DEFAULT_READ_NAME;
00207         myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00208         myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00209         return(false);
00210     }
00211 
00212     return true;
00213 }
00214 
00215 
00216 bool SamRecord::setFlag(uint16_t flag)
00217 {
00218     myStatus = SamStatus::SUCCESS;
00219     myRecordPtr->myFlag = flag;
00220     return true;
00221 }
00222 
00223 
00224 bool SamRecord::setReferenceName(SamFileHeader& header,
00225                                  const char* referenceName) 
00226 {
00227     myStatus = SamStatus::SUCCESS;
00228 
00229     myReferenceName = referenceName;
00230     // If the reference ID does not already exist, add it (pass true)
00231     myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
00232 
00233     return true;
00234 }
00235 
00236 
00237 bool SamRecord::set1BasedPosition(int32_t position) 
00238 {
00239     return(set0BasedPosition(position - 1));
00240 }
00241 
00242 
00243 bool SamRecord::set0BasedPosition(int32_t position)
00244 {
00245     myStatus = SamStatus::SUCCESS;
00246     myRecordPtr->myPosition = position;
00247     myIsBinValid = false;
00248     return true;
00249 }
00250 
00251 
00252 bool SamRecord::setMapQuality(uint8_t mapQuality)
00253 {
00254     myStatus = SamStatus::SUCCESS;
00255     myRecordPtr->myMapQuality = mapQuality;
00256     return true;
00257 }
00258 
00259 
00260 bool SamRecord::setCigar(const char* cigar)
00261 {
00262     myStatus = SamStatus::SUCCESS;
00263     myCigar = cigar;
00264  
00265     myIsBufferSynced = false;
00266     myIsCigarBufferValid = false;
00267     myCigarTempBufferLength = -1;
00268     myIsBinValid = false;
00269 
00270     // Initialize the calculated alignment info to the uncalculated value.
00271     myAlignmentLength = -1;
00272     myUnclippedStartOffset = -1;
00273     myUnclippedEndOffset = -1;
00274 
00275     return true;
00276 }
00277 
00278 
00279 bool SamRecord::setCigar(const Cigar& cigar)
00280 {
00281     myStatus = SamStatus::SUCCESS;
00282     cigar.getCigarString(myCigar);
00283  
00284     myIsBufferSynced = false;
00285     myIsCigarBufferValid = false;
00286     myCigarTempBufferLength = -1;
00287     myIsBinValid = false;
00288 
00289     // Initialize the calculated alignment info to the uncalculated value.
00290     myAlignmentLength = -1;
00291     myUnclippedStartOffset = -1;
00292     myUnclippedEndOffset = -1;
00293 
00294     return true;
00295 }
00296 
00297 
00298 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00299                                      const char* mateReferenceName) 
00300 {
00301     myStatus = SamStatus::SUCCESS;
00302     // Set the mate reference, if it is "=", set it to be equal
00303     // to myReferenceName.  This assumes that myReferenceName has already
00304     // been called.
00305     if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00306     {
00307         myMateReferenceName = myReferenceName;
00308     }
00309     else
00310     {
00311         myMateReferenceName = mateReferenceName;
00312     }
00313 
00314     // Set the Mate Reference ID.
00315     // If the reference ID does not already exist, add it (pass true)
00316     myRecordPtr->myMateReferenceID = 
00317         header.getReferenceID(myMateReferenceName, true);
00318 
00319     return true;
00320 }
00321 
00322 
00323 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00324 {
00325     return(set0BasedMatePosition(matePosition - 1));
00326 }
00327 
00328 
00329 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00330 {
00331     myStatus = SamStatus::SUCCESS;
00332     myRecordPtr->myMatePosition = matePosition;
00333     return true;
00334 }
00335 
00336 
00337 bool SamRecord::setInsertSize(int32_t insertSize)
00338 {
00339     myStatus = SamStatus::SUCCESS;
00340     myRecordPtr->myInsertSize = insertSize;
00341     return true;
00342 }
00343 
00344 
00345 bool SamRecord::setSequence(const char* seq) 
00346 {
00347     myStatus = SamStatus::SUCCESS;
00348     mySequence = seq;
00349     mySeqWithEq.clear();
00350     mySeqWithoutEq.clear();
00351    
00352     myIsBufferSynced = false;
00353     myIsSequenceBufferValid = false;
00354     return true;
00355 }
00356 
00357 
00358 bool SamRecord::setQuality(const char* quality) 
00359 {
00360     myStatus = SamStatus::SUCCESS;
00361     myQuality = quality;
00362     myIsBufferSynced = false;
00363     myIsQualityBufferValid = false;
00364     return true;
00365 }
00366 
00367 
00368 //Shift indels to the left
00369 bool SamRecord::shiftIndelsLeft()
00370 {
00371     // Check to see whether or not the Cigar has already been
00372     // set - this is determined by checking if alignment length
00373     // is set since alignment length and the cigar are set
00374     // at the same time.
00375     if(myAlignmentLength == -1)
00376     {
00377         // Not been set, so calculate it.
00378         parseCigar();
00379     }
00380     
00381     // Track whether or not there was a shift.
00382     bool shifted = false;
00383 
00384     // Cigar is set, so now myCigarRoller can be used.
00385     // Track where in the read we are.
00386     uint32_t currentPos = 0;
00387 
00388     // Since the loop starts at 1 because the first operation can't be shifted,
00389     // increment the currentPos past the first operation.
00390     if(Cigar::foundInQuery(myCigarRoller[0]))
00391     {
00392         // This op was found in the read, increment the current position.
00393         currentPos += myCigarRoller[0].count;
00394     }
00395    
00396     int numOps = myCigarRoller.size();
00397     
00398     // Loop through the cigar operations from the 2nd operation since
00399     // the first operation is already on the end and can't shift.
00400     for(int currentOp = 1; currentOp < numOps; currentOp++)
00401     {
00402         if(myCigarRoller[currentOp].operation == Cigar::insert)
00403         {
00404             // For now, only shift a max of 1 operation.
00405             int prevOpIndex = currentOp-1;
00406             // Track the next op for seeing if it is the same as the
00407             // previous for merging reasons.
00408             int nextOpIndex = currentOp+1;
00409             if(nextOpIndex == numOps)
00410             {
00411                 // There is no next op, so set it equal to the current one.
00412                 nextOpIndex = currentOp;
00413             }
00414             // The start of the previous operation, so we know when we hit it
00415             // so we don't shift past it.
00416             uint32_t prevOpStart = 
00417                 currentPos - myCigarRoller[prevOpIndex].count;
00418 
00419             // We can only shift if the previous operation
00420             if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
00421             {
00422                 // TODO - shift past pads
00423                 // An insert is in the read, so increment the position.
00424                 currentPos += myCigarRoller[currentOp].count;                 
00425                 // Not a match/mismatch, so can't shift into it.
00426                 continue;
00427             }
00428                     
00429             // It is a match or mismatch, so check to see if we can
00430             // shift into it.
00431 
00432             // The end of the insert is calculated by adding the size
00433             // of this insert minus 1 to the start of the insert.
00434             uint32_t insertEndPos = 
00435                 currentPos + myCigarRoller[currentOp].count - 1;
00436                 
00437             // The insert starts at the current position.
00438             uint32_t insertStartPos = currentPos;
00439                 
00440             // Loop as long as the position before the insert start
00441             // matches the last character in the insert. If they match,
00442             // the insert can be shifted one index left because the
00443             // implied reference will not change.  If they do not match,
00444             // we can't shift because the implied reference would change.
00445             // Stop loop when insertStartPos = prevOpStart, because we 
00446             // don't want to move past that.
00447             while((insertStartPos > prevOpStart) && 
00448                   (getSequence(insertEndPos,BASES) == 
00449                    getSequence(insertStartPos - 1, BASES)))
00450             {
00451                 // We can shift, so move the insert start & end one left.
00452                 --insertEndPos;
00453                 --insertStartPos;
00454             }
00455 
00456             // Determine if a shift has occurred.
00457             int shiftLen = currentPos - insertStartPos;
00458             if(shiftLen > 0)
00459             {
00460                 // Shift occured, so adjust the cigar if the cigar will
00461                 // not become more operations.
00462                 // If the next operation is the same as the previous or
00463                 // if the insert and the previous operation switch positions
00464                 // then the cigar has the same number of operations.
00465                 // If the next operation is different, and the shift splits
00466                 // the previous operation in 2, then the cigar would
00467                 // become longer, so we do not want to shift.
00468                 if(myCigarRoller[nextOpIndex].operation == 
00469                    myCigarRoller[prevOpIndex].operation)
00470                 {
00471                     // The operations are the same, so merge them by adding
00472                     // the length of the shift to the next operation.
00473                     myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
00474                     myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
00475 
00476                     // If the previous op length is 0, just remove that
00477                     // operation.
00478                     if(myCigarRoller[prevOpIndex].count == 0)
00479                     {
00480                         myCigarRoller.Remove(prevOpIndex);
00481                     }
00482                     shifted = true;
00483                 } 
00484                 else
00485                 {
00486                     // Can only shift if the insert shifts past the
00487                     // entire previous operation, otherwise an operation
00488                     // would need to be added.
00489                     if(insertStartPos == prevOpStart)
00490                     { 
00491                         // Swap the positions of the insert and the
00492                         // previous operation.
00493                         myCigarRoller.Update(currentOp,
00494                                              myCigarRoller[prevOpIndex].operation,
00495                                              myCigarRoller[prevOpIndex].count);
00496                         // Size of the previous op is the entire
00497                         // shift length.
00498                         myCigarRoller.Update(prevOpIndex, 
00499                                              Cigar::insert,
00500                                              shiftLen);
00501                         shifted = true;
00502                     }
00503                 }
00504             }
00505             // An insert is in the read, so increment the position.
00506             currentPos += myCigarRoller[currentOp].count;                 
00507         }
00508         else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
00509         {
00510             // This op was found in the read, increment the current position.
00511             currentPos += myCigarRoller[currentOp].count;
00512         }
00513     }
00514     if(shifted)
00515     {
00516         // TODO - setCigar is currently inefficient because later the cigar
00517         // roller will be recalculated, but for now it will work.
00518         setCigar(myCigarRoller);
00519     }
00520     return(shifted);
00521 }
00522 
00523 
00524 // Set the BAM record from the passeed in buffer of the specified size.
00525 // Note: The size includes the block size.
00526 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00527                                        uint32_t fromBufferSize,
00528                                        SamFileHeader& header)
00529 {
00530     myStatus = SamStatus::SUCCESS;
00531     if((fromBuffer == NULL) || (fromBufferSize == 0))
00532     {
00533         // Buffer is empty.
00534         myStatus.setStatus(SamStatus::FAIL_PARSE,
00535                            "Cannot parse an empty file.");
00536         return(SamStatus::FAIL_PARSE);
00537     }
00538 
00539     // Clear the record.   
00540     resetRecord();
00541 
00542     // allocate space for the record size.
00543     if(!allocateRecordStructure(fromBufferSize))
00544     {
00545         // Failed to allocate space.
00546         return(SamStatus::FAIL_MEM);
00547     }
00548    
00549     memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00550 
00551     setVariablesForNewBuffer(header);
00552 
00553     // Return the status of the record.
00554     return(SamStatus::SUCCESS);
00555 }
00556 
00557 
00558 // Read the BAM record from a file.
00559 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr, 
00560                                                SamFileHeader& header)
00561 {
00562     myStatus = SamStatus::SUCCESS;
00563     if((filePtr == NULL) || (filePtr->isOpen() == false))
00564     {
00565         // File is not open, return failure.
00566         myStatus.setStatus(SamStatus::FAIL_ORDER, 
00567                            "Can't read from an unopened file.");
00568         return(SamStatus::FAIL_ORDER);
00569     }
00570 
00571     // Clear the record.
00572     resetRecord();
00573 
00574     // read the record size.
00575     int numBytes = 
00576         ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00577 
00578     // Check to see if the end of the file was hit and no bytes were read.
00579     if(ifeof(filePtr) && (numBytes == 0))
00580     {
00581         // End of file, nothing was read, no more records.
00582         myStatus.setStatus(SamStatus::NO_MORE_RECS,
00583                            "No more records left to read.");
00584         return(SamStatus::NO_MORE_RECS);
00585     }
00586     
00587     if(numBytes != sizeof(int32_t))
00588     {
00589         // Failed to read the entire block size.  Either the end of the file
00590         // was reached early or there was an error.
00591         if(ifeof(filePtr))
00592         {
00593             // Error: end of the file reached prior to reading the rest of the
00594             // record.
00595             myStatus.setStatus(SamStatus::FAIL_PARSE, 
00596                                "EOF reached in the middle of a record.");
00597             return(SamStatus::FAIL_PARSE);
00598         }
00599         else
00600         {
00601             // Error reading.
00602             myStatus.setStatus(SamStatus::FAIL_IO, 
00603                                "Failed to read the record size.");
00604             return(SamStatus::FAIL_IO);
00605         }
00606     }
00607 
00608     // allocate space for the record size.
00609     if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00610     {
00611         // Failed to allocate space.
00612         // Status is set by allocateRecordStructure.
00613         return(SamStatus::FAIL_MEM);
00614     }
00615 
00616     // Read the rest of the alignment block, starting at the reference id.
00617     if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00618        != (unsigned int)myRecordPtr->myBlockSize)
00619     {
00620         // Error reading the record.  Reset it and return failure.
00621         resetRecord();
00622         myStatus.setStatus(SamStatus::FAIL_IO,
00623                            "Failed to read the record");
00624         return(SamStatus::FAIL_IO);
00625     }
00626 
00627     setVariablesForNewBuffer(header);
00628 
00629     // Return the status of the record.
00630     return(SamStatus::SUCCESS);
00631 }
00632 
00633 
00634 // Add the specified tag to the record.
00635 // Returns true if the tag was successfully added, false otherwise.
00636 bool SamRecord::addIntTag(const char* tag, int32_t value)
00637 {
00638     myStatus = SamStatus::SUCCESS;
00639     int key = 0;
00640     int index = 0;
00641     char bamvtype;
00642 
00643     int tagBufferSize = 0;
00644 
00645     // First check to see if the tags need to be synced to the buffer.
00646     if(myNeedToSetTagsFromBuffer)
00647     {
00648         if(!setTagsFromBuffer())
00649         {
00650             // Failed to read tags from the buffer, so cannot add new ones.
00651             return(false);
00652         }
00653     }
00654 
00655     // Ints come in as int.  But it can be represented in fewer bits.
00656     // So determine a more specific type that is in line with the
00657     // types for BAM files.
00658     // First check to see if it is a negative.
00659     if(value < 0)
00660     {
00661         // The int is negative, so it will need to use a signed type.
00662         // See if it is greater than the min value for a char.
00663         if(value > std::numeric_limits<char>::min())
00664         {
00665             // It can be stored in a signed char.
00666             bamvtype = 'c';
00667             tagBufferSize += 4;
00668         }
00669         else if(value > std::numeric_limits<short>::min())
00670         {
00671             // It fits in a signed short.
00672             bamvtype = 's';
00673             tagBufferSize += 5;
00674         }
00675         else
00676         {
00677             // Just store it as a signed int.
00678             bamvtype = 'i';
00679             tagBufferSize += 7;
00680         }
00681     }
00682     else
00683     {
00684         // It is positive, so an unsigned type can be used.
00685         if(value < std::numeric_limits<unsigned char>::max())
00686         {
00687             // It is under the max of an unsigned char.
00688             bamvtype = 'C';
00689             tagBufferSize += 4;
00690         }
00691         else if(value < std::numeric_limits<unsigned short>::max())
00692         {
00693             // It is under the max of an unsigned short.
00694             bamvtype = 'S';
00695             tagBufferSize += 5;
00696         }
00697         else
00698         {
00699             // Just store it as an unsigned int.
00700             bamvtype = 'I';
00701             tagBufferSize += 7;
00702         }
00703     }
00704 
00705     // Check to see if the tag is already there.
00706     key = MAKEKEY(tag[0], tag[1], bamvtype);
00707     unsigned int hashIndex = extras.Find(key);
00708     if(hashIndex != LH_NOTFOUND)
00709     {
00710         // Tag was already found.
00711         index = extras[hashIndex];
00712 
00713         // First check to see if the value changed.
00714         if((integers[index] == value) && (intType[index] == bamvtype))
00715         {
00716             // The value has not changed, so do nothing.
00717             return(true);
00718         }
00719         else
00720         {
00721             // Not the same value, so adjust the settings.
00722             // Subtract the size of the previous tag from tagBufferSize to get
00723             // the adjusted size.
00724             switch(intType[index])
00725             {
00726                 case 'c':
00727                 case 'C':
00728                     tagBufferSize -= 4;
00729                     break;
00730                 case 's':
00731                 case 'S':
00732                     tagBufferSize -= 5;
00733                     break;
00734                 case 'i':
00735                 case 'I':
00736                     tagBufferSize -= 7;
00737                     break;
00738                 default:
00739                 myStatus.setStatus(SamStatus::INVALID, 
00740                                    "unknown tag inttype type found.\n");
00741                 return(false);              
00742             }
00743             
00744             // Update the integer value and type.
00745             integers[index] = value;
00746             intType[index] = bamvtype;
00747         }
00748     }
00749     else
00750     {
00751         // Tag is not already there, so add it.
00752         index = integers.Length();
00753         
00754         integers.Push(value);
00755         intType.push_back(bamvtype);
00756 
00757         extras.Add(key, index);
00758     }
00759 
00760     // The buffer tags are now out of sync.
00761     myNeedToSetTagsInBuffer = true;
00762     myIsTagsBufferValid = false;
00763     myIsBufferSynced = false;
00764     myTagBufferSize += tagBufferSize;
00765 
00766     return(true);
00767 }
00768 
00769 
00770 // Add the specified tag to the record, replacing it if it is already there and
00771 // is different from the previous value.
00772 // Returns true if the tag was successfully added (or was already there), false otherwise.
00773 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00774 {
00775     if(vtype == 'i')
00776     {
00777         // integer type.  Call addIntTag to handle it.
00778         int intVal = atoi(valuePtr);
00779         return(addIntTag(tag, intVal));
00780     }
00781 
00782     // Non-int type.
00783     myStatus = SamStatus::SUCCESS;
00784     bool status = true; // default to successful.
00785     int key = 0;
00786     int index = 0;
00787 
00788     int tagBufferSize = 0;
00789 
00790     // First check to see if the tags need to be synced to the buffer.
00791     if(myNeedToSetTagsFromBuffer)
00792     {
00793         if(!setTagsFromBuffer())
00794         {
00795             // Failed to read tags from the buffer, so cannot add new ones.
00796             return(false);
00797         }
00798     }
00799 
00800     // First check to see if the tag is already there.
00801     key = MAKEKEY(tag[0], tag[1], vtype);
00802     unsigned int hashIndex = extras.Find(key);
00803     if(hashIndex == LH_NOTFOUND)
00804     {
00805         // The key was not found.  If this is a 'B'/'Z', check for
00806         // the key with 'Z'/'B'.
00807         if(vtype == 'Z')
00808         {
00809             int tmpkey = MAKEKEY(tag[0], tag[1], 'B');
00810             hashIndex = extras.Find(tmpkey);
00811         }
00812         else if(vtype == 'B')
00813         {
00814             int tmpkey = MAKEKEY(tag[0], tag[1], 'Z');
00815             hashIndex = extras.Find(tmpkey);
00816         }
00817     }
00818     if(hashIndex != LH_NOTFOUND)
00819     {
00820         // The key was found in the hash, so get the lookup index.
00821         index = extras[hashIndex];
00822 
00823         // Adjust the currently pointed to value to the new setting.
00824         switch (vtype)
00825         {
00826             case 'A' :
00827                 // First check to see if the value changed.
00828                 if(integers[index] == (const int)*(valuePtr))
00829                 {
00830                     // The value has not changed, so do nothing.
00831                     return(true);
00832                 }
00833                 else
00834                 {
00835                     // Tag buffer size doesn't change between different 'A' entries.
00836                     integers[index] = (const int)*(valuePtr);
00837                     intType[index] = vtype;
00838                 }
00839                 break;
00840             case 'Z' :
00841             case 'B' :
00842                 // First check to see if the value changed.
00843                 if(strings[index] == valuePtr)
00844                 {
00845                     // The value has not changed, so do nothing.
00846                     return(true);
00847                 }
00848                 else
00849                 {
00850                     // Adjust the tagBufferSize by removing the size of the old string.
00851                     tagBufferSize -= strings[index].Length();
00852                     strings[index] = valuePtr;
00853                     // Adjust the tagBufferSize by adding the size of the new string.
00854                     tagBufferSize += strings[index].Length();
00855                 }
00856                 break;
00857             case 'f' :
00858                 // First check to see if the value changed.
00859                 if(doubles[index] == atof(valuePtr))
00860                 {
00861                     // The value has not changed, so do nothing.
00862                     return(true);
00863                 }
00864                 else
00865                 {
00866                     // Tag buffer size doesn't change between different 'f' entries.
00867                     doubles[index] = atof(valuePtr);
00868                 }
00869                 break;
00870             default :
00871                 fprintf(stderr,
00872                         "samRecord::addTag() - Unknown custom field of type %c\n",
00873                         vtype);
00874                 myStatus.setStatus(SamStatus::FAIL_PARSE, 
00875                                    "Unknown custom field in a tag");
00876                 status = false;
00877                 break;
00878         }
00879     }
00880     else
00881     {
00882         // The key was not found in the hash, so add it.
00883         switch (vtype)
00884         {
00885             case 'A' :
00886                 index = integers.Length();
00887                 integers.Push((const int)*(valuePtr));
00888                 intType.push_back(vtype);
00889                 tagBufferSize += 4;
00890                 break;
00891             case 'Z' :
00892             case 'B' :
00893                 index = strings.Length();
00894                 strings.Push(valuePtr);
00895                 tagBufferSize += 4 + strings.Last().Length();
00896                 break;
00897             case 'f' :
00898                 index = doubles.Length();
00899                 doubles.Push(atof(valuePtr));
00900                 tagBufferSize += 7;
00901                 break;
00902             default :
00903                 fprintf(stderr,
00904                         "samRecord::addTag() - Unknown custom field of type %c\n",
00905                         vtype);
00906                 myStatus.setStatus(SamStatus::FAIL_PARSE, 
00907                                    "Unknown custom field in a tag");
00908                 status = false;
00909                 break;
00910         }
00911         if(status)
00912         {
00913             // If successful, add the key to extras.
00914             extras.Add(key, index);
00915         }
00916     }
00917 
00918     // Only add the tag if it has so far been successfully processed.
00919     if(status)
00920     {
00921         // The buffer tags are now out of sync.
00922         myNeedToSetTagsInBuffer = true;
00923         myIsTagsBufferValid = false;
00924         myIsBufferSynced = false;
00925         myTagBufferSize += tagBufferSize;
00926     }
00927     return(status);
00928 }
00929 
00930 
00931 void SamRecord::clearTags()
00932 {
00933     if(extras.Entries() != 0)
00934     {
00935         extras.Clear();
00936     }
00937     strings.Clear();
00938     integers.Clear();
00939     intType.clear();
00940     doubles.Clear();
00941     myTagBufferSize = 0;
00942     resetTagIter();
00943 }
00944 
00945 
00946 bool SamRecord::rmTag(const char* tag, char type)
00947 {
00948     // Check the length of tag.
00949     if(strlen(tag) != 2)
00950     {
00951         // Tag is the wrong length.
00952         myStatus.setStatus(SamStatus::INVALID, 
00953                            "rmTag called with tag that is not 2 characters\n");
00954         return(false);
00955     }
00956 
00957     myStatus = SamStatus::SUCCESS;
00958     if(myNeedToSetTagsFromBuffer)
00959     {
00960         if(!setTagsFromBuffer())
00961         {
00962             // Failed to read the tags from the buffer, so cannot
00963             // get tags.
00964             return(false);
00965         }
00966     }
00967 
00968     // Construct the key.
00969     int key = MAKEKEY(tag[0], tag[1], type);
00970     // Look to see if the key exsists in the hash.
00971     int offset = extras.Find(key);
00972 
00973     if(offset < 0)
00974     {
00975         // Not found, so return true, successfully removed since
00976         // it is not in tag.
00977         return(true);
00978     }
00979 
00980     // Offset is set, so the key was found.
00981     // First if it is an integer, determine the actual type of the int.
00982     char vtype;
00983     getTypeFromKey(key, vtype);
00984     if(vtype == 'i')
00985     {
00986         vtype = getIntegerType(offset);
00987     }
00988 
00989     // Offset is set, so recalculate the buffer size without this entry.
00990     // Do NOT remove from strings, integers, or doubles because then
00991     // extras would need to be updated for all entries with the new indexes
00992     // into those variables.
00993     int rmBuffSize = 0;
00994     switch(vtype)
00995     {
00996         case 'A':
00997         case 'c':
00998         case 'C':
00999             rmBuffSize = 4;
01000             break;
01001         case 's':
01002         case 'S':
01003             rmBuffSize = 5;
01004             break;
01005         case 'i':
01006         case 'I':
01007             rmBuffSize = 7;
01008             break;
01009         case 'f':
01010             rmBuffSize = 7;
01011             break;
01012         case 'Z':
01013         case 'B':
01014             rmBuffSize = 4 + getString(offset).Length();
01015             break;
01016         default:
01017             myStatus.setStatus(SamStatus::INVALID, 
01018                                "rmTag called with unknown type.\n");
01019             return(false);
01020             break;
01021     };
01022 
01023     // The buffer tags are now out of sync.
01024     myNeedToSetTagsInBuffer = true;
01025     myIsTagsBufferValid = false;
01026     myIsBufferSynced = false;
01027     myTagBufferSize -= rmBuffSize;
01028 
01029     // Remove from the hash.
01030     extras.Delete(offset);
01031     return(true);
01032 }
01033 
01034 
01035 bool SamRecord::rmTags(const char* tags)
01036 {
01037     const char* currentTagPtr = tags;
01038 
01039     myStatus = SamStatus::SUCCESS;
01040     if(myNeedToSetTagsFromBuffer)
01041     {
01042         if(!setTagsFromBuffer())
01043         {
01044             // Failed to read the tags from the buffer, so cannot
01045             // get tags.
01046             return(false);
01047         }
01048     }
01049     
01050     bool returnStatus = true;
01051 
01052     int rmBuffSize = 0;
01053     while(*currentTagPtr != '\0')
01054     {
01055 
01056         // Tags are formatted as: XY:Z
01057         // Where X is [A-Za-z], Y is [A-Za-z], and
01058         // Z is A,i,f,Z,H (cCsSI are also excepted)
01059         if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
01060            (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
01061         {
01062             myStatus.setStatus(SamStatus::INVALID, 
01063                                "rmTags called with improperly formatted tags.\n");
01064             returnStatus = false;
01065             break;
01066         }
01067 
01068         // Construct the key.
01069         int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1], 
01070                           currentTagPtr[3]);
01071         // Look to see if the key exsists in the hash.
01072         int offset = extras.Find(key);
01073 
01074         if(offset >= 0)
01075         {
01076             // Offset is set, so the key was found.
01077             // First if it is an integer, determine the actual type of the int.
01078             char vtype;
01079             getTypeFromKey(key, vtype);
01080             if(vtype == 'i')
01081             {
01082                 vtype = getIntegerType(offset);
01083             }
01084             
01085             // Offset is set, so recalculate the buffer size without this entry.
01086             // Do NOT remove from strings, integers, or doubles because then
01087             // extras would need to be updated for all entries with the new indexes
01088             // into those variables.
01089             switch(vtype)
01090             {
01091                 case 'A':
01092                 case 'c':
01093                 case 'C':
01094                     rmBuffSize += 4;
01095                     break;
01096                 case 's':
01097                 case 'S':
01098                     rmBuffSize += 5;
01099                     break;
01100                 case 'i':
01101                 case 'I':
01102                     rmBuffSize += 7;
01103                     break;
01104                 case 'f':
01105                     rmBuffSize += 7;
01106                     break;
01107                 case 'Z':
01108                 case 'B':
01109                     rmBuffSize += 4 + getString(offset).Length();
01110                     break;
01111                 default:
01112                     myStatus.setStatus(SamStatus::INVALID, 
01113                                        "rmTag called with unknown type.\n");
01114                     returnStatus = false;
01115                     break;
01116             };
01117             
01118             // Remove from the hash.
01119             extras.Delete(offset);
01120         }
01121         // Increment to the next tag.
01122         if(currentTagPtr[4] == ';')
01123         {
01124             // Increment once more.
01125             currentTagPtr += 5;
01126         }
01127         else if(currentTagPtr[4] != '\0')
01128         {
01129             // Invalid tag format. 
01130             myStatus.setStatus(SamStatus::INVALID, 
01131                                "rmTags called with improperly formatted tags.\n");
01132             returnStatus = false;
01133             break;
01134         }
01135         else
01136         {
01137             // Last Tag.
01138             currentTagPtr += 4;
01139         }
01140     }
01141 
01142     // The buffer tags are now out of sync.
01143     myNeedToSetTagsInBuffer = true;
01144     myIsTagsBufferValid = false;
01145     myIsBufferSynced = false;
01146     myTagBufferSize -= rmBuffSize;
01147     
01148 
01149     return(returnStatus);
01150 }
01151 
01152 
01153 // Get methods for record fields.
01154 const void* SamRecord::getRecordBuffer()
01155 {
01156     return(getRecordBuffer(mySequenceTranslation));
01157 }
01158 
01159 
01160 // Get methods for record fields.
01161 const void* SamRecord::getRecordBuffer(SequenceTranslation translation) 
01162 {
01163     myStatus = SamStatus::SUCCESS;
01164     bool status = true;
01165     // If the buffer is not synced or the sequence in the buffer is not
01166     // properly translated, fix the buffer.
01167     if((myIsBufferSynced == false) ||
01168        (myBufferSequenceTranslation != translation))
01169     {
01170         status &= fixBuffer(translation);
01171     }
01172     // If the buffer is synced, check to see if the tags need to be synced.
01173     if(myNeedToSetTagsInBuffer)
01174     {
01175         status &= setTagsInBuffer();
01176     }
01177     if(!status)
01178     {
01179         return(NULL);
01180     }
01181     return (const void *)myRecordPtr;
01182 }
01183 
01184 
01185 // Write the record as a buffer into the file using the class's 
01186 // sequence translation setting.
01187 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
01188 {
01189     return(writeRecordBuffer(filePtr, mySequenceTranslation));
01190 }
01191 
01192 
01193 // Write the record as a buffer into the file using the specified translation.
01194 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr, 
01195                                                SequenceTranslation translation)
01196 {
01197     myStatus = SamStatus::SUCCESS;
01198     if((filePtr == NULL) || (filePtr->isOpen() == false))
01199     {
01200         // File is not open, return failure.
01201         myStatus.setStatus(SamStatus::FAIL_ORDER,
01202                            "Can't write to an unopened file.");
01203         return(SamStatus::FAIL_ORDER);
01204     }
01205 
01206     if((myIsBufferSynced == false) ||
01207        (myBufferSequenceTranslation != translation))
01208     {
01209         if(!fixBuffer(translation))
01210         {
01211             return(myStatus.getStatus());
01212         }
01213     }
01214 
01215     // Write the record.
01216     unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
01217     unsigned int numBytesWritten = 
01218         ifwrite(filePtr, myRecordPtr, numBytesToWrite);
01219 
01220     // Return status based on if the correct number of bytes were written.
01221     if(numBytesToWrite == numBytesWritten)
01222     {
01223         return(SamStatus::SUCCESS);
01224     }
01225     // The correct number of bytes were not written.
01226     myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
01227     return(SamStatus::FAIL_IO);
01228 }
01229 
01230 
01231 int32_t SamRecord::getBlockSize() 
01232 {
01233     myStatus = SamStatus::SUCCESS;
01234     // If the buffer isn't synced, sync the buffer to determine the
01235     // block size.
01236     if(myIsBufferSynced == false)
01237     {
01238         // Since this just returns the block size, the translation of
01239         // the sequence does not matter, so just use the currently set
01240         // value.
01241         fixBuffer(myBufferSequenceTranslation);
01242     }
01243     return myRecordPtr->myBlockSize;
01244 }
01245 
01246 
01247 // This method returns the reference name.
01248 const char* SamRecord::getReferenceName()
01249 {
01250     myStatus = SamStatus::SUCCESS;
01251     return myReferenceName.c_str();
01252 }
01253 
01254 
01255 int32_t SamRecord::getReferenceID()
01256 {
01257     myStatus = SamStatus::SUCCESS;
01258     return myRecordPtr->myReferenceID;
01259 }
01260 
01261 
01262 int32_t SamRecord::get1BasedPosition()
01263 {
01264     myStatus = SamStatus::SUCCESS;
01265     return (myRecordPtr->myPosition + 1);
01266 }
01267 
01268 
01269 int32_t SamRecord::get0BasedPosition()
01270 {
01271     myStatus = SamStatus::SUCCESS;
01272     return myRecordPtr->myPosition;
01273 }
01274 
01275 
01276 uint8_t SamRecord::getReadNameLength() 
01277 {
01278     myStatus = SamStatus::SUCCESS;
01279     // If the buffer is valid, return the size from there, otherwise get the 
01280     // size from the string length + 1 (ending null).
01281     if(myIsReadNameBufferValid)
01282     {
01283         return(myRecordPtr->myReadNameLength);
01284     }
01285    
01286     return(myReadName.Length() + 1);
01287 }
01288 
01289 
01290 uint8_t SamRecord::getMapQuality()
01291 {
01292     myStatus = SamStatus::SUCCESS;
01293     return myRecordPtr->myMapQuality;
01294 }
01295 
01296 
01297 uint16_t SamRecord::getBin()
01298 {
01299     myStatus = SamStatus::SUCCESS;
01300     if(!myIsBinValid)
01301     {
01302         // The bin that is set in the record is not valid, so
01303         // reset it.
01304         myRecordPtr->myBin = 
01305             bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());      
01306         myIsBinValid = true;
01307     }
01308     return(myRecordPtr->myBin);
01309 }
01310 
01311 
01312 uint16_t SamRecord::getCigarLength()
01313 {
01314     myStatus = SamStatus::SUCCESS;
01315     // If the cigar buffer is valid
01316     // then get the length from there.
01317     if(myIsCigarBufferValid)
01318     {
01319         return myRecordPtr->myCigarLength;      
01320     }
01321 
01322     if(myCigarTempBufferLength == -1)
01323     {
01324         // The cigar buffer is not valid and the cigar temp buffer is not set,
01325         // so parse the string.
01326         parseCigarString();
01327     }
01328    
01329     // The temp buffer is now set, so return the size.
01330     return(myCigarTempBufferLength);
01331 }
01332 
01333 
01334 uint16_t SamRecord::getFlag()
01335 {
01336     myStatus = SamStatus::SUCCESS;
01337     return myRecordPtr->myFlag;
01338 }
01339 
01340 
01341 int32_t SamRecord::getReadLength() 
01342 {
01343     myStatus = SamStatus::SUCCESS;
01344     if(myIsSequenceBufferValid == false)
01345     {
01346         // If the sequence is "*", then return 0.
01347         if((mySequence.Length() == 1) && (mySequence[0] == '*'))
01348         {
01349             return(0);
01350         }
01351         // Do not add 1 since it is not null terminated.
01352         return(mySequence.Length());
01353     }
01354     return(myRecordPtr->myReadLength);
01355 }
01356 
01357 
01358 // This method returns the mate reference name.  If it is equal to the
01359 // reference name, it still returns the reference name.
01360 const char* SamRecord::getMateReferenceName()
01361 {
01362     myStatus = SamStatus::SUCCESS;
01363     return myMateReferenceName.c_str();
01364 }
01365 
01366 
01367 // This method returns the mate reference name.  If it is equal to the
01368 // reference name, it returns "=", unless they are both "*" in which case
01369 // "*" is returned.
01370 const char* SamRecord::getMateReferenceNameOrEqual()
01371 {
01372     myStatus = SamStatus::SUCCESS;
01373     if(myMateReferenceName == "*")
01374     {
01375         return(myMateReferenceName);
01376     }
01377     if(myMateReferenceName == getReferenceName())
01378     {
01379         return(FIELD_ABSENT_STRING);
01380     }
01381     else
01382     {
01383         return(myMateReferenceName);
01384     }
01385 }
01386 
01387 
01388 int32_t SamRecord::getMateReferenceID()
01389 {
01390     myStatus = SamStatus::SUCCESS;
01391     return myRecordPtr->myMateReferenceID;
01392 }
01393 
01394 
01395 int32_t SamRecord::get1BasedMatePosition()
01396 {
01397     myStatus = SamStatus::SUCCESS;
01398     return (myRecordPtr->myMatePosition + 1);
01399 }
01400 
01401 
01402 int32_t SamRecord::get0BasedMatePosition()
01403 {
01404     myStatus = SamStatus::SUCCESS;
01405     return myRecordPtr->myMatePosition;
01406 }
01407 
01408 
01409 int32_t SamRecord::getInsertSize()
01410 {
01411     myStatus = SamStatus::SUCCESS;
01412     return myRecordPtr->myInsertSize;
01413 }
01414 
01415 
01416 // Returns the inclusive rightmost position of the clipped sequence.
01417 int32_t SamRecord::get0BasedAlignmentEnd()
01418 {
01419     myStatus = SamStatus::SUCCESS;
01420     if(myAlignmentLength == -1)
01421     {
01422         // Alignment end has not been set, so calculate it.
01423         parseCigar();
01424     }
01425     // If alignment length > 0, subtract 1 from it to get the end.
01426     if(myAlignmentLength == 0)
01427     {
01428         // Length is 0, just return the start position.
01429         return(myRecordPtr->myPosition);
01430     }
01431     return(myRecordPtr->myPosition + myAlignmentLength - 1);
01432 }
01433 
01434 
01435 // Returns the inclusive rightmost position of the clipped sequence.
01436 int32_t SamRecord::get1BasedAlignmentEnd()
01437 {
01438     return(get0BasedAlignmentEnd() + 1);
01439 }
01440 
01441    
01442 // Return the length of the alignment.
01443 int32_t SamRecord::getAlignmentLength()
01444 {
01445     myStatus = SamStatus::SUCCESS;
01446     if(myAlignmentLength == -1)
01447     {
01448         // Alignment end has not been set, so calculate it.
01449         parseCigar();
01450     }
01451     // Return the alignment length.
01452     return(myAlignmentLength);
01453 }
01454 
01455 // Returns the inclusive left-most position adjust for clipped bases.
01456 int32_t SamRecord::get0BasedUnclippedStart()
01457 {
01458     myStatus = SamStatus::SUCCESS;
01459     if(myUnclippedStartOffset == -1)
01460     {
01461         // Unclipped has not yet been calculated, so parse the cigar to get it
01462         parseCigar();
01463     }
01464     return(myRecordPtr->myPosition - myUnclippedStartOffset);
01465 }
01466 
01467 
01468 // Returns the inclusive left-most position adjust for clipped bases.
01469 int32_t SamRecord::get1BasedUnclippedStart()
01470 {
01471     return(get0BasedUnclippedStart() + 1);
01472 }
01473 
01474 
01475 // Returns the inclusive right-most position adjust for clipped bases.
01476 int32_t SamRecord::get0BasedUnclippedEnd()
01477 {
01478     // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the 
01479     // cigar has not yet been parsed, so no need to check it here.
01480     return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
01481 }
01482 
01483 
01484 // Returns the inclusive right-most position adjust for clipped bases.
01485 int32_t SamRecord::get1BasedUnclippedEnd()
01486 {
01487     return(get0BasedUnclippedEnd() + 1);
01488 }
01489 
01490 
01491 // Get the read name.
01492 const char* SamRecord::getReadName()
01493 {
01494     myStatus = SamStatus::SUCCESS;
01495     if(myReadName.Length() == 0)
01496     {
01497         // 0 Length, means that it is in the buffer, but has not yet
01498         // been synced to the string, so do the sync.
01499         myReadName = (char*)&(myRecordPtr->myData);
01500     }
01501     return myReadName.c_str();
01502 }
01503 
01504 
01505 const char* SamRecord::getCigar()
01506 {
01507     myStatus = SamStatus::SUCCESS;
01508     if(myCigar.Length() == 0)
01509     {
01510         // 0 Length, means that it is in the buffer, but has not yet
01511         // been synced to the string, so do the sync.
01512         parseCigarBinary();
01513     }
01514     return myCigar.c_str();
01515 }
01516 
01517 
01518 const char* SamRecord::getSequence()
01519 {
01520     return(getSequence(mySequenceTranslation));
01521 }
01522 
01523 
01524 const char* SamRecord::getSequence(SequenceTranslation translation)
01525 {
01526     myStatus = SamStatus::SUCCESS;
01527     if(mySequence.Length() == 0)
01528     {
01529         // 0 Length, means that it is in the buffer, but has not yet
01530         // been synced to the string, so do the sync.
01531         setSequenceAndQualityFromBuffer();
01532     }
01533 
01534     // Determine if translation needs to be done.
01535     if((translation == NONE) || (myRefPtr == NULL))
01536     {
01537         return mySequence.c_str();
01538     }
01539     else if(translation == EQUAL)
01540     {
01541         if(mySeqWithEq.length() == 0)
01542         {
01543             // Check to see if the sequence is defined.
01544             if(mySequence == "*")
01545             {
01546                 // Sequence is undefined, so no translation necessary.
01547                 mySeqWithEq = '*';
01548             }
01549             else
01550             {
01551                 // Sequence defined, so translate it.
01552                 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 
01553                                                   myRecordPtr->myPosition,
01554                                                   *(getCigarInfo()),
01555                                                   getReferenceName(),
01556                                                   *myRefPtr,
01557                                                   mySeqWithEq);
01558             }
01559         }
01560         return(mySeqWithEq.c_str());
01561     }
01562     else
01563     {
01564         // translation == BASES
01565         if(mySeqWithoutEq.length() == 0)
01566         {
01567             if(mySequence == "*")
01568             {
01569                 // Sequence is undefined, so no translation necessary.
01570                 mySeqWithoutEq = '*';
01571             }
01572             else
01573             {
01574                 // Sequence defined, so translate it.
01575                 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 
01576                                                      myRecordPtr->myPosition,
01577                                                      *(getCigarInfo()),
01578                                                      getReferenceName(),
01579                                                      *myRefPtr,
01580                                                      mySeqWithoutEq);
01581             }
01582         }
01583         return(mySeqWithoutEq.c_str());
01584     }
01585 }
01586 
01587 
01588 const char* SamRecord::getQuality() 
01589 {
01590     myStatus = SamStatus::SUCCESS;
01591     if(myQuality.Length() == 0)
01592     {
01593         // 0 Length, means that it is in the buffer, but has not yet
01594         // been synced to the string, so do the sync.
01595         setSequenceAndQualityFromBuffer();      
01596     }
01597     return myQuality.c_str();
01598 }
01599 
01600 
01601 char SamRecord::getSequence(int index)
01602 {
01603     return(getSequence(index, mySequenceTranslation));
01604 }
01605 
01606 
01607 char SamRecord::getSequence(int index, SequenceTranslation translation)
01608 {
01609     static const char * asciiBases = "=AC.G...T......N";
01610 
01611     // Determine the read length.
01612     int32_t readLen = getReadLength();
01613 
01614     // If the read length is 0, this method should not be called.
01615     if(readLen == 0)
01616     {
01617         String exceptionString = "SamRecord::getSequence(";
01618         exceptionString += index;
01619         exceptionString += ") is not allowed since sequence = '*'";
01620         throw std::runtime_error(exceptionString.c_str());
01621     }
01622     else if((index < 0) || (index >= readLen))
01623     {
01624         // Only get here if the index was out of range, so thow an exception.
01625         String exceptionString = "SamRecord::getSequence(";
01626         exceptionString += index;
01627         exceptionString += ") is out of range. Index must be between 0 and ";
01628         exceptionString += (readLen - 1);
01629         throw std::runtime_error(exceptionString.c_str());
01630     }
01631 
01632     // Determine if translation needs to be done.
01633     if((translation == NONE) || (myRefPtr == NULL))
01634     {
01635         // No translation needs to be done.
01636         if(mySequence.Length() == 0)
01637         {
01638             // Parse BAM sequence.
01639             if(myIsSequenceBufferValid)
01640             {
01641                 return(index & 1 ?
01642                        asciiBases[myPackedSequence[index / 2] & 0xF] :
01643                        asciiBases[myPackedSequence[index / 2] >> 4]);
01644             }
01645             else
01646             {
01647                 String exceptionString = "SamRecord::getSequence(";
01648                 exceptionString += index;
01649                 exceptionString += ") called with no sequence set";
01650                 throw std::runtime_error(exceptionString.c_str());
01651             }
01652         }
01653         // Already have string.
01654         return(mySequence[index]);
01655     }
01656     else
01657     {
01658         // Need to translate the sequence either to have '=' or to not
01659         // have it.
01660         // First check to see if the sequence has been set.
01661         if(mySequence.Length() == 0)
01662         {
01663             // 0 Length, means that it is in the buffer, but has not yet
01664             // been synced to the string, so do the sync.
01665             setSequenceAndQualityFromBuffer();
01666         }
01667 
01668         // Check the type of translation.
01669         if(translation == EQUAL)
01670         {
01671             // Check whether or not the string has already been 
01672             // retrieved that has the '=' in it.
01673             if(mySeqWithEq.length() == 0)
01674             {
01675                 // The string with '=' has not yet been determined,
01676                 // so get the string.
01677                 // Check to see if the sequence is defined.
01678                 if(mySequence == "*")
01679                 {
01680                     // Sequence is undefined, so no translation necessary.
01681                     mySeqWithEq = '*';
01682                 }
01683                 else
01684                 {
01685                     // Sequence defined, so translate it.
01686                     SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 
01687                                                       myRecordPtr->myPosition, 
01688                                                       *(getCigarInfo()),
01689                                                       getReferenceName(),
01690                                                       *myRefPtr,
01691                                                       mySeqWithEq);
01692                 }
01693             }
01694             // Sequence is set, so return it.
01695             return(mySeqWithEq[index]);
01696         }
01697         else
01698         {
01699             // translation == BASES
01700             // Check whether or not the string has already been 
01701             // retrieved that does not have the '=' in it.
01702             if(mySeqWithoutEq.length() == 0)
01703             {
01704                 // The string with '=' has not yet been determined,
01705                 // so get the string.
01706                 // Check to see if the sequence is defined.
01707                 if(mySequence == "*")
01708                 {
01709                     // Sequence is undefined, so no translation necessary.
01710                     mySeqWithoutEq = '*';
01711                 }
01712                 else
01713                 {
01714                     // Sequence defined, so translate it.
01715                     // The string without '=' has not yet been determined,
01716                     // so get the string.
01717                     SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 
01718                                                          myRecordPtr->myPosition, 
01719                                                          *(getCigarInfo()),
01720                                                          getReferenceName(),
01721                                                          *myRefPtr,
01722                                                          mySeqWithoutEq);
01723                 }
01724             }
01725             // Sequence is set, so return it.
01726             return(mySeqWithoutEq[index]);
01727         }
01728     }
01729 }
01730 
01731 
01732 char SamRecord::getQuality(int index)
01733 {
01734     // Determine the read length.
01735     int32_t readLen = getReadLength();
01736 
01737     // If the read length is 0, return ' ' whose ascii code is below
01738     // the minimum ascii code for qualities.
01739     if(readLen == 0)
01740     {
01741         return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01742     }
01743     else if((index < 0) || (index >= readLen))
01744     {
01745         // Only get here if the index was out of range, so thow an exception.
01746         String exceptionString = "SamRecord::getQuality(";
01747         exceptionString += index;
01748         exceptionString += ") is out of range. Index must be between 0 and ";
01749         exceptionString += (readLen - 1);
01750         throw std::runtime_error(exceptionString.c_str());
01751     }
01752 
01753     if(myQuality.Length() == 0) 
01754     {
01755         // Parse BAM Quality.
01756         // Know that myPackedQuality is correct since readLen != 0.
01757         return(myPackedQuality[index] + 33);
01758     }
01759     else
01760     {
01761         // Already have string.
01762         if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01763         {
01764             // Return the unknown quality character.
01765             return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01766         }
01767         else if(index >= myQuality.Length())
01768         {
01769             // Only get here if the index was out of range, so thow an exception.
01770             // Technically the myQuality string is not guaranteed to be the same length
01771             // as the sequence, so this catches that error.
01772             String exceptionString = "SamRecord::getQuality(";
01773             exceptionString += index;
01774             exceptionString += ") is out of range. Index must be between 0 and ";
01775             exceptionString += (myQuality.Length() - 1);
01776             throw std::runtime_error(exceptionString.c_str());
01777         }
01778         else
01779         {
01780             return(myQuality[index]);
01781         }
01782     }
01783 }
01784 
01785    
01786 Cigar* SamRecord::getCigarInfo()
01787 {
01788     // Check to see whether or not the Cigar has already been
01789     // set - this is determined by checking if alignment length
01790     // is set since alignment length and the cigar are set
01791     // at the same time.
01792     if(myAlignmentLength == -1)
01793     {
01794         // Not been set, so calculate it.
01795         parseCigar();
01796     }
01797     return(&myCigarRoller);
01798 }
01799 
01800 
01801 // Return the number of bases in this read that overlap the passed in
01802 // region.  (start & end are 0-based)
01803 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
01804 {
01805     // Determine whether or not the cigar has been parsed, which sets up
01806     // the cigar roller.  This is determined by checking the alignment length.
01807     if(myAlignmentLength == -1)
01808     {
01809         parseCigar();
01810     }
01811     return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
01812 }
01813 
01814 
01815 // Returns the values of all fields except the tags.
01816 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName, 
01817                           String& cigar, String& sequence, String& quality)
01818 {
01819     return(getFields(recStruct, readName, cigar, sequence, quality,
01820                      mySequenceTranslation));
01821 }
01822 
01823 
01824 // Returns the values of all fields except the tags.
01825 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName, 
01826                           String& cigar, String& sequence, String& quality,
01827                           SequenceTranslation translation)
01828 {
01829     myStatus = SamStatus::SUCCESS;
01830     if(myIsBufferSynced == false)
01831     {
01832         if(!fixBuffer(translation))
01833         {
01834             // failed to set the buffer, return false.
01835             return(false);
01836         }
01837     }
01838     memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01839 
01840     readName = getReadName();
01841     // Check the status.
01842     if(myStatus != SamStatus::SUCCESS)
01843     {
01844         // Failed to set the fields, return false.
01845         return(false);
01846     }
01847     cigar = getCigar();
01848     // Check the status.
01849     if(myStatus != SamStatus::SUCCESS)
01850     {
01851         // Failed to set the fields, return false.
01852         return(false);
01853     }
01854     sequence = getSequence(translation);
01855     // Check the status.
01856     if(myStatus != SamStatus::SUCCESS)
01857     {
01858         // Failed to set the fields, return false.
01859         return(false);
01860     }
01861     quality = getQuality();
01862     // Check the status.
01863     if(myStatus != SamStatus::SUCCESS)
01864     {
01865         // Failed to set the fields, return false.
01866         return(false);
01867     }
01868     return(true);
01869 }
01870 
01871 
01872 // Returns the reference pointer.
01873 GenomeSequence* SamRecord::getReference()
01874 {
01875     return(myRefPtr);
01876 }
01877 
01878 
01879 uint32_t SamRecord::getTagLength()
01880 {
01881     myStatus = SamStatus::SUCCESS;
01882     if(myNeedToSetTagsFromBuffer)
01883     {
01884         // Tags are only set in the buffer, so the size of the tags is 
01885         // the length of the record minus the starting location of the tags.
01886         unsigned char * tagStart = 
01887             (unsigned char *)myRecordPtr->myData 
01888             + myRecordPtr->myReadNameLength 
01889             + myRecordPtr->myCigarLength * sizeof(int)
01890             + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01891       
01892         // The non-tags take up from the start of the record to the tag start.
01893         // Do not include the block size part of the record since it is not
01894         // included in the size.
01895         uint32_t nonTagSize = 
01896             tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01897         // Tags take up the size of the block minus the non-tag section.
01898         uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01899         return(tagSize);
01900     }
01901 
01902     // Tags are stored outside the buffer, so myTagBufferSize is set.
01903     return(myTagBufferSize);
01904 }
01905 
01906 
01907 // Returns true if there is another tag and sets tag and vtype to the
01908 // appropriate values, and returns a pointer to the value.
01909 // Sets the Status to SUCCESS when a tag is successfully returned or
01910 // when there are no more tags.  Otherwise the status is set to describe
01911 // why it failed (parsing, etc).
01912 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01913 {
01914     myStatus = SamStatus::SUCCESS;
01915     if(myNeedToSetTagsFromBuffer)
01916     {
01917         if(!setTagsFromBuffer())
01918         {
01919             // Failed to read the tags from the buffer, so cannot
01920             // get tags.
01921             return(false);
01922         }
01923     }
01924 
01925     // Increment the tag index to start looking at the next tag.
01926     // At the beginning, it is set to -1.
01927     myLastTagIndex++;
01928     int maxTagIndex = extras.Capacity();
01929     if(myLastTagIndex >= maxTagIndex)
01930     {
01931         // Hit the end of the tags, return false, no more tags.
01932         // Status is still success since this is not an error, 
01933         // it is just the end of the list.
01934         return(false);
01935     }
01936 
01937     bool tagFound = false;
01938     // Loop until a tag is found or the end of extras is hit.
01939     while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01940     {
01941         if(extras.SlotInUse(myLastTagIndex))
01942         {
01943             // Found a slot to use.
01944             int key = extras.GetKey(myLastTagIndex);
01945             getTag(key, tag);
01946             getTypeFromKey(key, vtype);
01947             tagFound = true;
01948             // Get the value associated with the key based on the vtype.
01949             switch (vtype)
01950             {
01951                 case 'f' :
01952                     *value = getDoublePtr(myLastTagIndex);
01953                     break;
01954                 case 'i' :
01955                     *value = getIntegerPtr(myLastTagIndex, vtype);
01956                     if(vtype != 'A')
01957                     {
01958                         // Convert all int types to 'i'
01959                         vtype = 'i';
01960                     }
01961                     break;
01962                 case 'Z' :
01963                 case 'B' :
01964                     *value = getStringPtr(myLastTagIndex);
01965                     break;
01966                 default:
01967                     myStatus.setStatus(SamStatus::FAIL_PARSE,
01968                                        "Unknown tag type");
01969                     tagFound = false;
01970                     break;
01971             }
01972         }
01973         if(!tagFound)
01974         {
01975             // Increment the index since a tag was not found.
01976             myLastTagIndex++;
01977         }
01978     }
01979     return(tagFound);
01980 }
01981 
01982 
01983 // Reset the tag iterator to the beginning of the tags.
01984 void SamRecord::resetTagIter()
01985 {
01986     myLastTagIndex = -1;
01987 }
01988 
01989 
01990 bool SamRecord::isIntegerType(char vtype)
01991 {
01992     if((vtype == 'c') || (vtype == 'C') ||
01993        (vtype == 's') || (vtype == 'S') ||
01994        (vtype == 'i') || (vtype == 'I'))
01995     {
01996         return(true);
01997     }
01998     return(false);
01999 }
02000 
02001 
02002 bool SamRecord::isDoubleType(char vtype)
02003 {
02004     if(vtype == 'f')
02005     {
02006         return(true);
02007     }
02008     return(false);
02009 }
02010 
02011 
02012 bool SamRecord::isCharType(char vtype)
02013 {
02014     if(vtype == 'A')
02015     {
02016         return(true);
02017     }
02018     return(false);
02019 }
02020 
02021 
02022 bool SamRecord::isStringType(char vtype)
02023 {
02024     if((vtype == 'Z') || (vtype == 'B'))
02025     {
02026         return(true);
02027     }
02028     return(false);
02029 }
02030 
02031 
02032 bool SamRecord::getTagsString(const char* tags, String& returnString, char delim)
02033 {
02034     const char* currentTagPtr = tags;
02035 
02036     returnString.Clear();
02037     myStatus = SamStatus::SUCCESS;
02038     if(myNeedToSetTagsFromBuffer)
02039     {
02040         if(!setTagsFromBuffer())
02041         {
02042             // Failed to read the tags from the buffer, so cannot
02043             // get tags.
02044             return(false);
02045         }
02046     }
02047     
02048     bool returnStatus = true;
02049 
02050     while(*currentTagPtr != '\0')
02051     {
02052         // Tags are formatted as: XY:Z
02053         // Where X is [A-Za-z], Y is [A-Za-z], and
02054         // Z is A,i,f,Z,H (cCsSI are also excepted)
02055         if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
02056            (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
02057         {
02058             myStatus.setStatus(SamStatus::INVALID, 
02059                                "getTagsString called with improperly formatted tags.\n");
02060             returnStatus = false;
02061             break;
02062         }
02063 
02064         // Construct the key.
02065         int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1], 
02066                           currentTagPtr[3]);
02067         // Look to see if the key exsists in the hash.
02068         int offset = extras.Find(key);
02069 
02070         if(offset >= 0)
02071         {
02072             // Offset is set, so the key was found.
02073             if(!returnString.IsEmpty())
02074             {
02075                 returnString += delim;
02076             }
02077             returnString += currentTagPtr[0];
02078             returnString += currentTagPtr[1];
02079             returnString += ':';
02080             returnString += currentTagPtr[3];
02081             returnString += ':';
02082 
02083             // First if it is an integer, determine the actual type of the int.
02084             char vtype;
02085             getTypeFromKey(key, vtype);
02086 
02087             switch(vtype)
02088             {
02089                 case 'i':
02090                     returnString += *(int*)getIntegerPtr(offset, vtype);
02091                     break;
02092                 case 'f':
02093                     returnString += *(double*)getDoublePtr(offset);
02094                     break;
02095                 case 'Z':
02096                 case 'B':
02097                     returnString += *(String*)getStringPtr(offset);
02098                     break;
02099                 default:
02100                     myStatus.setStatus(SamStatus::INVALID, 
02101                                        "rmTag called with unknown type.\n");
02102                     returnStatus = false;
02103                     break;
02104             };
02105         }
02106         // Increment to the next tag.
02107         if(currentTagPtr[4] == ';')
02108         {
02109             // Increment once more.
02110             currentTagPtr += 5;
02111         }
02112         else if(currentTagPtr[4] != '\0')
02113         {
02114             // Invalid tag format. 
02115             myStatus.setStatus(SamStatus::INVALID, 
02116                                "rmTags called with improperly formatted tags.\n");
02117             returnStatus = false;
02118             break;
02119         }
02120         else
02121         {
02122             // Last Tag.
02123             currentTagPtr += 4;
02124         }
02125     }
02126     return(returnStatus);
02127 }
02128 
02129 
02130 const String* SamRecord::getStringTag(const char * tag)
02131 {
02132     // Parse the buffer if necessary.
02133     if(myNeedToSetTagsFromBuffer)
02134     {
02135         if(!setTagsFromBuffer())
02136         {
02137             // Failed to read the tags from the buffer, so cannot
02138             // get tags.  setTagsFromBuffer set the errors,
02139             // so just return null.
02140             return(NULL);
02141         }
02142     }
02143     
02144     int key = MAKEKEY(tag[0], tag[1], 'Z');
02145     int offset = extras.Find(key);
02146 
02147     int value;
02148     if (offset < 0)
02149     {
02150         // Check for 'B' tag.
02151         key = MAKEKEY(tag[0], tag[1], 'B');
02152         offset = extras.Find(key);
02153         if(offset < 0)
02154         {
02155             // Tag not found.
02156             return(NULL);
02157         }
02158     }
02159 
02160     // Offset is valid, so return the tag.
02161     value = extras[offset];
02162     return(&(strings[value]));
02163 }
02164 
02165 
02166 int* SamRecord::getIntegerTag(const char * tag)
02167 {
02168     // Init to success.
02169     myStatus = SamStatus::SUCCESS;
02170     // Parse the buffer if necessary.
02171     if(myNeedToSetTagsFromBuffer)
02172     {
02173         if(!setTagsFromBuffer())
02174         {
02175             // Failed to read the tags from the buffer, so cannot
02176             // get tags.  setTagsFromBuffer set the errors,
02177             // so just return null.
02178             return(NULL);
02179         }
02180     }
02181     
02182     int key = MAKEKEY(tag[0], tag[1], 'i');
02183     int offset = extras.Find(key);
02184 
02185     int value;
02186     if (offset < 0)
02187     {
02188         // Failed to find the tag.
02189         return(NULL);
02190     }
02191     else
02192         value = extras[offset];
02193 
02194     return(&(integers[value]));
02195 }
02196 
02197 
02198 double* SamRecord::getDoubleTag(const char * tag)
02199 {
02200     // Init to success.
02201     myStatus = SamStatus::SUCCESS;
02202     // Parse the buffer if necessary.
02203     if(myNeedToSetTagsFromBuffer)
02204     {
02205         if(!setTagsFromBuffer())
02206         {
02207             // Failed to read the tags from the buffer, so cannot
02208             // get tags.  setTagsFromBuffer set the errors,
02209             // so just return null.
02210             return(NULL);
02211         }
02212     }
02213     
02214     int key = MAKEKEY(tag[0], tag[1], 'f');
02215     int offset = extras.Find(key);
02216 
02217     int value;
02218     if (offset < 0)
02219     {
02220         // Failed to find the tag.
02221         return(NULL);
02222     }
02223     else
02224         value = extras[offset];
02225 
02226     return(&(doubles[value]));
02227 }
02228 
02229 
02230 String & SamRecord::getString(const char * tag)
02231 {
02232     // Init to success.
02233     myStatus = SamStatus::SUCCESS;
02234     // Parse the buffer if necessary.
02235     if(myNeedToSetTagsFromBuffer)
02236     {
02237         if(!setTagsFromBuffer())
02238         {
02239             // Failed to read the tags from the buffer, so cannot
02240             // get tags.
02241             // TODO - what do we want to do on failure?            
02242         }
02243     }
02244     
02245     int key = MAKEKEY(tag[0], tag[1], 'Z');
02246     int offset = extras.Find(key);
02247 
02248     int value;
02249     if (offset < 0)
02250     {
02251     
02252         key = MAKEKEY(tag[0], tag[1], 'B');
02253         offset = extras.Find(key);
02254         if (offset < 0)
02255         {
02256             // TODO - what do we want to do on failure?
02257             return(NOT_FOUND_TAG_STRING);
02258         }
02259     }
02260     value = extras[offset];
02261 
02262     return strings[value];
02263 }
02264 
02265 int & SamRecord::getInteger(const char * tag)
02266 {
02267     // Init to success.
02268     myStatus = SamStatus::SUCCESS;
02269     // Parse the buffer if necessary.
02270     if(myNeedToSetTagsFromBuffer)
02271     {
02272         if(!setTagsFromBuffer())
02273         {
02274             // Failed to read the tags from the buffer, so cannot
02275             // get tags.  setTagsFromBuffer set the error.
02276             // TODO - what do we want to do on failure?
02277         }
02278     }
02279     
02280     int key = MAKEKEY(tag[0], tag[1], 'i');
02281     int offset = extras.Find(key);
02282 
02283     int value;
02284     if (offset < 0)
02285     {
02286         // TODO - what do we want to do on failure?
02287         return NOT_FOUND_TAG_INT;
02288     }
02289     else
02290         value = extras[offset];
02291 
02292     return integers[value];
02293 }
02294 
02295 
02296 double & SamRecord::getDouble(const char * tag)
02297 {
02298     // Init to success.
02299     myStatus = SamStatus::SUCCESS;
02300     // Parse the buffer if necessary.
02301     if(myNeedToSetTagsFromBuffer)
02302     {
02303         if(!setTagsFromBuffer())
02304         {
02305             // Failed to read the tags from the buffer, so cannot
02306             // get tags.  setTagsFromBuffer set the error.
02307             // TODO - what do we want to do on failure?
02308         }
02309     }
02310     
02311     int key = MAKEKEY(tag[0], tag[1], 'f');
02312     int offset = extras.Find(key);
02313 
02314     int value;
02315     if (offset < 0)
02316     {
02317         // TODO - what do we want to do on failure?
02318         return NOT_FOUND_TAG_DOUBLE;
02319     }
02320     else
02321         value = extras[offset];
02322 
02323     return doubles[value];
02324 }
02325 
02326 
02327 bool SamRecord::checkTag(const char * tag, char type)
02328 {
02329     // Init to success.
02330     myStatus = SamStatus::SUCCESS;
02331     // Parse the buffer if necessary.
02332     if(myNeedToSetTagsFromBuffer)
02333     {
02334         if(!setTagsFromBuffer())
02335         {
02336             // Failed to read the tags from the buffer, so cannot
02337             // get tags.  setTagsFromBuffer set the error.
02338             return("");
02339         }
02340     }
02341     
02342     int key = MAKEKEY(tag[0], tag[1], type);
02343 
02344     return (extras.Find(key) != LH_NOTFOUND);
02345 }
02346 
02347 
02348 // Return the error after a failed SamRecord call.
02349 const SamStatus& SamRecord::getStatus()
02350 {
02351     return(myStatus);
02352 }
02353 
02354 
02355 // Allocate space for the record - does a realloc.  
02356 // The passed in size is the size of the entire record including the
02357 // block size field.
02358 bool SamRecord::allocateRecordStructure(int size)
02359 {
02360     if (allocatedSize < size)
02361     {
02362         bamRecordStruct* tmpRecordPtr = 
02363             (bamRecordStruct *)realloc(myRecordPtr, size);
02364         if(tmpRecordPtr == NULL)
02365         {
02366             // FAILED to allocate memory
02367             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02368             myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
02369             return(false);
02370         }
02371         // Successfully allocated memory, so set myRecordPtr.
02372         myRecordPtr = tmpRecordPtr;
02373 
02374         // Reset the pointers into the record.
02375         if(myIsSequenceBufferValid)
02376         {
02377             myPackedSequence = (unsigned char *)myRecordPtr->myData + 
02378                 myRecordPtr->myReadNameLength +
02379                 myRecordPtr->myCigarLength * sizeof(int);
02380         }
02381         if(myIsQualityBufferValid)
02382         {
02383             myPackedQuality = (unsigned char *)myRecordPtr->myData + 
02384                 myRecordPtr->myReadNameLength +
02385                 myRecordPtr->myCigarLength * sizeof(int) + 
02386                 (myRecordPtr->myReadLength + 1) / 2;
02387         }
02388 
02389         allocatedSize = size;
02390     }
02391     return(true);
02392 }
02393 
02394 
02395 // Index is the index into the strings array.
02396 void* SamRecord::getStringPtr(int index)
02397 {
02398     int value = extras[index];
02399 
02400     return &(strings[value]);
02401 }
02402 
02403 void* SamRecord::getIntegerPtr(int offset, char& type)
02404 {
02405     int value = extras[offset];
02406 
02407     type = intType[value];
02408 
02409     return &(integers[value]);
02410 }
02411 
02412 void* SamRecord::getDoublePtr(int offset)
02413 {
02414     int value = extras[offset];
02415 
02416     return &(doubles[value]);
02417 }
02418 
02419 
02420 // Fixes the buffer to match the variable length fields if they are set.
02421 bool SamRecord::fixBuffer(SequenceTranslation translation)
02422 {
02423     // Check to see if the buffer is already synced.
02424     if(myIsBufferSynced &&
02425        (myBufferSequenceTranslation == translation))
02426     {
02427         // Already synced, nothing to do.
02428         return(true);
02429     }
02430    
02431     // Set the bin if necessary.
02432     if(!myIsBinValid)
02433     {
02434         // The bin that is set in the record is not valid, so
02435         // reset it.
02436         myRecordPtr->myBin = 
02437             bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());      
02438         myIsBinValid = true;
02439     }
02440 
02441     // Not synced.
02442     bool status = true;
02443 
02444     // First determine the size the buffer needs to be.
02445     uint8_t newReadNameLen = getReadNameLength();
02446     uint16_t newCigarLen = getCigarLength();
02447     int32_t newReadLen = getReadLength();
02448     uint32_t newTagLen = getTagLength();
02449     uint32_t bamSequenceLen = (newReadLen+1)/2;
02450 
02451     // The buffer size extends from the start of the record to data
02452     // plus the length of the variable fields,
02453     // Multiply the cigar length by 4 since it is the number of uint32_t fields.
02454     int newBufferSize = 
02455         ((unsigned char*)(&(myRecordPtr->myData)) - 
02456          (unsigned char*)myRecordPtr) +
02457         newReadNameLen + ((newCigarLen)*4) +
02458         newReadLen + bamSequenceLen + newTagLen;
02459    
02460     if(!allocateRecordStructure(newBufferSize))
02461     {
02462         // Failed to allocate space.
02463         return(false);
02464     }
02465 
02466     // Now that space has been added to the buffer, check to see what if
02467     // any fields need to be extracted from the buffer prior to starting to
02468     // overwrite it.  Fields need to be extracted from the buffer if the 
02469     // buffer is valid for the field and a previous variable length field has
02470     // changed length.
02471     bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
02472     bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
02473     bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
02474 
02475     // If the tags are still stored in the buffer and any other fields changed
02476     // lengths, they need to be extracted.
02477     if(myIsTagsBufferValid &&
02478        (readNameLenChange | cigarLenChange | readLenChange))
02479     {
02480         status &= setTagsFromBuffer();
02481         // The tag buffer will not be valid once the other fields
02482         // are written, so set it to not valid.
02483         myIsTagsBufferValid = false;
02484     }
02485 
02486     // If the sequence or quality strings are still stored in the buffer, and
02487     // any of the previous fields have changed length, extract it from the
02488     // current buffer.
02489     if((myIsQualityBufferValid | myIsSequenceBufferValid) && 
02490        (readNameLenChange | cigarLenChange | readLenChange))
02491     {
02492         setSequenceAndQualityFromBuffer();
02493         // The quality and sequence buffers will not be valid once the other
02494         // fields are written, so set them to not valid.
02495         myIsQualityBufferValid = false;
02496         myIsSequenceBufferValid = false;
02497     }
02498 
02499     // If the cigar is still stored in the buffer, and any of the
02500     // previous fields have changed length, extract it from the current buffer.
02501     if((myIsCigarBufferValid) && 
02502        (readNameLenChange))
02503     {
02504         status &= parseCigarBinary();
02505         myIsCigarBufferValid = false;
02506     }
02507 
02508     // Set each value in the buffer if it is not already valid.
02509     if(!myIsReadNameBufferValid)
02510     {
02511         memcpy(&(myRecordPtr->myData), myReadName.c_str(), 
02512                newReadNameLen);
02513    
02514         // Set the new ReadNameLength.
02515         myRecordPtr->myReadNameLength = newReadNameLen;
02516         myIsReadNameBufferValid = true;
02517     }
02518 
02519     unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02520         myRecordPtr->myReadNameLength;
02521    
02522     // Set the Cigar.  Need to reformat from the string to 
02523     unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02524       
02525     if(!myIsCigarBufferValid)
02526     {
02527         // The cigar was already parsed when it was set, so just copy
02528         // data from the temporary buffer.
02529         myRecordPtr->myCigarLength = newCigarLen;
02530         memcpy(packedCigar, myCigarTempBuffer, 
02531                myRecordPtr->myCigarLength * sizeof(uint32_t));
02532       
02533         myIsCigarBufferValid = true;
02534     }
02535 
02536     unsigned char * packedSequence = readNameEnds + 
02537         myRecordPtr->myCigarLength * sizeof(int);
02538     unsigned char * packedQuality = packedSequence + bamSequenceLen;
02539    
02540     if(!myIsSequenceBufferValid || !myIsQualityBufferValid || 
02541        (myBufferSequenceTranslation != translation))
02542     {
02543         myRecordPtr->myReadLength = newReadLen;
02544         // Determine if the quality needs to be set and is just a * and needs to
02545         // be set to 0xFF.
02546         bool noQuality = false;
02547         if((myQuality.Length() == 1) && (myQuality[0] == '*'))
02548         {
02549             noQuality = true;
02550         }
02551       
02552         const char* translatedSeq = NULL;
02553         // If the sequence is not valid in the buffer or it is not
02554         // properly translated, get the properly translated sequence
02555         // that needs to be put into the buffer.
02556         if((!myIsSequenceBufferValid) ||
02557            (translation != myBufferSequenceTranslation))
02558         {
02559             translatedSeq = getSequence(translation);
02560         }
02561 
02562         for (int i = 0; i < myRecordPtr->myReadLength; i++) 
02563         {
02564             if((!myIsSequenceBufferValid) ||
02565                (translation != myBufferSequenceTranslation))
02566             {
02567                 // Sequence buffer is not valid, so set the sequence.
02568                 int seqVal = 0;
02569                 switch(translatedSeq[i])
02570                 {
02571                     case '=':
02572                         seqVal = 0;
02573                         break;
02574                     case 'A':
02575                     case 'a':
02576                         seqVal = 1;
02577                         break;
02578                     case 'C':
02579                     case 'c':
02580                         seqVal = 2;
02581                         break;
02582                     case 'G':
02583                     case 'g':
02584                         seqVal = 4;
02585                         break;
02586                     case 'T':
02587                     case 't':
02588                         seqVal = 8;
02589                         break;
02590                     case 'N':
02591                     case 'n':
02592                     case '.':
02593                         seqVal = 15;
02594                         break;
02595                     default:
02596                         myStatus.addError(SamStatus::FAIL_PARSE,
02597                                           "Unknown Sequence character found.");
02598                         status = false;
02599                         break;
02600                 };
02601             
02602                 if(i & 1)
02603                 {
02604                     // Odd number i's go in the lower 4 bits, so OR in the
02605                     // lower bits
02606                     packedSequence[i/2] |= seqVal;
02607                 }
02608                 else
02609                 {
02610                     // Even i's go in the upper 4 bits and are always set first.
02611                     packedSequence[i/2] = seqVal << 4;
02612                 }
02613             }
02614 
02615             if(!myIsQualityBufferValid)
02616             {
02617                 // Set the quality.
02618                 if((noQuality) || (myQuality.Length() <= i))
02619                 {
02620                     // No quality or the quality is smaller than the sequence,
02621                     // so set it to 0xFF
02622                     packedQuality[i] = 0xFF;
02623                 }
02624                 else
02625                 {
02626                     // Copy the quality string.
02627                     packedQuality[i] = myQuality[i] - 33;
02628                 }
02629             }
02630         }
02631         myPackedSequence = (unsigned char *)myRecordPtr->myData + 
02632             myRecordPtr->myReadNameLength + 
02633             myRecordPtr->myCigarLength * sizeof(int);
02634         myPackedQuality = myPackedSequence + 
02635             (myRecordPtr->myReadLength + 1) / 2;
02636         myIsSequenceBufferValid = true;
02637         myIsQualityBufferValid = true;
02638         myBufferSequenceTranslation = translation;
02639     }
02640 
02641     if(!myIsTagsBufferValid)
02642     {
02643         status &= setTagsInBuffer();
02644     }
02645 
02646     // Set the lengths in the buffer.
02647     myRecordPtr->myReadNameLength = newReadNameLen;
02648     myRecordPtr->myCigarLength = newCigarLen;
02649     myRecordPtr->myReadLength = newReadLen;
02650 
02651     // Set the buffer block size to the size of the buffer minus the
02652     // first field.
02653     myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
02654 
02655     if(status)
02656     {
02657         myIsBufferSynced = true;
02658     }
02659 
02660     return(status);
02661 }
02662 
02663 
02664 // Sets the Sequence and Quality strings from the buffer.
02665 // They are done together in one method because they require the same
02666 // loop, so might as well be done at the same time.
02667 void SamRecord::setSequenceAndQualityFromBuffer()
02668 {
02669     // NOTE: If the sequence buffer is not valid, do not set the sequence
02670     // string from the buffer.
02671     // NOTE: If the quality buffer is not valid, do not set the quality string
02672     // from the buffer.
02673 
02674     // Extract the sequence if the buffer is valid and the string's length is 0.
02675     bool extractSequence = false;
02676     if(myIsSequenceBufferValid && (mySequence.Length() == 0))
02677     {
02678         extractSequence = true;
02679     }
02680 
02681     // Extract the quality if the buffer is valid and the string's length is 0.
02682     bool extractQuality = false;
02683     if(myIsQualityBufferValid && (myQuality.Length() == 0))
02684     {
02685         extractQuality = true;
02686     }
02687 
02688     // If neither the quality nor the sequence need to be extracted,
02689     // just return.
02690     if(!extractSequence && !extractQuality)
02691     {
02692         return;
02693     }
02694 
02695     // Set the sequence and quality strings..
02696     if(extractSequence)
02697     {
02698         mySequence.SetLength(myRecordPtr->myReadLength);
02699     }
02700     if(extractQuality)
02701     {
02702         myQuality.SetLength(myRecordPtr->myReadLength);
02703     }
02704    
02705     const char * asciiBases = "=AC.G...T......N";
02706 
02707     // Flag to see if the quality is specified - the quality contains a value
02708     // other than 0xFF.  If all values are 0xFF, then there is no quality.
02709     bool qualitySpecified = false;
02710 
02711     for (int i = 0; i < myRecordPtr->myReadLength; i++)
02712     {
02713         if(extractSequence)
02714         {
02715             mySequence[i] = i & 1 ?
02716                 asciiBases[myPackedSequence[i / 2] & 0xF] :
02717                 asciiBases[myPackedSequence[i / 2] >> 4];
02718         }
02719 
02720         if(extractQuality)
02721         {
02722             if(myPackedQuality[i] != 0xFF)
02723             {
02724                 // Quality is specified, so mark the flag.
02725                 qualitySpecified = true;
02726             }
02727 
02728             myQuality[i] = myPackedQuality[i] + 33;
02729         }
02730     }
02731 
02732     // If the read length is 0, then set the sequence and quality to '*'
02733     if(myRecordPtr->myReadLength == 0)
02734     {
02735         if(extractSequence)
02736         {
02737             mySequence = "*";
02738         }
02739         if(extractQuality)
02740         {
02741             myQuality = "*";
02742         }
02743     }
02744     else if(extractQuality && !qualitySpecified)
02745     {
02746         // No quality was specified, so set it to "*"
02747         myQuality = "*";
02748     }
02749 }
02750 
02751 
02752 // Parse the cigar to calculate the alignment/unclipped end.
02753 bool SamRecord::parseCigar()
02754 {
02755     // Determine if the cigar string or cigar binary needs to be parsed.
02756     if(myCigar.Length() == 0)
02757     {
02758         // The cigar string is not yet set, so parse the binary.
02759         return(parseCigarBinary());
02760     }
02761     return(parseCigarString());
02762 }
02763 
02764 // Parse the cigar to calculate the alignment/unclipped end.
02765 bool SamRecord::parseCigarBinary()
02766 {
02767     // Only need to parse if the string is not already set.
02768     // The length of the cigar string is set to zero when the 
02769     // record is read from a file into the buffer.
02770     if(myCigar.Length() != 0)
02771     {
02772         // Already parsed.
02773         return(true);
02774     }
02775 
02776     unsigned char * readNameEnds = 
02777         (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02778    
02779     unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02780 
02781     myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
02782     
02783     myCigarRoller.getCigarString(myCigar);
02784 
02785     myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02786 
02787     myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02788     myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02789 
02790     // if the cigar length is 0, then set the cigar string to "*"
02791     if(myRecordPtr->myCigarLength == 0)
02792     {
02793         myCigar = "*";
02794         return(true);
02795     }
02796 
02797     // Copy the cigar into a temporary buffer.
02798     int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
02799     if(newBufferSize > myCigarTempBufferAllocatedSize)
02800     {
02801         uint32_t* tempBufferPtr = 
02802             (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02803         if(tempBufferPtr == NULL)
02804         {
02805             // Failed to allocate memory.
02806             // Do not parse, just return.
02807             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02808             myStatus.addError(SamStatus::FAIL_MEM,
02809                               "Failed to Allocate Memory.");
02810             return(false);
02811         }
02812         myCigarTempBuffer = tempBufferPtr;
02813         myCigarTempBufferAllocatedSize = newBufferSize;
02814     }
02815 
02816     memcpy(myCigarTempBuffer, packedCigar, 
02817            myRecordPtr->myCigarLength * sizeof(uint32_t));
02818 
02819     // Set the length of the temp buffer.
02820     myCigarTempBufferLength = myRecordPtr->myCigarLength;
02821 
02822     return(true);
02823 }
02824 
02825 // Parse the cigar string to calculate the cigar length and alignment end.
02826 bool SamRecord::parseCigarString()
02827 {
02828     myCigarTempBufferLength = 0;
02829     if(myCigar == "*")
02830     {
02831         // Cigar is empty, so initialize the variables.
02832         myAlignmentLength = 0;
02833         myUnclippedStartOffset = 0;
02834         myUnclippedEndOffset = 0;
02835         myCigarRoller.clear();
02836         return(true);
02837     }
02838 
02839     myCigarRoller.Set(myCigar);
02840     
02841     myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02842     
02843     myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02844     myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02845 
02846     // Check to see if the Temporary Cigar Buffer is large enough to contain
02847     // this cigar.  If we make it the size of the length of the cigar string,
02848     // it will be more than large enough.
02849     int newBufferSize = myCigar.Length() * sizeof(uint32_t);
02850     if(newBufferSize > myCigarTempBufferAllocatedSize)
02851     {
02852         uint32_t* tempBufferPtr = 
02853             (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02854         if(tempBufferPtr == NULL)
02855         {
02856             // Failed to allocate memory.
02857             // Do not parse, just return.
02858             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02859             myStatus.addError(SamStatus::FAIL_MEM,
02860                               "Failed to Allocate Memory.");
02861             return(false);
02862         }
02863         myCigarTempBuffer = tempBufferPtr;
02864         myCigarTempBufferAllocatedSize = newBufferSize;
02865     }
02866 
02867     // Track if there were any errors.
02868     bool status = true;
02869 
02870     // Track the index into the cigar string that is being parsed.
02871     char *cigarOp;
02872     const char* cigarEntryStart = myCigar.c_str();
02873     int opLen = 0;
02874     int op = 0;
02875 
02876     unsigned int * packedCigar = myCigarTempBuffer;
02877     // TODO - maybe one day make a cigar list... or maybe make a 
02878     // reference cigar string for ease of lookup....
02879     const char* endCigarString = cigarEntryStart + myCigar.Length();
02880     while(cigarEntryStart < endCigarString)
02881     {
02882         bool validCigarEntry = true;
02883         // Get the opLen from the string.  cigarOp will then point to 
02884         // the operation.
02885         opLen = strtol(cigarEntryStart, &cigarOp, 10);
02886         // Switch on the type of operation.
02887         switch(*cigarOp)
02888         {
02889             case('M'):
02890                 op = 0;
02891                 break;
02892             case('I'):
02893                 // Insert into the reference position, so do not increment the
02894                 // reference end position.
02895                 op = 1;
02896                 break;
02897             case('D'):
02898                 op = 2;
02899                 break;
02900             case('N'):
02901                 op = 3;
02902                 break;
02903             case('S'):
02904                 op = 4;
02905                 break;
02906             case('H'):
02907                 op = 5;
02908                 break;
02909             case('P'):
02910                 op = 6;
02911                 break;
02912             default:
02913                 fprintf(stderr, "ERROR parsing cigar\n");
02914                 validCigarEntry = false;
02915                 status = false;
02916                 myStatus.addError(SamStatus::FAIL_PARSE,
02917                                   "Unknown operation found when parsing the Cigar.");
02918                 break;
02919         };
02920         if(validCigarEntry)
02921         {
02922             // Increment the cigar length.
02923             ++myCigarTempBufferLength;
02924             *packedCigar = (opLen << 4) | op;
02925             packedCigar++;
02926         }
02927         // The next Entry starts at one past the cigar op, so set the start.
02928         cigarEntryStart = ++cigarOp;
02929     }
02930 
02931     // Check clipLength to adjust the end position.
02932     return(status);
02933 }
02934 
02935 
02936 bool SamRecord::setTagsFromBuffer()
02937 {
02938     // If the tags do not need to be set from the buffer, return true.
02939     if(myNeedToSetTagsFromBuffer == false)
02940     {
02941         // Already been set from the buffer.
02942         return(true);
02943     }
02944 
02945     // Mark false, as they are being set now.
02946     myNeedToSetTagsFromBuffer = false;
02947 
02948     unsigned char * extraPtr = myPackedQuality + myRecordPtr->myReadLength;
02949 
02950     // Default to success, will be changed to false on failure.
02951     bool status = true;
02952 
02953     // Clear any previously set tags.
02954     clearTags();
02955     while (myRecordPtr->myBlockSize + 4 - 
02956            (extraPtr - (unsigned char *)myRecordPtr) > 0)
02957     {
02958         int key = 0;
02959         int value = 0;
02960         void * content = extraPtr + 3;
02961         int tagBufferSize = 0;
02962 
02963         key = MAKEKEY(extraPtr[0], extraPtr[1], extraPtr[2]);
02964       
02965         switch (extraPtr[2])
02966         {
02967             case 'A' :
02968                 value = integers.Length();
02969                 integers.Push(* (char *) content);
02970                 intType.push_back(extraPtr[2]);
02971                 extraPtr += 4;
02972                 tagBufferSize += 4;
02973                 break;
02974             case 'c' :
02975                 value = integers.Length();
02976                 integers.Push(* (char *) content);
02977                 intType.push_back(extraPtr[2]);
02978                 extraPtr += 4;
02979                 tagBufferSize += 4;
02980                 break;
02981             case 'C' :
02982                 value = integers.Length();
02983                 integers.Push(* (unsigned char *) content);
02984                 intType.push_back(extraPtr[2]);
02985                 extraPtr += 4;
02986                 tagBufferSize += 4;
02987                 break;
02988             case 's' :
02989                 value = integers.Length();
02990                 integers.Push(* (short *) content);
02991                 intType.push_back(extraPtr[2]);
02992                 extraPtr += 5;
02993                 tagBufferSize += 5;
02994                 break;
02995             case 'S' :
02996                 value = integers.Length();
02997                 integers.Push(* (unsigned short *) content);
02998                 intType.push_back(extraPtr[2]);
02999                 extraPtr += 5;
03000                 tagBufferSize += 5;
03001                 break;
03002             case 'i' :
03003                 value = integers.Length();
03004                 integers.Push(* (int *) content);
03005                 intType.push_back(extraPtr[2]);
03006                 extraPtr += 7;
03007                 tagBufferSize += 7;
03008                 break;
03009             case 'I' :
03010                 value = integers.Length();
03011                 integers.Push((int) * (unsigned int *) content);
03012                 intType.push_back(extraPtr[2]);
03013                 extraPtr += 7;
03014                 tagBufferSize += 7;
03015                 break;
03016             case 'Z' :
03017             case 'B' :
03018                 value = strings.Length();
03019                 strings.Push((const char *) content);
03020                 extraPtr += 4 + strings.Last().Length();
03021                 tagBufferSize += 4 + strings.Last().Length();
03022                 break;
03023             case 'f' :
03024                 value = doubles.Length();
03025                 doubles.Push(* (float *) content);
03026                 fprintf(stderr, "\n\nFLOAT!!!\n\n");
03027                 extraPtr += 7;
03028                 tagBufferSize += 7;
03029                 break;
03030             default :
03031                 fprintf(stderr, 
03032                         "parsing BAM - Unknown custom field of type %c%c:%c\n",
03033                         extraPtr[0], extraPtr[1], extraPtr[2]);
03034                 // Failed on read.
03035                 // Increment extraPtr just by the size of the 3 known fields
03036                 extraPtr += 3;
03037                 myStatus.addError(SamStatus::FAIL_PARSE,
03038                                   "Unknown tag type.");
03039                 status = false;
03040         }
03041 
03042         // Only add the tag if it has so far been successfully processed.
03043         if(status)
03044         {
03045             extras.Add(key, value);
03046             myTagBufferSize += tagBufferSize;
03047         }
03048     }
03049     return(status);
03050 }
03051 
03052 
03053 bool SamRecord::setTagsInBuffer()
03054 {
03055     // The buffer size extends from the start of the record to data
03056     // plus the length of the variable fields,
03057     // Multiply the cigar length by 4 since it is the number of uint32_t fields.
03058     int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
03059     int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) - 
03060                          (unsigned char*)myRecordPtr) +  
03061         myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
03062         myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
03063 
03064     // Make sure the buffer is big enough.
03065     if(!allocateRecordStructure(newBufferSize))
03066     {
03067         // Failed to allocate space.
03068         return(false);
03069     }
03070 
03071     char * extraPtr = (char*)myPackedQuality + myRecordPtr->myReadLength;
03072 
03073     bool status = true;
03074 
03075     // Set the tags in the buffer.
03076     if (extras.Entries())
03077     {
03078         for (int i = 0; i < extras.Capacity(); i++)
03079         {
03080             if (extras.SlotInUse(i))
03081             {
03082                 int key = extras.GetKey(i);
03083                 getTag(key, extraPtr);
03084                 extraPtr += 2;
03085                 char vtype;
03086                 getTypeFromKey(key, vtype);
03087  
03088                 if(vtype == 'i')
03089                 {
03090                     vtype = getIntegerType(i);
03091                 }
03092 
03093                 extraPtr[0] = vtype;
03094 
03095                 // increment the pointer to where the value is.
03096                 extraPtr += 1;
03097 
03098                 switch (vtype)
03099                 {
03100                     case 'A' :
03101                         *(char*)extraPtr = (char)getInteger(i);
03102                         // sprintf(extraPtr, "%d", getInteger(i));
03103                         extraPtr += 1;
03104                         break;
03105                     case 'c' :
03106                         *(int8_t*)extraPtr = (int8_t)getInteger(i);
03107                         // sprintf(extraPtr, "%.4d", getInteger(i));
03108                         extraPtr += 1;
03109                         break;
03110                     case 'C' :
03111                         *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
03112                         // sprintf(extraPtr, "%.4d", getInteger(i));
03113                         extraPtr += 1;
03114                         break;
03115                     case 's' :
03116                         *(int16_t*)extraPtr = (int16_t)getInteger(i);
03117                         // sprintf(extraPtr, "%.4d", getInteger(i));
03118                         extraPtr += 2;
03119                         break;
03120                     case 'S' :
03121                         *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
03122                         // sprintf(extraPtr, "%.4d", getInteger(i));
03123                         extraPtr += 2;
03124                         break;
03125                     case 'i' :
03126                         *(int32_t*)extraPtr = (int32_t)getInteger(i);
03127                         // sprintf(extraPtr, "%.4d", getInteger(i));
03128                         extraPtr += 4;
03129                         break;
03130                     case 'I' :
03131                         *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
03132                         // sprintf(extraPtr, "%.4d", getInteger(i));
03133                         extraPtr += 4;
03134                         break;
03135                     case 'Z' :
03136                     case 'B' :
03137                         sprintf(extraPtr, "%s", getString(i).c_str());
03138                         extraPtr += getString(i).Length() + 1;
03139                         break;
03140                     case 'f' :
03141                         // TODO, figure out if %f is good enough.
03142                         sprintf(extraPtr, "%f", getDouble(i));
03143                         extraPtr += 4;
03144                         break;
03145                     default :
03146                         myStatus.addError(SamStatus::FAIL_PARSE,
03147                                           "Unknown tag type.");
03148                         status = false;
03149                         break;
03150                 }
03151             }
03152         }
03153     }
03154 
03155     // Validate that the extra pointer is at the end of the allocated buffer.
03156     // If not then there was a problem.
03157     if(extraPtr != (char*)myRecordPtr + newBufferSize)
03158     {
03159         fprintf(stderr, "ERROR updating the buffer.  Incorrect size.");
03160         myStatus.addError(SamStatus::FAIL_PARSE,
03161                           "ERROR updating the buffer.  Incorrect size.");
03162         status = false;
03163     }
03164 
03165 
03166     // The buffer tags are now in sync.
03167     myNeedToSetTagsInBuffer = false;
03168     myIsTagsBufferValid = true;
03169     return(status);
03170 }
03171 
03172 
03173 // Reset the variables for a newly set buffer.  The buffer must be set first
03174 // since this looks up the reference ids in the buffer to set the reference
03175 // names.
03176 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
03177 {
03178     // Lookup the reference name & mate reference name associated with this
03179     // record.
03180     myReferenceName = 
03181         header.getReferenceLabel(myRecordPtr->myReferenceID);
03182     myMateReferenceName = 
03183         header.getReferenceLabel(myRecordPtr->myMateReferenceID);      
03184 
03185     // Clear the SAM Strings that are now not in-sync with the buffer.
03186     myReadName.SetLength(0);
03187     myCigar.SetLength(0);
03188     mySequence.SetLength(0);
03189     mySeqWithEq.clear();
03190     mySeqWithoutEq.clear();
03191     myQuality.SetLength(0);
03192     myNeedToSetTagsFromBuffer = true;
03193     myNeedToSetTagsInBuffer = false;
03194 
03195     //Set that the buffer is valid.
03196     myIsBufferSynced = true;
03197     // Set that the variable length buffer fields are valid.
03198     myIsReadNameBufferValid = true;
03199     myIsCigarBufferValid = true;
03200     myPackedSequence = (unsigned char *)myRecordPtr->myData + 
03201         myRecordPtr->myReadNameLength + myRecordPtr->myCigarLength * sizeof(int);
03202     myIsSequenceBufferValid = true;
03203     myBufferSequenceTranslation = NONE;
03204     myPackedQuality = myPackedSequence + 
03205         (myRecordPtr->myReadLength + 1) / 2;
03206     myIsQualityBufferValid = true;
03207     myIsTagsBufferValid = true;
03208     myIsBinValid = true;
03209 }
03210 
03211 
03212 // Extract the vtype from the key.
03213 void SamRecord::getTypeFromKey(int key, char& type) const
03214 {
03215     // Extract the type from the key.
03216     type = (key >> 16) & 0xFF;
03217 }
03218 
03219 
03220 // Extract the tag from the key.
03221 void SamRecord::getTag(int key, char* tag) const
03222 {
03223     // Extract the tag from the key.
03224     tag[0] = key & 0xFF;
03225     tag[1] = (key >> 8) & 0xFF;
03226     tag[2] = 0;
03227 }
03228 
03229 
03230 // Index is the index into the strings array.
03231 String & SamRecord::getString(int index)
03232 {
03233     int value = extras[index];
03234 
03235     return strings[value];
03236 }
03237 
03238 int & SamRecord::getInteger(int offset)
03239 {
03240     int value = extras[offset];
03241 
03242     return integers[value];
03243 }
03244 
03245 char & SamRecord::getIntegerType(int offset)
03246 {
03247     int value = extras[offset];
03248 
03249     return intType[value];
03250 }
03251 
03252 double & SamRecord::getDouble(int offset)
03253 {
03254     int value = extras[offset];
03255 
03256     return doubles[value];
03257 }
03258 
03259 
Generated on Mon Feb 11 13:45:18 2013 for libStatGen Software by  doxygen 1.6.3