SamRecord.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021 
00022 #include "bam.h"
00023 
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026 
00027 #include "BaseUtilities.h"
00028 
00029 
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032 
00033 
00034 SamRecord::SamRecord()
00035     : myStatus()
00036 {
00037     int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00038 
00039     myRecordPtr = 
00040         (bamRecordStruct *) malloc(defaultAllocSize);
00041 
00042     myCigarTempBuffer = NULL;
00043     myCigarTempBufferAllocatedSize = 0;
00044 
00045     allocatedSize = defaultAllocSize;
00046 
00047     resetRecord();
00048 }
00049 
00050 
00051 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00052     : myStatus(errorHandlingType)
00053 {
00054     int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00055 
00056     myRecordPtr = 
00057         (bamRecordStruct *) malloc(defaultAllocSize);
00058 
00059     myCigarTempBuffer = NULL;
00060     myCigarTempBufferAllocatedSize = 0;
00061 
00062     allocatedSize = defaultAllocSize;
00063 
00064     resetRecord();
00065 }
00066 
00067 
00068 SamRecord::~SamRecord()
00069 {
00070     resetRecord();
00071 
00072     if(myRecordPtr != NULL)
00073     {
00074         free(myRecordPtr);
00075         myRecordPtr = NULL;
00076     }
00077     if(myCigarTempBuffer != NULL)
00078     {
00079         free(myCigarTempBuffer);
00080         myCigarTempBuffer = NULL;
00081         myCigarTempBufferAllocatedSize = 0;
00082     }
00083 }
00084 
00085 
00086 // Resets the fields of the record to a default value.
00087 void SamRecord::resetRecord()
00088 {
00089     myIsBufferSynced = true;
00090 
00091     myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00092     myRecordPtr->myReferenceID = -1;
00093     myRecordPtr->myPosition = -1;
00094     myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00095     myRecordPtr->myMapQuality = 0;
00096     myRecordPtr->myBin = DEFAULT_BIN;
00097     myRecordPtr->myCigarLength = 0;
00098     myRecordPtr->myFlag = 0;
00099     myRecordPtr->myReadLength = 0;
00100     myRecordPtr->myMateReferenceID = -1;
00101     myRecordPtr->myMatePosition = -1;
00102     myRecordPtr->myInsertSize = 0;
00103    
00104     // Set the sam values for the variable length fields.
00105     myReadName = DEFAULT_READ_NAME;
00106     myReferenceName = "*";
00107     myMateReferenceName = "*";
00108     myCigar = "*";
00109     mySequence = "*";
00110     myQuality = "*";
00111     myNeedToSetTagsFromBuffer = false;
00112     myNeedToSetTagsInBuffer = false;
00113 
00114     // Initialize the calculated alignment info to the uncalculated value.
00115     myAlignmentLength = -1;
00116     myUnclippedStartOffset = -1;
00117     myUnclippedEndOffset = -1;
00118 
00119     clearTags();
00120 
00121     // Set the bam values for the variable length fields.
00122     // Only the read name needs to be set, the others are a length of 0.
00123     // Set the read name.  The min size of myRecordPtr includes the size for
00124     // the default read name.
00125     memcpy(&(myRecordPtr->myData), myReadName.c_str(), 
00126            myRecordPtr->myReadNameLength);
00127 
00128     // Set that the variable length buffer fields are valid.
00129     myIsReadNameBufferValid = true;
00130     myIsCigarBufferValid = true;
00131     myIsSequenceBufferValid = true;
00132     myIsQualityBufferValid = true;
00133     myIsTagsBufferValid = true;
00134     myIsBinValid = true;
00135 
00136     myCigarTempBufferLength = -1;
00137 
00138     myStatus = SamStatus::SUCCESS;
00139 
00140     NOT_FOUND_TAG_STRING = "";
00141     NOT_FOUND_TAG_INT = -1;
00142     NOT_FOUND_TAG_DOUBLE = -1;
00143 }
00144 
00145 
00146 // Reset the tag iterator to the beginning of the tags.
00147 void SamRecord::resetTagIter()
00148 {
00149     myLastTagIndex = -1;
00150 }
00151 
00152 
00153 // Returns whether or not the record is valid.
00154 // Header is needed to perform some validation against it.
00155 bool SamRecord::isValid(SamFileHeader& header)
00156 {
00157     myStatus = SamStatus::SUCCESS;
00158     SamValidationErrors invalidSamErrors;
00159     if(!SamValidator::isValid(header, *this, invalidSamErrors))
00160     {
00161         // The record is not valid.
00162         std::string errorMessage = "";
00163         invalidSamErrors.getErrorString(errorMessage);
00164         myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00165         return(false);
00166     }
00167     // The record is valid.
00168     return(true);
00169 }
00170 
00171 
00172 bool SamRecord::setReadName(const char* readName) 
00173 {
00174     myReadName = readName;
00175     myIsBufferSynced = false;
00176     myIsReadNameBufferValid = false;
00177     myStatus = SamStatus::SUCCESS;
00178 
00179     // The read name must at least have some length, otherwise this is a parsing
00180     // error.
00181     if(myReadName.Length() == 0)
00182     {
00183         // Invalid - reset ReadName return false.
00184         myReadName = DEFAULT_READ_NAME;
00185         myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00186         myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00187         return(false);
00188     }
00189 
00190     return true;
00191 }
00192 
00193 
00194 bool SamRecord::setFlag(uint16_t flag)
00195 {
00196     myStatus = SamStatus::SUCCESS;
00197     myRecordPtr->myFlag = flag;
00198     return true;
00199 }
00200 
00201 
00202 bool SamRecord::setReferenceName(SamFileHeader& header,
00203                                  const char* referenceName) 
00204 {
00205     myStatus = SamStatus::SUCCESS;
00206 
00207     myReferenceName = referenceName;
00208     myRecordPtr->myReferenceID = header.getReferenceID(referenceName);
00209 
00210     return true;
00211 }
00212 
00213 
00214 bool SamRecord::set1BasedPosition(int32_t position) 
00215 {
00216     return(set0BasedPosition(position - 1));
00217 }
00218 
00219 
00220 bool SamRecord::set0BasedPosition(int32_t position)
00221 {
00222     myStatus = SamStatus::SUCCESS;
00223     myRecordPtr->myPosition = position;
00224     myIsBinValid = false;
00225     return true;
00226 }
00227 
00228 
00229 bool SamRecord::setMapQuality(uint8_t mapQuality)
00230 {
00231     myStatus = SamStatus::SUCCESS;
00232     myRecordPtr->myMapQuality = mapQuality;
00233     return true;
00234 }
00235 
00236 
00237 bool SamRecord::setCigar(const char* cigar)
00238 {
00239     myStatus = SamStatus::SUCCESS;
00240     myCigar = cigar;
00241  
00242     myIsBufferSynced = false;
00243     myIsCigarBufferValid = false;
00244     myCigarTempBufferLength = -1;
00245     myIsBinValid = false;
00246 
00247     // Initialize the calculated alignment info to the uncalculated value.
00248     myAlignmentLength = -1;
00249     myUnclippedStartOffset = -1;
00250     myUnclippedEndOffset = -1;
00251 
00252     return true;
00253 }
00254 
00255 
00256 bool SamRecord::setCigar(const Cigar& cigar)
00257 {
00258     myStatus = SamStatus::SUCCESS;
00259     cigar.getCigarString(myCigar);
00260  
00261     myIsBufferSynced = false;
00262     myIsCigarBufferValid = false;
00263     myCigarTempBufferLength = -1;
00264     myIsBinValid = false;
00265 
00266     // Initialize the calculated alignment info to the uncalculated value.
00267     myAlignmentLength = -1;
00268     myUnclippedStartOffset = -1;
00269     myUnclippedEndOffset = -1;
00270 
00271     return true;
00272 }
00273 
00274 
00275 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00276                                      const char* mateReferenceName) 
00277 {
00278     myStatus = SamStatus::SUCCESS;
00279     // Set the mate reference, if it is "=", set it to be equal
00280     // to myReferenceName.  This assumes that myReferenceName has already
00281     // been called.
00282     if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00283     {
00284         myMateReferenceName = myReferenceName;
00285     }
00286     else
00287     {
00288         myMateReferenceName = mateReferenceName;
00289     }
00290 
00291     // Set the Mate Reference ID.
00292     myRecordPtr->myMateReferenceID = 
00293         header.getReferenceID(myMateReferenceName);
00294 
00295     return true;
00296 }
00297 
00298 
00299 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00300 {
00301     return(set0BasedMatePosition(matePosition - 1));
00302 }
00303 
00304 
00305 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00306 {
00307     myStatus = SamStatus::SUCCESS;
00308     myRecordPtr->myMatePosition = matePosition;
00309     return true;
00310 }
00311 
00312 
00313 bool SamRecord::setInsertSize(int32_t insertSize)
00314 {
00315     myStatus = SamStatus::SUCCESS;
00316     myRecordPtr->myInsertSize = insertSize;
00317     return true;
00318 }
00319 
00320 
00321 bool SamRecord::setSequence(const char* seq) 
00322 {
00323     myStatus = SamStatus::SUCCESS;
00324     mySequence = seq;
00325    
00326     myIsBufferSynced = false;
00327     myIsSequenceBufferValid = false;
00328     return true;
00329 }
00330 
00331 
00332 bool SamRecord::setQuality(const char* quality) 
00333 {
00334     myStatus = SamStatus::SUCCESS;
00335     myQuality = quality;
00336     myIsBufferSynced = false;
00337     myIsQualityBufferValid = false;
00338     return true;
00339 }
00340 
00341 
00342 // Read the BAM record from a file.
00343 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr, 
00344                                                SamFileHeader& header)
00345 {
00346     myStatus = SamStatus::SUCCESS;
00347     if((filePtr == NULL) || (filePtr->isOpen() == false))
00348     {
00349         // File is not open, return failure.
00350         myStatus.setStatus(SamStatus::FAIL_ORDER, 
00351                            "Can't read from an unopened file.");
00352         return(SamStatus::FAIL_ORDER);
00353     }
00354 
00355     // Clear the record.
00356     resetRecord();
00357 
00358     // read the record size.
00359     int numBytes = 
00360         ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00361 
00362     if(ifeof(filePtr))
00363     {
00364         if(numBytes == 0)
00365         {
00366             // End of file, nothing was read, no more records.
00367             myStatus.setStatus(SamStatus::NO_MORE_RECS,
00368                                "No more records left to read.");
00369             return(SamStatus::NO_MORE_RECS);
00370         }
00371         else
00372         {
00373             // Error: end of the file reached prior to reading the rest of the
00374             // record.
00375             myStatus.setStatus(SamStatus::FAIL_PARSE, 
00376                                "EOF reached in the middle of a record.");
00377             return(SamStatus::FAIL_PARSE);
00378         }
00379     }
00380 
00381     // allocate space for the record size.
00382     if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00383     {
00384         // Failed to allocate space.
00385         // Status is set by allocateRecordStructure.
00386         return(SamStatus::FAIL_MEM);
00387     }
00388 
00389     // Read the rest of the alignment block, starting at the reference id.
00390     if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00391        != (unsigned int)myRecordPtr->myBlockSize)
00392     {
00393         // Error reading the record.  Reset it and return failure.
00394         resetRecord();
00395         myStatus.setStatus(SamStatus::FAIL_IO,
00396                            "Failed to read the record");
00397         return(SamStatus::FAIL_IO);
00398     }
00399 
00400     setVariablesForNewBuffer(header);
00401 
00402     // Return the status of the record.
00403     return(SamStatus::SUCCESS);
00404 }
00405 
00406 // Set the BAM record from the passeed in buffer of the specified size.
00407 // Note: The size includes the block size.
00408 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00409                                        uint32_t fromBufferSize,
00410                                        SamFileHeader& header)
00411 {
00412     myStatus = SamStatus::SUCCESS;
00413     if((fromBuffer == NULL) || (fromBufferSize == 0))
00414     {
00415         // Buffer is empty.
00416         myStatus.setStatus(SamStatus::FAIL_PARSE,
00417                            "Cannot parse an empty file.");
00418         return(SamStatus::FAIL_PARSE);
00419     }
00420 
00421     // Clear the record.   
00422     resetRecord();
00423 
00424     // allocate space for the record size.
00425     if(!allocateRecordStructure(fromBufferSize))
00426     {
00427         // Failed to allocate space.
00428         return(SamStatus::FAIL_MEM);
00429     }
00430    
00431     memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00432 
00433     setVariablesForNewBuffer(header);
00434 
00435     // Return the status of the record.
00436     return(SamStatus::SUCCESS);
00437 }
00438 
00439 
00440 // Add the specified tag to the record.
00441 // Returns true if the tag was successfully added, false otherwise.
00442 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00443 {
00444     myStatus = SamStatus::SUCCESS;
00445     bool status = true; // default to successful.
00446     int key = 0;
00447     int intVal = 0;
00448     int index = 0;
00449     char bamvtype;
00450 
00451     int tagBufferSize = 0;
00452 
00453     // First check to see if the tags need to be synced to the buffer.
00454     if(myNeedToSetTagsFromBuffer)
00455     {
00456         if(!setTagsFromBuffer())
00457         {
00458             // Failed to read tags from the buffer, so cannot add new ones.
00459             return(false);
00460         }
00461     }
00462 
00463     switch (vtype)
00464     {
00465         case 'A' :
00466             index = integers.Length();
00467             bamvtype = vtype;
00468             integers.Push((const int)*(valuePtr));
00469             tagBufferSize += 4;
00470             break;
00471         case 'i' :
00472             index = integers.Length();
00473             intVal = atoi((const char *)valuePtr);
00474             // Ints come in as int.  But it can be represented in fewer bits.
00475             // So determine a more specific type that is in line with the
00476             // types for BAM files.
00477             // First check to see if it is a negative.
00478             if(intVal < 0)
00479             {
00480                 // The int is negative, so it will need to use a signed type.
00481                 // See if it is greater than the min value for a char.
00482                 if(intVal > std::numeric_limits<char>::min())
00483                 {
00484                     // It can be stored in a signed char.
00485                     bamvtype = 'c';
00486                     tagBufferSize += 4;
00487                 }
00488                 else if(intVal > std::numeric_limits<short>::min())
00489                 {
00490                     // It fits in a signed short.
00491                     bamvtype = 's';
00492                     tagBufferSize += 5;
00493                 }
00494                 else
00495                 {
00496                     // Just store it as a signed int.
00497                     bamvtype = 'i';
00498                     tagBufferSize += 7;
00499                 }
00500             }
00501             else
00502             {
00503                 // It is positive, so an unsigned type can be used.
00504                 if(intVal < std::numeric_limits<unsigned char>::max())
00505                 {
00506                     // It is under the max of an unsigned char.
00507                     bamvtype = 'C';
00508                     tagBufferSize += 4;
00509                 }
00510                 else if(intVal < std::numeric_limits<unsigned short>::max())
00511                 {
00512                     // It is under the max of an unsigned short.
00513                     bamvtype = 'S';
00514                     tagBufferSize += 5;
00515                 }
00516                 else
00517                 {
00518                     // Just store it as an unsigned int.
00519                     bamvtype = 'I';
00520                     tagBufferSize += 7;
00521                 }
00522             }
00523             integers.Push(intVal);
00524             break;
00525         case 'Z' :
00526             index = strings.Length();
00527             bamvtype = vtype;
00528             strings.Push((const char *)valuePtr);
00529             tagBufferSize += 4 + strings.Last().Length();
00530             break;
00531         case 'f' :
00532             index = doubles.Length();
00533             bamvtype = vtype;
00534             doubles.Push(atof((const char *)valuePtr));
00535             tagBufferSize += 7;
00536             break;
00537         default :
00538             fprintf(stderr,
00539                     "samFile::ReadSAM() - Unknown custom field of type %c\n",
00540                     vtype);
00541             myStatus.setStatus(SamStatus::FAIL_PARSE, 
00542                                "Unknown custom field in a tag");
00543             status = false;
00544     }
00545 
00546     // Only add the tag if it has so far been successfully processed.
00547     if(status)
00548     {
00549         // The buffer tags are now out of sync.
00550         myNeedToSetTagsInBuffer = true;
00551         myIsTagsBufferValid = false;
00552         myIsBufferSynced = false;
00553 
00554         key = MAKEKEY(tag[0], tag[1], bamvtype);
00555         extras.Add(key, index);
00556         myTagBufferSize += tagBufferSize;
00557     }
00558     return(status);
00559 }
00560 
00561 
00562 // Get methods for record fields.
00563 const void* SamRecord::getRecordBuffer() 
00564 {
00565     myStatus = SamStatus::SUCCESS;
00566     bool status = true;
00567     if(myIsBufferSynced == false)
00568     {
00569         status &= fixBuffer();
00570     }
00571     // If the buffer is synced, check to see if the tags need to be synced.
00572     if(myNeedToSetTagsInBuffer)
00573     {
00574         status &= setTagsInBuffer();
00575     }
00576     if(!status)
00577     {
00578         return(NULL);
00579     }
00580     return (const void *)myRecordPtr;
00581 }
00582 
00583 
00584 // Get methods for record fields.
00585 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
00586 {
00587     myStatus = SamStatus::SUCCESS;
00588     if((filePtr == NULL) || (filePtr->isOpen() == false))
00589     {
00590         // File is not open, return failure.
00591         myStatus.setStatus(SamStatus::FAIL_ORDER,
00592                            "Can't write to an unopened file.");
00593         return(SamStatus::FAIL_ORDER);
00594     }
00595 
00596     if(myIsBufferSynced == false)
00597     {
00598         if(!fixBuffer())
00599         {
00600             return(myStatus.getStatus());
00601         }
00602     }
00603 
00604     // Write the record.
00605     unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
00606     unsigned int numBytesWritten = 
00607         ifwrite(filePtr, myRecordPtr, numBytesToWrite);
00608 
00609     // Return status based on if the correct number of bytes were written.
00610     if(numBytesToWrite == numBytesWritten)
00611     {
00612         return(SamStatus::SUCCESS);
00613     }
00614     // The correct number of bytes were not written.
00615     myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
00616     return(SamStatus::FAIL_IO);
00617 }
00618 
00619 
00620 int32_t SamRecord::getBlockSize() 
00621 {
00622     myStatus = SamStatus::SUCCESS;
00623     // If the buffer isn't synced, sync the buffer to determine the
00624     // block size.
00625     if(myIsBufferSynced == false)
00626     {
00627         fixBuffer();
00628     }
00629     return myRecordPtr->myBlockSize;
00630 }
00631 
00632 
00633 // This method returns the reference name.
00634 const char* SamRecord::getReferenceName()
00635 {
00636     myStatus = SamStatus::SUCCESS;
00637     return myReferenceName.c_str();
00638 }
00639 
00640 
00641 int32_t SamRecord::getReferenceID()
00642 {
00643     myStatus = SamStatus::SUCCESS;
00644     return myRecordPtr->myReferenceID;
00645 }
00646 
00647 
00648 int32_t SamRecord::get1BasedPosition()
00649 {
00650     myStatus = SamStatus::SUCCESS;
00651     return (myRecordPtr->myPosition + 1);
00652 }
00653 
00654 
00655 int32_t SamRecord::get0BasedPosition()
00656 {
00657     myStatus = SamStatus::SUCCESS;
00658     return myRecordPtr->myPosition;
00659 }
00660 
00661 
00662 uint8_t SamRecord::getReadNameLength() 
00663 {
00664     myStatus = SamStatus::SUCCESS;
00665     // If the buffer is valid, return the size from there, otherwise get the 
00666     // size from the string length + 1 (ending null).
00667     if(myIsReadNameBufferValid)
00668     {
00669         return(myRecordPtr->myReadNameLength);
00670     }
00671    
00672     return(myReadName.Length() + 1);
00673 }
00674 
00675 
00676 uint8_t SamRecord::getMapQuality()
00677 {
00678     myStatus = SamStatus::SUCCESS;
00679     return myRecordPtr->myMapQuality;
00680 }
00681 
00682 
00683 uint16_t SamRecord::getBin()
00684 {
00685     myStatus = SamStatus::SUCCESS;
00686     if(!myIsBinValid)
00687     {
00688         // The bin that is set in the record is not valid, so
00689         // reset it.
00690         myRecordPtr->myBin = 
00691             bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());      
00692         myIsBinValid = true;
00693     }
00694     return(myRecordPtr->myBin);
00695 }
00696 
00697 
00698 uint16_t SamRecord::getCigarLength()
00699 {
00700     myStatus = SamStatus::SUCCESS;
00701     // If the cigar buffer is valid
00702     // then get the length from there.
00703     if(myIsCigarBufferValid)
00704     {
00705         return myRecordPtr->myCigarLength;      
00706     }
00707 
00708     if(myCigarTempBufferLength == -1)
00709     {
00710         // The cigar buffer is not valid and the cigar temp buffer is not set,
00711         // so parse the string.
00712         parseCigarString();
00713     }
00714    
00715     // The temp buffer is now set, so return the size.
00716     return(myCigarTempBufferLength);
00717 }
00718 
00719 
00720 uint16_t SamRecord::getFlag()
00721 {
00722     myStatus = SamStatus::SUCCESS;
00723     return myRecordPtr->myFlag;
00724 }
00725 
00726 
00727 int32_t SamRecord::getReadLength() 
00728 {
00729     myStatus = SamStatus::SUCCESS;
00730     if(myIsSequenceBufferValid == false)
00731     {
00732         // If the sequence is "*", then return 0.
00733         if((mySequence.Length() == 1) && (mySequence[0] == '*'))
00734         {
00735             return(0);
00736         }
00737         // Do not add 1 since it is not null terminated.
00738         return(mySequence.Length());
00739     }
00740     return(myRecordPtr->myReadLength);
00741 }
00742 
00743 
00744 // This method returns the mate reference name.  If it is equal to the
00745 // reference name, it still returns the reference name.
00746 const char* SamRecord::getMateReferenceName()
00747 {
00748     myStatus = SamStatus::SUCCESS;
00749     return myMateReferenceName.c_str();
00750 }
00751 
00752 
00753 // This method returns the mate reference name.  If it is equal to the
00754 // reference name, it returns "=", unless they are both "*" in which case
00755 // "*" is returned.
00756 const char* SamRecord::getMateReferenceNameOrEqual()
00757 {
00758     myStatus = SamStatus::SUCCESS;
00759     if(myMateReferenceName == "*")
00760     {
00761         return(myMateReferenceName);
00762     }
00763     if(myMateReferenceName == getReferenceName())
00764     {
00765         return(FIELD_ABSENT_STRING);
00766     }
00767     else
00768     {
00769         return(myMateReferenceName);
00770     }
00771 }
00772 
00773 
00774 int32_t SamRecord::getMateReferenceID()
00775 {
00776     myStatus = SamStatus::SUCCESS;
00777     return myRecordPtr->myMateReferenceID;
00778 }
00779 
00780 
00781 int32_t SamRecord::get1BasedMatePosition()
00782 {
00783     myStatus = SamStatus::SUCCESS;
00784     return (myRecordPtr->myMatePosition + 1);
00785 }
00786 
00787 
00788 int32_t SamRecord::get0BasedMatePosition()
00789 {
00790     myStatus = SamStatus::SUCCESS;
00791     return myRecordPtr->myMatePosition;
00792 }
00793 
00794 
00795 int32_t SamRecord::getInsertSize()
00796 {
00797     myStatus = SamStatus::SUCCESS;
00798     return myRecordPtr->myInsertSize;
00799 }
00800 
00801 
00802 // Returns the inclusive rightmost position of the clipped sequence.
00803 int32_t SamRecord::get0BasedAlignmentEnd()
00804 {
00805     myStatus = SamStatus::SUCCESS;
00806     if(myAlignmentLength == -1)
00807     {
00808         // Alignment end has not been set, so calculate it.
00809         parseCigar();
00810     }
00811     // If alignment length > 0, subtract 1 from it to get the end.
00812     if(myAlignmentLength == 0)
00813     {
00814         // Length is 0, just return the start position.
00815         return(myRecordPtr->myPosition);
00816     }
00817     return(myRecordPtr->myPosition + myAlignmentLength - 1);
00818 }
00819 
00820 
00821 // Returns the inclusive rightmost position of the clipped sequence.
00822 int32_t SamRecord::get1BasedAlignmentEnd()
00823 {
00824     return(get0BasedAlignmentEnd() + 1);
00825 }
00826 
00827    
00828 // Return the length of the alignment.
00829 int32_t SamRecord::getAlignmentLength()
00830 {
00831     myStatus = SamStatus::SUCCESS;
00832     if(myAlignmentLength == -1)
00833     {
00834         // Alignment end has not been set, so calculate it.
00835         parseCigar();
00836     }
00837     // Return the alignment length.
00838     return(myAlignmentLength);
00839 }
00840 
00841 // Returns the inclusive left-most position adjust for clipped bases.
00842 int32_t SamRecord::get0BasedUnclippedStart()
00843 {
00844     myStatus = SamStatus::SUCCESS;
00845     if(myUnclippedStartOffset == -1)
00846     {
00847         // Unclipped has not yet been calculated, so parse the cigar to get it
00848         parseCigar();
00849     }
00850     return(myRecordPtr->myPosition - myUnclippedStartOffset);
00851 }
00852 
00853 
00854 // Returns the inclusive left-most position adjust for clipped bases.
00855 int32_t SamRecord::get1BasedUnclippedStart()
00856 {
00857     return(get0BasedUnclippedStart() + 1);
00858 }
00859 
00860 
00861 // Returns the inclusive right-most position adjust for clipped bases.
00862 int32_t SamRecord::get0BasedUnclippedEnd()
00863 {
00864     // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the 
00865     // cigar has not yet been parsed, so no need to check it here.
00866     return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
00867 }
00868 
00869 
00870 // Returns the inclusive right-most position adjust for clipped bases.
00871 int32_t SamRecord::get1BasedUnclippedEnd()
00872 {
00873     return(get0BasedUnclippedEnd() + 1);
00874 }
00875 
00876 
00877 // Get the read name.
00878 const char* SamRecord::getReadName()
00879 {
00880     myStatus = SamStatus::SUCCESS;
00881     if(myReadName.Length() == 0)
00882     {
00883         // 0 Length, means that it is in the buffer, but has not yet
00884         // been synced to the string, so do the sync.
00885         myReadName = (char*)&(myRecordPtr->myData);
00886     }
00887     return myReadName.c_str();
00888 }
00889 
00890 
00891 const char* SamRecord::getCigar()
00892 {
00893     myStatus = SamStatus::SUCCESS;
00894     if(myCigar.Length() == 0)
00895     {
00896         // 0 Length, means that it is in the buffer, but has not yet
00897         // been synced to the string, so do the sync.
00898         parseCigarBinary();
00899     }
00900     return myCigar.c_str();
00901 }
00902 
00903 
00904 const char* SamRecord::getSequence()
00905 {
00906     myStatus = SamStatus::SUCCESS;
00907     if(mySequence.Length() == 0)
00908     {
00909         // 0 Length, means that it is in the buffer, but has not yet
00910         // been synced to the string, so do the sync.
00911         setSequenceAndQualityFromBuffer();
00912     }
00913     return mySequence.c_str();
00914 }
00915 
00916 
00917 const char* SamRecord::getQuality() 
00918 {
00919     myStatus = SamStatus::SUCCESS;
00920     if(myQuality.Length() == 0)
00921     {
00922         // 0 Length, means that it is in the buffer, but has not yet
00923         // been synced to the string, so do the sync.
00924         setSequenceAndQualityFromBuffer();      
00925     }
00926     return myQuality.c_str();
00927 }
00928 
00929 
00930 char SamRecord::getSequence(int index)
00931 {
00932     static const char * asciiBases = "0AC.G...T......N";
00933 
00934     // Determine the read length.
00935     int32_t readLen = getReadLength();
00936 
00937     // If the read length is 0, this method should not be called.
00938     if(readLen == 0)
00939     {
00940         String exceptionString = "SamRecord::getSequence(";
00941         exceptionString += index;
00942         exceptionString += ") is not allowed since sequence = '*'";
00943         throw std::runtime_error(exceptionString.c_str());
00944     }
00945     else if((index < 0) || (index >= readLen))
00946     {
00947         // Only get here if the index was out of range, so thow an exception.
00948         String exceptionString = "SamRecord::getSequence(";
00949         exceptionString += index;
00950         exceptionString += ") is out of range. Index must be between 0 and ";
00951         exceptionString += (readLen - 1);
00952         throw std::runtime_error(exceptionString.c_str());
00953     }
00954 
00955     if(mySequence.Length() == 0)
00956     {
00957         // Parse BAM sequence.
00958         // TODO - maybe store this pointer - and use that to track when valid?
00959         unsigned char * packedSequence = 
00960             (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
00961             myRecordPtr->myCigarLength * sizeof(int);
00962         
00963         return(index & 1 ?
00964                asciiBases[packedSequence[index / 2] & 0xF] :
00965                asciiBases[packedSequence[index / 2] >> 4]);
00966     }
00967     else 
00968     {
00969         // Already have string.
00970         return(mySequence[index]);
00971     }
00972 }
00973 
00974 char SamRecord::getQuality(int index)
00975 {
00976     // Determine the read length.
00977     int32_t readLen = getReadLength();
00978 
00979     // If the read length is 0, return ' ' whose ascii code is below
00980     // the minimum ascii code for qualities.
00981     if(readLen == 0)
00982     {
00983         return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
00984     }
00985     else if((index < 0) || (index >= readLen))
00986     {
00987         // Only get here if the index was out of range, so thow an exception.
00988         String exceptionString = "SamRecord::getQuality(";
00989         exceptionString += index;
00990         exceptionString += ") is out of range. Index must be between 0 and ";
00991         exceptionString += (readLen - 1);
00992         throw std::runtime_error(exceptionString.c_str());
00993     }
00994 
00995     if(myQuality.Length() == 0)
00996     {
00997         // Parse BAM Quality.
00998         unsigned char * packedQuality = 
00999             (unsigned char *)myRecordPtr->myData +
01000             myRecordPtr->myReadNameLength + 
01001             myRecordPtr->myCigarLength * sizeof(int) + 
01002             (myRecordPtr->myReadLength + 1) / 2;
01003         return(packedQuality[index] + 33);
01004     }
01005     else
01006     {
01007         // Already have string.
01008         if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01009         {
01010             // Return 0xFF like it does for BAM.
01011             return(0xFF);
01012         }
01013         else
01014         {
01015             return(myQuality[index]);
01016         }
01017     }
01018 }
01019 
01020    
01021 Cigar* SamRecord::getCigarInfo()
01022 {
01023     // Check to see whether or not the Cigar has already been
01024     // set - this is determined by checking if alignment length
01025     // is set since alignment length and the cigar are set
01026     // at the same time.
01027     if(myAlignmentLength == -1)
01028     {
01029         // Not been set, so calculate it.
01030         parseCigar();
01031     }
01032     return(&myCigarRoller);
01033 }
01034 
01035 
01036 uint32_t SamRecord::getTagLength() 
01037 {
01038     myStatus = SamStatus::SUCCESS;
01039     if(myNeedToSetTagsFromBuffer)
01040     {
01041         // Tags are only set in the buffer, so the size of the tags is 
01042         // the length of the record minus the starting location of the tags.
01043         unsigned char * tagStart = 
01044             (unsigned char *)myRecordPtr->myData 
01045             + myRecordPtr->myReadNameLength 
01046             + myRecordPtr->myCigarLength * sizeof(int)
01047             + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01048       
01049         // The non-tags take up from the start of the record to the tag start.
01050         // Do not include the block size part of the record since it is not
01051         // included in the size.
01052         uint32_t nonTagSize = 
01053             tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01054         // Tags take up the size of the block minus the non-tag section.
01055         uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01056         return(tagSize);
01057     }
01058 
01059     // Tags are stored outside the buffer, so myTagBufferSize is set.
01060     return(myTagBufferSize);
01061 }
01062 
01063 
01064 // Returns true if there is another tag and sets tag and vtype to the
01065 // appropriate values, and returns a pointer to the value.
01066 // Sets the Status to SUCCESS when a tag is successfully returned or
01067 // when there are no more tags.  Otherwise the status is set to describe
01068 // why it failed (parsing, etc).
01069 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01070 {
01071     myStatus = SamStatus::SUCCESS;
01072     if(myNeedToSetTagsFromBuffer)
01073     {
01074         if(!setTagsFromBuffer())
01075         {
01076             // Failed to read the tags from the buffer, so cannot
01077             // get tags.
01078             return(false);
01079         }
01080     }
01081 
01082     // Increment the tag index to start looking at the next tag.
01083     // At the beginning, it is set to -1.
01084     myLastTagIndex++;
01085     int maxTagIndex = extras.Capacity();
01086     if(myLastTagIndex >= maxTagIndex)
01087     {
01088         // Hit the end of the tags, return false, no more tags.
01089         // Status is still success since this is not an error, 
01090         // it is just the end of the list.
01091         return(false);
01092     }
01093 
01094     bool tagFound = false;
01095     // Loop until a tag is found or the end of extras is hit.
01096     while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01097     {
01098         if(extras.SlotInUse(myLastTagIndex))
01099         {
01100             // Found a slot to use.
01101             int key = extras.GetKey(myLastTagIndex);
01102             getTag(key, tag);
01103             getVtype(key, vtype);
01104             tagFound = true;
01105             // Get the value associated with the key based on the vtype.
01106             switch (vtype)
01107             {
01108                 case 'A' :
01109                     *value = getIntegerPtr(myLastTagIndex);
01110                     break;
01111                 case 'f' :
01112                     *value = getDoublePtr(myLastTagIndex);
01113                     break;
01114                 case 'c' :
01115                 case 'C' :
01116                 case 's' :
01117                 case 'S' :
01118                 case 'i' :
01119                 case 'I' :
01120                     vtype = 'i';
01121                     *value = getIntegerPtr(myLastTagIndex);
01122                     break;
01123                 case 'Z' :
01124                     *value = getStringPtr(myLastTagIndex);
01125                     break;
01126                 default:
01127                     myStatus.setStatus(SamStatus::FAIL_PARSE,
01128                                        "Unknown tag type");
01129                     tagFound = false;
01130                     break;
01131             }
01132         }
01133         if(!tagFound)
01134         {
01135             // Increment the index since a tag was not found.
01136             myLastTagIndex++;
01137         }
01138     }
01139     return(tagFound);
01140 }
01141 
01142 
01143 // Returns the values of all fields except the tags.
01144 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName, 
01145                           String& cigar, String& sequence, String& quality)
01146 {
01147     myStatus = SamStatus::SUCCESS;
01148     if(myIsBufferSynced == false)
01149     {
01150         if(!fixBuffer())
01151         {
01152             // failed to set the buffer, return false.
01153             return(false);
01154         }
01155     }
01156     memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01157 
01158     readName = getReadName();
01159     // Check the status.
01160     if(myStatus != SamStatus::SUCCESS)
01161     {
01162         // Failed to set the fields, return false.
01163         return(false);
01164     }
01165     cigar = getCigar();
01166     // Check the status.
01167     if(myStatus != SamStatus::SUCCESS)
01168     {
01169         // Failed to set the fields, return false.
01170         return(false);
01171     }
01172     sequence = getSequence();
01173     // Check the status.
01174     if(myStatus != SamStatus::SUCCESS)
01175     {
01176         // Failed to set the fields, return false.
01177         return(false);
01178     }
01179     quality = getQuality();
01180     // Check the status.
01181     if(myStatus != SamStatus::SUCCESS)
01182     {
01183         // Failed to set the fields, return false.
01184         return(false);
01185     }
01186     return(true);
01187 }
01188 
01189 
01190 bool SamRecord::isIntegerType(char vtype) const
01191 {
01192     if((vtype == 'c') || (vtype == 'C') ||
01193        (vtype == 's') || (vtype == 'S') ||
01194        (vtype == 'i') || (vtype == 'I'))
01195     {
01196         return(true);
01197     }
01198     return(false);
01199 }
01200 
01201 
01202 bool SamRecord::isDoubleType(char vtype) const
01203 {
01204     if(vtype == 'f')
01205     {
01206         return(true);
01207     }
01208     return(false);
01209 }
01210 
01211 
01212 bool SamRecord::isCharType(char vtype) const
01213 {
01214     if(vtype == 'A')
01215     {
01216         return(true);
01217     }
01218     return(false);
01219 }
01220 
01221 
01222 bool SamRecord::isStringType(char vtype) const
01223 {
01224     if(vtype == 'Z')
01225     {
01226         return(true);
01227     }
01228     return(false);
01229 }
01230 
01231 
01232 void SamRecord::clearTags()
01233 {
01234     if(extras.Entries() != 0)
01235     {
01236         extras.Clear();
01237     }
01238     strings.Clear();
01239     integers.Clear();
01240     doubles.Clear();
01241     myTagBufferSize = 0;
01242     resetTagIter();
01243 }
01244 
01245 
01246 // Return the error after a failed SamRecord call.
01247 const SamStatus& SamRecord::getStatus()
01248 {
01249     return(myStatus);
01250 }
01251 
01252 
01253 String & SamRecord::getString(const char * tag)
01254 {
01255     // Init to success.
01256     myStatus = SamStatus::SUCCESS;
01257     // Parse the buffer if necessary.
01258     if(myNeedToSetTagsFromBuffer)
01259     {
01260         if(!setTagsFromBuffer())
01261         {
01262             // Failed to read the tags from the buffer, so cannot
01263             // get tags.
01264             // TODO - what do we want to do on failure?
01265         }
01266     }
01267     
01268     int key = MAKEKEY(tag[0], tag[1], 'Z');
01269     int offset = extras.Find(key);
01270 
01271     int value;
01272     if (offset < 0)
01273     {
01274         // TODO - what do we want to do on failure?
01275         return(NOT_FOUND_TAG_STRING);
01276     }
01277     else
01278         value = extras[offset];
01279 
01280     return strings[value];
01281 }
01282 
01283 int & SamRecord::getInteger(const char * tag)
01284 {
01285     // Init to success.
01286     myStatus = SamStatus::SUCCESS;
01287     // Parse the buffer if necessary.
01288     if(myNeedToSetTagsFromBuffer)
01289     {
01290         if(!setTagsFromBuffer())
01291         {
01292             // Failed to read the tags from the buffer, so cannot
01293             // get tags.
01294             // TODO - what do we want to do on failure?
01295         }
01296     }
01297     
01298     int key = MAKEKEY(tag[0], tag[1], 'i');
01299     int offset = extras.Find(key);
01300 
01301     int value;
01302     if (offset < 0)
01303     {
01304         // TODO - what do we want to do on failure?
01305         return NOT_FOUND_TAG_INT;
01306     }
01307     else
01308         value = extras[offset];
01309 
01310     return integers[value];
01311 }
01312 
01313 double & SamRecord::getDouble(const char * tag)
01314 {
01315     // Init to success.
01316     myStatus = SamStatus::SUCCESS;
01317     // Parse the buffer if necessary.
01318     if(myNeedToSetTagsFromBuffer)
01319     {
01320         if(!setTagsFromBuffer())
01321         {
01322             // Failed to read the tags from the buffer, so cannot
01323             // get tags.
01324             // TODO - what do we want to do on failure?
01325         }
01326     }
01327     
01328     int key = MAKEKEY(tag[0], tag[1], 'f');
01329     int offset = extras.Find(key);
01330 
01331     int value;
01332     if (offset < 0)
01333     {
01334         // TODO - what do we want to do on failure?
01335         return NOT_FOUND_TAG_DOUBLE;
01336     }
01337     else
01338         value = extras[offset];
01339 
01340     return doubles[value];
01341 }
01342 
01343 
01344 bool SamRecord::checkTag(const char * tag, char type)
01345 {
01346     // Init to success.
01347     myStatus = SamStatus::SUCCESS;
01348     // Parse the buffer if necessary.
01349     if(myNeedToSetTagsFromBuffer)
01350     {
01351         if(!setTagsFromBuffer())
01352         {
01353             // Failed to read the tags from the buffer, so cannot
01354             // get tags.
01355             return("");
01356         }
01357     }
01358     
01359     int key = MAKEKEY(tag[0], tag[1], type);
01360 
01361     return (extras.Find(key) != LH_NOTFOUND);
01362 }
01363 
01364 
01365 // Return the number of bases in this read that overlap the passed in
01366 // region.  (start & end are 0-based)
01367 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
01368 {
01369     // Determine whether or not the cigar has been parsed, which sets up
01370     // the cigar roller.  This is determined by checking the alignment length.
01371     if(myAlignmentLength == -1)
01372     {
01373         parseCigar();
01374     }
01375     return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
01376 }
01377 
01378 
01379 // // Get the Extra field: <tag>:<vtype>:<value> for the key.
01380 // void SamRecord::getSamExtraFieldFromKey(int key, String& extraField) 
01381 // {
01382 //     myStatus = SamStatus::SUCCESS;
01383 //     // Extract the tag from the key.
01384 //     char tag[3];
01385 //     getTag(key, tag);
01386 
01387 //     char vtype;
01388 //     getVtype(key, vtype);
01389 
01390 //     // Look up the key to get the offset into the appropriate array.
01391 //     int offset = extras.Find(key);
01392 
01393 //     // Get the value associated with the key based on the vtype.
01394 //     switch (vtype)
01395 //     {
01396 //         case 'A' :
01397 //             extraField = tag;
01398 //             extraField += ":A:";
01399 //             extraField += (char)(getInteger(offset));
01400 //             break;
01401 //         case 'f' :
01402 //             extraField = tag;
01403 //             extraField += ":f:";
01404 //             extraField += getDouble(offset);
01405 //             break;
01406 //         case 'c' :
01407 //         case 'C' :
01408 //         case 's' :
01409 //         case 'S' :
01410 //         case 'i' :
01411 //         case 'I' :
01412 //             extraField = tag;
01413 //             extraField += ":i:";
01414 //             extraField += getInteger(offset);
01415 //             break;
01416 //         case 'Z' :
01417 //             //         String valueString = getString(offset).c_str();
01418 //             extraField = tag;
01419 //             extraField += ":Z:";
01420 //             extraField += getString(offset);
01421 //             break;
01422 //         default:
01423 //             myStatus.setStatus(SamStatus::FAIL_PARSE,
01424 //                                "Unknown tag type.");
01425 //             break;
01426 //     }
01427 // }
01428 
01429 
01430 // Allocate space for the record - does a realloc.  
01431 // The passed in size is the size of the entire record including the
01432 // block size field.
01433 bool SamRecord::allocateRecordStructure(int size)
01434 {
01435     if (allocatedSize < size)
01436     {
01437         bamRecordStruct* tmpRecordPtr = 
01438             (bamRecordStruct *)realloc(myRecordPtr, size);
01439         if(tmpRecordPtr == NULL)
01440         {
01441             // FAILED to allocate memory
01442             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
01443             myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
01444             return(false);
01445         }
01446         // Successfully allocated memory, so set myRecordPtr.
01447         myRecordPtr = tmpRecordPtr;
01448         allocatedSize = size;
01449     }
01450     return(true);
01451 }
01452 
01453 
01454 // Index is the index into the strings array.
01455 void* SamRecord::getStringPtr(int index)
01456 {
01457     int value = extras[index];
01458 
01459     return &(strings[value]);
01460 }
01461 
01462 void* SamRecord::getIntegerPtr(int offset)
01463 {
01464     int value = extras[offset];
01465 
01466     return &(integers[value]);
01467 }
01468 
01469 void* SamRecord::getDoublePtr(int offset)
01470 {
01471     int value = extras[offset];
01472 
01473     return &(doubles[value]);
01474 }
01475 
01476 
01477 // Fixes the buffer to match the variable length fields if they are set.
01478 bool SamRecord::fixBuffer()
01479 {
01480     // Check to see if the buffer is already synced.
01481     if(myIsBufferSynced)
01482     {
01483         // Already synced, nothing to do.
01484         return(true);
01485     }
01486    
01487     // Set the bin if necessary.
01488     if(!myIsBinValid)
01489     {
01490         // The bin that is set in the record is not valid, so
01491         // reset it.
01492         myRecordPtr->myBin = 
01493             bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());      
01494         myIsBinValid = true;
01495     }
01496 
01497     // Not synced.
01498     bool status = true;
01499 
01500     // First determine the size the buffer needs to be.
01501     uint8_t newReadNameLen = getReadNameLength();
01502     uint16_t newCigarLen = getCigarLength();
01503     int32_t newReadLen = getReadLength();
01504     uint32_t newTagLen = getTagLength();
01505     uint32_t bamSequenceLen = (newReadLen+1)/2;
01506 
01507     // The buffer size extends from the start of the record to data
01508     // plus the length of the variable fields,
01509     // Multiply the cigar length by 4 since it is the number of uint32_t fields.
01510     int newBufferSize = 
01511         ((unsigned char*)(&(myRecordPtr->myData)) - 
01512          (unsigned char*)myRecordPtr) +
01513         newReadNameLen + ((newCigarLen)*4) +
01514         newReadLen + bamSequenceLen + newTagLen;
01515    
01516     if(!allocateRecordStructure(newBufferSize))
01517     {
01518         // Failed to allocate space.
01519         return(false);
01520     }
01521 
01522     // Now that space has been added to the buffer, check to see what if
01523     // any fields need to be extracted from the buffer prior to starting to
01524     // overwrite it.  Fields need to be extracted from the buffer if the 
01525     // buffer is valid for the field and a previous variable length field has
01526     // changed length.
01527     bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
01528     bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
01529     bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
01530 
01531     // If the tags are still stored in the buffer and any other fields changed
01532     // lengths, they need to be extracted.
01533     if(myIsTagsBufferValid &&
01534        (readNameLenChange | cigarLenChange | readLenChange))
01535     {
01536         status &= setTagsFromBuffer();
01537         // The tag buffer will not be valid once the other fields
01538         // are written, so set it to not valid.
01539         myIsTagsBufferValid = false;
01540     }
01541 
01542     // If the sequence or quality strings are still stored in the buffer, and
01543     // any of the previous fields have changed length, extract it from the
01544     // current buffer.
01545     if((myIsQualityBufferValid | myIsSequenceBufferValid) && 
01546        (readNameLenChange | cigarLenChange | readLenChange))
01547     {
01548         setSequenceAndQualityFromBuffer();
01549         // The quality and sequence buffers will not be valid once the other
01550         // fields are written, so set them to not valid.
01551         myIsQualityBufferValid = false;
01552         myIsSequenceBufferValid = false;
01553     }
01554 
01555     // If the cigar is still stored in the buffer, and any of the
01556     // previous fields have changed length, extract it from the current buffer.
01557     if((myIsCigarBufferValid) && 
01558        (readNameLenChange))
01559     {
01560         status &= parseCigarBinary();
01561         myIsCigarBufferValid = false;
01562     }
01563 
01564     // Set each value in the buffer if it is not already valid.
01565     if(!myIsReadNameBufferValid)
01566     {
01567         memcpy(&(myRecordPtr->myData), myReadName.c_str(), 
01568                newReadNameLen);
01569    
01570         // Set the new ReadNameLength.
01571         myRecordPtr->myReadNameLength = newReadNameLen;
01572         myIsReadNameBufferValid = true;
01573     }
01574 
01575     unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
01576         myRecordPtr->myReadNameLength;
01577    
01578     // Set the Cigar.  Need to reformat from the string to 
01579     unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
01580       
01581     if(!myIsCigarBufferValid)
01582     {
01583         // The cigar was already parsed when it was set, so just copy
01584         // data from the temporary buffer.
01585         myRecordPtr->myCigarLength = newCigarLen;
01586         memcpy(packedCigar, myCigarTempBuffer, 
01587                myRecordPtr->myCigarLength * sizeof(uint32_t));
01588       
01589         myIsCigarBufferValid = true;
01590     }
01591 
01592     unsigned char * packedSequence = readNameEnds + 
01593         myRecordPtr->myCigarLength * sizeof(int);
01594     unsigned char * packedQuality = packedSequence + bamSequenceLen;
01595    
01596     if(!myIsSequenceBufferValid || !myIsQualityBufferValid)
01597     {
01598         myRecordPtr->myReadLength = newReadLen;
01599         // Determine if the quality needs to be set and is just a * and needs to
01600         // be set to 0xFF.
01601         bool noQuality = false;
01602         if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01603         {
01604             noQuality = true;
01605         }
01606       
01607         for (int i = 0; i < myRecordPtr->myReadLength; i++) 
01608         {
01609             if(!myIsSequenceBufferValid)
01610             {
01611                 // Sequence buffer is not valid, so set the sequence.
01612                 int seqVal = 0;
01613                 switch(mySequence[i])
01614                 {
01615                     case '=':
01616                         seqVal = 0;
01617                         break;
01618                     case 'A':
01619                     case 'a':
01620                         seqVal = 1;
01621                         break;
01622                     case 'C':
01623                     case 'c':
01624                         seqVal = 2;
01625                         break;
01626                     case 'G':
01627                     case 'g':
01628                         seqVal = 4;
01629                         break;
01630                     case 'T':
01631                     case 't':
01632                         seqVal = 8;
01633                         break;
01634                     case 'N':
01635                     case 'n':
01636                     case '.':
01637                         seqVal = 15;
01638                         break;
01639                     default:
01640                         myStatus.addError(SamStatus::FAIL_PARSE,
01641                                           "Unknown Sequence character found.");
01642                         status = false;
01643                         break;
01644                 };
01645             
01646                 if(i & 1)
01647                 {
01648                     // Odd number i's go in the lower 4 bits, so OR in the
01649                     // lower bits
01650                     packedSequence[i/2] |= seqVal;
01651                 }
01652                 else
01653                 {
01654                     // Even i's go in the upper 4 bits and are always set first.
01655                     packedSequence[i/2] = seqVal << 4;
01656                 }
01657             }
01658 
01659             if(!myIsQualityBufferValid)
01660             {
01661                 // Set the quality.
01662                 if((noQuality) || (myQuality.Length() <= i))
01663                 {
01664                     // No quality or the quality is smaller than the sequence,
01665                     // so set it to 0xFF
01666                     packedQuality[i] = 0xFF;
01667                 }
01668                 else
01669                 {
01670                     // Copy the quality string.
01671                     packedQuality[i] = myQuality[i] - 33;
01672                 }
01673             }
01674         }
01675         myIsQualityBufferValid = true;
01676         myIsSequenceBufferValid = true;
01677     }
01678 
01679     if(!myIsTagsBufferValid)
01680     {
01681         status &= setTagsInBuffer();
01682     }
01683 
01684     // Set the lengths in the buffer.
01685     myRecordPtr->myReadNameLength = newReadNameLen;
01686     myRecordPtr->myCigarLength = newCigarLen;
01687     myRecordPtr->myReadLength = newReadLen;
01688 
01689     // Set the buffer block size to the size of the buffer minus the
01690     // first field.
01691     myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
01692 
01693     if(status)
01694     {
01695         myIsBufferSynced = true;
01696     }
01697 
01698     return(status);
01699 }
01700 
01701 
01702 // Sets the Sequence and Quality strings from the buffer.
01703 // They are done together in one method because they require the same
01704 // loop, so might as well be done at the same time.
01705 void SamRecord::setSequenceAndQualityFromBuffer()
01706 {
01707     // NOTE: If the sequence buffer is not valid, do not set the sequence
01708     // string from the buffer.
01709     // NOTE: If the quality buffer is not valid, do not set the quality string
01710     // from the buffer.
01711 
01712     // Extract the sequence if the buffer is valid and the string's length is 0.
01713     bool extractSequence = false;
01714     if(myIsSequenceBufferValid && (mySequence.Length() == 0))
01715     {
01716         extractSequence = true;
01717     }
01718 
01719     // Extract the quality if the buffer is valid and the string's length is 0.
01720     bool extractQuality = false;
01721     if(myIsQualityBufferValid && (myQuality.Length() == 0))
01722     {
01723         extractQuality = true;
01724     }
01725 
01726     // If neither the quality nor the sequence need to be extracted,
01727     // just return.
01728     if(!extractSequence && !extractQuality)
01729     {
01730         return;
01731     }
01732 
01733     // Set the sequence and quality strings..
01734     if(extractSequence)
01735     {
01736         mySequence.SetLength(myRecordPtr->myReadLength);
01737     }
01738     if(extractQuality)
01739     {
01740         myQuality.SetLength(myRecordPtr->myReadLength);
01741     }
01742    
01743     unsigned char * readNameEnds = 
01744         (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
01745     unsigned char * packedSequence = 
01746         readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
01747     unsigned char * packedQuality = 
01748         packedSequence + (myRecordPtr->myReadLength + 1) / 2;
01749 
01750     const char * asciiBases = "0AC.G...T......N";
01751 
01752     // Flag to see if the quality is specified - the quality contains a value
01753     // other than 0xFF.  If all values are 0xFF, then there is no quality.
01754     bool qualitySpecified = false;
01755 
01756     for (int i = 0; i < myRecordPtr->myReadLength; i++)
01757     {
01758         if(extractSequence)
01759         {
01760             mySequence[i] = i & 1 ?
01761                 asciiBases[packedSequence[i / 2] & 0xF] :
01762                 asciiBases[packedSequence[i / 2] >> 4];
01763         }
01764 
01765         if(extractQuality)
01766         {
01767             if(packedQuality[i] != 0xFF)
01768             {
01769                 // Quality is specified, so mark the flag.
01770                 qualitySpecified = true;
01771             }
01772 
01773             myQuality[i] = packedQuality[i] + 33;
01774         }
01775     }
01776 
01777     // If the read length is 0, then set the sequence and quality to '*'
01778     if(myRecordPtr->myReadLength == 0)
01779     {
01780         if(extractSequence)
01781         {
01782             mySequence = "*";
01783         }
01784         if(extractQuality)
01785         {
01786             myQuality = "*";
01787         }
01788     }
01789     else if(extractQuality && !qualitySpecified)
01790     {
01791         // No quality was specified, so set it to "*"
01792         myQuality = "*";
01793     }
01794 }
01795 
01796 
01797 // Parse the cigar to calculate the alignment/unclipped end.
01798 bool SamRecord::parseCigar()
01799 {
01800     // Determine if the cigar string or cigar binary needs to be parsed.
01801     if(myCigar.Length() == 0)
01802     {
01803         // The cigar string is not yet set, so parse the binary.
01804         return(parseCigarBinary());
01805     }
01806     return(parseCigarString());
01807 }
01808 
01809 // Parse the cigar to calculate the alignment/unclipped end.
01810 bool SamRecord::parseCigarBinary()
01811 {
01812     // Only need to parse if the string is not already set.
01813     // The length of the cigar string is set to zero when the 
01814     // record is read from a file into the buffer.
01815     if(myCigar.Length() != 0)
01816     {
01817         // Already parsed.
01818         return(true);
01819     }
01820 
01821     unsigned char * readNameEnds = 
01822         (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
01823    
01824     unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
01825 
01826     myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
01827     
01828     myCigarRoller.getCigarString(myCigar);
01829 
01830     myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
01831 
01832     myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
01833     myUnclippedEndOffset = myCigarRoller.getNumEndClips();
01834 
01835     // if the cigar length is 0, then set the cigar string to "*"
01836     if(myRecordPtr->myCigarLength == 0)
01837     {
01838         myCigar = "*";
01839         return(true);
01840     }
01841 
01842     // Copy the cigar into a temporary buffer.
01843     int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
01844     if(newBufferSize > myCigarTempBufferAllocatedSize)
01845     {
01846         uint32_t* tempBufferPtr = 
01847             (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
01848         if(tempBufferPtr == NULL)
01849         {
01850             // Failed to allocate memory.
01851             // Do not parse, just return.
01852             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
01853             myStatus.addError(SamStatus::FAIL_MEM,
01854                               "Failed to Allocate Memory.");
01855             return(false);
01856         }
01857         myCigarTempBuffer = tempBufferPtr;
01858         myCigarTempBufferAllocatedSize = newBufferSize;
01859     }
01860 
01861     memcpy(myCigarTempBuffer, packedCigar, 
01862            myRecordPtr->myCigarLength * sizeof(uint32_t));
01863 
01864     // Set the length of the temp buffer.
01865     myCigarTempBufferLength = myRecordPtr->myCigarLength;
01866 
01867     return(true);
01868 }
01869 
01870 // Parse the cigar string to calculate the cigar length and alignment end.
01871 bool SamRecord::parseCigarString()
01872 {
01873     myCigarTempBufferLength = 0;
01874     if(myCigar == "*")
01875     {
01876         // Cigar is empty, so initialize the variables.
01877         myAlignmentLength = 0;
01878         myUnclippedStartOffset = 0;
01879         myUnclippedEndOffset = 0;
01880         myCigarRoller.clear();
01881         return(true);
01882     }
01883 
01884     myCigarRoller.Set(myCigar);
01885     
01886     myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
01887     
01888     myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
01889     myUnclippedEndOffset = myCigarRoller.getNumEndClips();
01890 
01891     // Check to see if the Temporary Cigar Buffer is large enough to contain
01892     // this cigar.  If we make it the size of the length of the cigar string,
01893     // it will be more than large enough.
01894     int newBufferSize = myCigar.Length() * sizeof(uint32_t);
01895     if(newBufferSize > myCigarTempBufferAllocatedSize)
01896     {
01897         uint32_t* tempBufferPtr = 
01898             (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
01899         if(tempBufferPtr == NULL)
01900         {
01901             // Failed to allocate memory.
01902             // Do not parse, just return.
01903             fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
01904             myStatus.addError(SamStatus::FAIL_MEM,
01905                               "Failed to Allocate Memory.");
01906             return(false);
01907         }
01908         myCigarTempBuffer = tempBufferPtr;
01909         myCigarTempBufferAllocatedSize = newBufferSize;
01910     }
01911 
01912     // Track if there were any errors.
01913     bool status = true;
01914 
01915     // Track the index into the cigar string that is being parsed.
01916     char *cigarOp;
01917     const char* cigarEntryStart = myCigar.c_str();
01918     int opLen = 0;
01919     int op = 0;
01920 
01921     unsigned int * packedCigar = myCigarTempBuffer;
01922     // TODO - maybe one day make a cigar list... or maybe make a 
01923     // reference cigar string for ease of lookup....
01924     const char* endCigarString = cigarEntryStart + myCigar.Length();
01925     while(cigarEntryStart < endCigarString)
01926     {
01927         bool validCigarEntry = true;
01928         // Get the opLen from the string.  cigarOp will then point to 
01929         // the operation.
01930         opLen = strtol(cigarEntryStart, &cigarOp, 10);
01931         // Switch on the type of operation.
01932         switch(*cigarOp)
01933         {
01934             case('M'):
01935                 op = 0;
01936                 break;
01937             case('I'):
01938                 // Insert into the reference position, so do not increment the
01939                 // reference end position.
01940                 op = 1;
01941                 break;
01942             case('D'):
01943                 op = 2;
01944                 break;
01945             case('N'):
01946                 op = 3;
01947                 break;
01948             case('S'):
01949                 op = 4;
01950                 break;
01951             case('H'):
01952                 op = 5;
01953                 break;
01954             case('P'):
01955                 op = 6;
01956                 break;
01957             default:
01958                 fprintf(stderr, "ERROR parsing cigar\n");
01959                 validCigarEntry = false;
01960                 status = false;
01961                 myStatus.addError(SamStatus::FAIL_PARSE,
01962                                   "Unknown operation found when parsing the Cigar.");
01963                 break;
01964         };
01965         if(validCigarEntry)
01966         {
01967             // Increment the cigar length.
01968             ++myCigarTempBufferLength;
01969             *packedCigar = (opLen << 4) | op;
01970             packedCigar++;
01971         }
01972         // The next Entry starts at one past the cigar op, so set the start.
01973         cigarEntryStart = ++cigarOp;
01974     }
01975 
01976     // Check clipLength to adjust the end position.
01977     return(status);
01978 }
01979 
01980 
01981 bool SamRecord::setTagsFromBuffer()
01982 {
01983     // If the tags do not need to be set from the buffer, return true.
01984     if(myNeedToSetTagsFromBuffer == false)
01985     {
01986         // Already been set from the buffer.
01987         return(true);
01988     }
01989 
01990     // Mark false, as they are being set now.
01991     myNeedToSetTagsFromBuffer = false;
01992 
01993     unsigned char * readNameEnds = 
01994         (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
01995     unsigned char * packedSequence = 
01996         readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
01997     unsigned char * packedQuality = 
01998         packedSequence + (myRecordPtr->myReadLength + 1) / 2;
01999 
02000     unsigned char * extraPtr = packedQuality + myRecordPtr->myReadLength;
02001 
02002     // Default to success, will be changed to false on failure.
02003     bool status = true;
02004 
02005     // Clear any previously set tags.
02006     clearTags();
02007     while (myRecordPtr->myBlockSize + 4 - 
02008            (extraPtr - (unsigned char *)myRecordPtr) > 0)
02009     {
02010         int key;
02011         int value;
02012         void * content = extraPtr + 3;
02013         int tagBufferSize = 0;
02014       
02015         switch (extraPtr[2])
02016         {
02017             case 'A' :
02018                 key = MAKEKEY(extraPtr[0], extraPtr[1], 'A');
02019                 value = integers.Length();
02020                 integers.Push(* (char *) content);
02021                 extraPtr += 4;
02022                 tagBufferSize += 4;
02023                 break;
02024             case 'c' :
02025                 key = MAKEKEY(extraPtr[0], extraPtr[1], 'c');
02026                 value = integers.Length();
02027                 integers.Push(* (char *) content);
02028                 extraPtr += 4;
02029                 tagBufferSize += 4;
02030                 break;
02031             case 'C' :
02032                 key = MAKEKEY(extraPtr[0], extraPtr[1], 'C');
02033                 value = integers.Length();
02034                 integers.Push(* (unsigned char *) content);
02035                 extraPtr += 4;
02036                 tagBufferSize += 4;
02037                 break;
02038             case 's' :
02039                 key = MAKEKEY(extraPtr[0], extraPtr[1], 's');
02040                 value = integers.Length();
02041                 integers.Push(* (short *) content);
02042                 extraPtr += 5;
02043                 tagBufferSize += 5;
02044                 break;
02045             case 'S' :
02046                 key = MAKEKEY(extraPtr[0], extraPtr[1], 'S');
02047                 value = integers.Length();
02048                 integers.Push(* (unsigned short *) content);
02049                 extraPtr += 5;
02050                 tagBufferSize += 5;
02051                 break;
02052             case 'i' :
02053                 key = MAKEKEY(extraPtr[0], extraPtr[1], 'i');
02054                 value = integers.Length();
02055                 integers.Push(* (int *) content);
02056                 extraPtr += 7;
02057                 tagBufferSize += 7;
02058                 break;
02059             case 'I' :
02060                 key = MAKEKEY(extraPtr[0], extraPtr[1], 'I');
02061                 value = integers.Length();
02062                 integers.Push((int) * (unsigned int *) content);
02063                 extraPtr += 7;
02064                 tagBufferSize += 7;
02065                 break;
02066             case 'Z' :
02067                 key = MAKEKEY(extraPtr[0], extraPtr[1], 'Z');
02068                 value = strings.Length();
02069                 strings.Push((const char *) content);
02070                 extraPtr += 4 + strings.Last().Length();
02071                 tagBufferSize += 4 + strings.Last().Length();
02072                 break;
02073             case 'f' :
02074                 key = MAKEKEY(extraPtr[0], extraPtr[1], 'f');
02075                 value = doubles.Length();
02076                 doubles.Push(* (float *) content);
02077                 fprintf(stderr, "\n\nFLOAT!!!\n\n");
02078                 extraPtr += 7;
02079                 tagBufferSize += 7;
02080                 break;
02081             default :
02082                 fprintf(stderr, 
02083                         "parsing BAM - Unknown custom field of type %c%c:%c\n",
02084                         extraPtr[0], extraPtr[1], extraPtr[2]);
02085                 // Failed on read.
02086                 // Increment extraPtr just by the size of the 3 known fields
02087                 extraPtr += 3;
02088                 myStatus.addError(SamStatus::FAIL_PARSE,
02089                                   "Unknown tag type.");
02090                 status = false;
02091         }
02092 
02093         // Only add the tag if it has so far been successfully processed.
02094         if(status)
02095         {
02096             extras.Add(key, value);
02097             myTagBufferSize += tagBufferSize;
02098         }
02099     }
02100     return(status);
02101 }
02102 
02103 
02104 bool SamRecord::setTagsInBuffer()
02105 {
02106     // The buffer size extends from the start of the record to data
02107     // plus the length of the variable fields,
02108     // Multiply the cigar length by 4 since it is the number of uint32_t fields.
02109     int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
02110     int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) - 
02111                          (unsigned char*)myRecordPtr) +  
02112         myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
02113         myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
02114 
02115     // Make sure the buffer is big enough.
02116     if(!allocateRecordStructure(newBufferSize))
02117     {
02118         // Failed to allocate space.
02119         return(false);
02120     }
02121 
02122     unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02123         myRecordPtr->myReadNameLength;
02124     unsigned char * packedSequence = readNameEnds + 
02125         myRecordPtr->myCigarLength * sizeof(int);
02126     unsigned char * packedQuality = 
02127         packedSequence + bamSequenceLength;
02128    
02129     char * extraPtr = (char*)packedQuality + myRecordPtr->myReadLength;
02130 
02131     bool status = true;
02132 
02133     // Set the tags in the buffer.
02134     if (extras.Entries())
02135     {
02136         for (int i = 0; i < extras.Capacity(); i++)
02137         {
02138             if (extras.SlotInUse(i))
02139             {
02140                 int key = extras.GetKey(i);
02141                 getTag(key, extraPtr);
02142                 extraPtr += 2;
02143                 getVtype(key, extraPtr[0]);
02144                 char vtype = extraPtr[0];
02145 
02146                 // increment the pointer to where the value is.
02147                 extraPtr += 1;
02148 
02149                 switch (vtype)
02150                 {
02151                     case 'A' :
02152                         *(char*)extraPtr = (char)getInteger(i);
02153                         // sprintf(extraPtr, "%d", getInteger(i));
02154                         extraPtr += 1;
02155                         break;
02156                     case 'c' :
02157                         *(int8_t*)extraPtr = (int8_t)getInteger(i);
02158                         // sprintf(extraPtr, "%.4d", getInteger(i));
02159                         extraPtr += 1;
02160                         break;
02161                     case 'C' :
02162                         *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
02163                         // sprintf(extraPtr, "%.4d", getInteger(i));
02164                         extraPtr += 1;
02165                         break;
02166                     case 's' :
02167                         *(int16_t*)extraPtr = (int16_t)getInteger(i);
02168                         // sprintf(extraPtr, "%.4d", getInteger(i));
02169                         extraPtr += 2;
02170                         break;
02171                     case 'S' :
02172                         *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
02173                         // sprintf(extraPtr, "%.4d", getInteger(i));
02174                         extraPtr += 2;
02175                         break;
02176                     case 'i' :
02177                         *(int32_t*)extraPtr = (int32_t)getInteger(i);
02178                         // sprintf(extraPtr, "%.4d", getInteger(i));
02179                         extraPtr += 4;
02180                         break;
02181                     case 'I' :
02182                         *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
02183                         // sprintf(extraPtr, "%.4d", getInteger(i));
02184                         extraPtr += 4;
02185                         break;
02186                     case 'Z' :
02187                         sprintf(extraPtr, "%s", getString(i).c_str());
02188                         extraPtr += getString(i).Length() + 1;
02189                         break;
02190                     case 'f' :
02191                         // TODO, figure out if %f is good enough.
02192                         sprintf(extraPtr, "%f", getDouble(i));
02193                         extraPtr += 4;
02194                         break;
02195                     default :
02196                         myStatus.addError(SamStatus::FAIL_PARSE,
02197                                           "Unknown tag type.");
02198                         status = false;
02199                         break;
02200                 }
02201             }
02202         }
02203     }
02204 
02205     // Validate that the extra pointer is at the end of the allocated buffer.
02206     // If not then there was a problem.
02207     if(extraPtr != (char*)myRecordPtr + newBufferSize)
02208     {
02209         fprintf(stderr, "ERROR updating the buffer.  Incorrect size.");
02210         myStatus.addError(SamStatus::FAIL_PARSE,
02211                           "ERROR updating the buffer.  Incorrect size.");
02212         status = false;
02213     }
02214 
02215 
02216     // The buffer tags are now in sync.
02217     myNeedToSetTagsInBuffer = false;
02218     myIsTagsBufferValid = true;
02219     return(status);
02220 }
02221 
02222 
02223 // Reset the variables for a newly set buffer.  The buffer must be set first
02224 // since this looks up the reference ids in the buffer to set the reference
02225 // names.
02226 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
02227 {
02228     // Lookup the reference name & mate reference name associated with this
02229     // record.
02230     myReferenceName = 
02231         header.getReferenceLabel(myRecordPtr->myReferenceID);
02232     myMateReferenceName = 
02233         header.getReferenceLabel(myRecordPtr->myMateReferenceID);      
02234 
02235     // Clear the SAM Strings that are now not in-sync with the buffer.
02236     myReadName.SetLength(0);
02237     myCigar.SetLength(0);
02238     mySequence.SetLength(0);
02239     myQuality.SetLength(0);
02240     myNeedToSetTagsFromBuffer = true;
02241     myNeedToSetTagsInBuffer = false;
02242 
02243     //Set that the buffer is valid.
02244     myIsBufferSynced = true;
02245     // Set that the variable length buffer fields are valid.
02246     myIsReadNameBufferValid = true;
02247     myIsCigarBufferValid = true;
02248     myIsSequenceBufferValid = true;
02249     myIsQualityBufferValid = true;
02250     myIsTagsBufferValid = true;
02251     myIsBinValid = true;
02252 }
02253 
02254 
02255 // Extract the vtype from the key.
02256 void SamRecord::getVtype(int key, char& vtype) const
02257 {
02258     // Extract the vtype from the key.
02259     vtype = (key >> 16) & 0xFF;
02260 }
02261 
02262 // Extract the tag from the key.
02263 void SamRecord::getTag(int key, char* tag) const
02264 {
02265     // Extract the tag from the key.
02266     tag[0] = key & 0xFF;
02267     tag[1] = (key >> 8) & 0xFF;
02268     tag[2] = 0;
02269 }
02270 
02271 
02272 // Index is the index into the strings array.
02273 String & SamRecord::getString(int index)
02274 {
02275     int value = extras[index];
02276 
02277     return strings[value];
02278 }
02279 
02280 int & SamRecord::getInteger(int offset)
02281 {
02282     int value = extras[offset];
02283 
02284     return integers[value];
02285 }
02286 
02287 double & SamRecord::getDouble(int offset)
02288 {
02289     int value = extras[offset];
02290 
02291     return doubles[value];
02292 }
02293 
02294 
Generated on Wed Nov 17 15:38:27 2010 for StatGen Software by  doxygen 1.6.3