Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record. More...
#include <SamRecord.h>

Public Types | |
| enum | SequenceTranslation { NONE, EQUAL, BASES } |
Enum containing the settings on how to translate the sequence if a reference is available. More... | |
Public Member Functions | |
| SamRecord () | |
| Default Constructor. | |
| SamRecord (ErrorHandler::HandlingType errorHandlingType) | |
| Constructor that sets the error handling type. | |
| ~SamRecord () | |
| Destructor. | |
| void | resetRecord () |
| Reset the fields of the record to a default value. | |
| bool | isValid (SamFileHeader &header) |
| Returns whether or not the record is valid, setting the status to indicate success or failure. | |
| void | setReference (GenomeSequence *reference) |
| Set the reference to the specified genome sequence object. | |
| void | setSequenceTranslation (SequenceTranslation translation) |
| Set the type of sequence translation to use when getting the sequence. | |
| const SamStatus & | getStatus () |
| Returns the status associated with the last method that sets the status. | |
Set Alignment Data | |
| bool | setReadName (const char *readName) |
| Set QNAME to the passed in name. | |
| bool | setFlag (uint16_t flag) |
| Set the bitwise FLAG to the specified value. | |
| bool | setReferenceName (SamFileHeader &header, const char *referenceName) |
| Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id. | |
| bool | set1BasedPosition (int32_t position) |
| Set the leftmost position (POS) using the specified 1-based (SAM format) value. | |
| bool | set0BasedPosition (int32_t position) |
| Set the leftmost position using the specified 0-based (BAM format) value. | |
| bool | setMapQuality (uint8_t mapQuality) |
| Set the mapping quality (MAPQ). | |
| bool | setCigar (const char *cigar) |
| Set the CIGAR to the specified SAM formatted cigar string. | |
| bool | setCigar (const Cigar &cigar) |
| Set the CIGAR to the specified Cigar object. | |
| bool | setMateReferenceName (SamFileHeader &header, const char *mateReferenceName) |
| Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id. | |
| bool | set1BasedMatePosition (int32_t matePosition) |
| Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value. | |
| bool | set0BasedMatePosition (int32_t matePosition) |
| Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value. | |
| bool | setInsertSize (int32_t insertSize) |
| Sets the inferred insert size (ISIZE)/observed template length (TLEN). | |
| bool | setSequence (const char *seq) |
| Sets the sequence (SEQ) to the specified SAM formatted sequence string. | |
| bool | setQuality (const char *quality) |
| Sets the quality (QUAL) to the specified SAM formatted quality string. | |
| bool | shiftIndelsLeft () |
| Shift the indels (if any) to the left by updating the CIGAR. | |
| SamStatus::Status | setBuffer (const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header) |
| Sets the SamRecord to contain the information in the BAM formatted fromBuffer. | |
| SamStatus::Status | setBufferFromFile (IFILE filePtr, SamFileHeader &header) |
| Read the BAM record from a file. | |
Set Tag Data | |
| bool | addIntTag (const char *tag, int32_t value) |
| Add the specified integer tag to the record. | |
| bool | addTag (const char *tag, char vtype, const char *value) |
| Add the specified tag,vtype,value to the record. | |
| void | clearTags () |
| Clear the tags in this record. | |
| bool | rmTag (const char *tag, char type) |
| Remove a tag. | |
| bool | rmTags (const char *tags) |
| Remove tags. | |
Get Alignment Data | |
| const void * | getRecordBuffer () |
| Get a const pointer to the buffer that contains the BAM representation of the record. | |
| const void * | getRecordBuffer (SequenceTranslation translation) |
| Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence. | |
| SamStatus::Status | writeRecordBuffer (IFILE filePtr) |
| Write the record as a BAM into the specified already opened file. | |
| SamStatus::Status | writeRecordBuffer (IFILE filePtr, SequenceTranslation translation) |
| Write the record as a BAM into the specified already opened file using the specified translation on the sequence. | |
| int32_t | getBlockSize () |
| Get the block size of the record (BAM format). | |
| const char * | getReferenceName () |
| Get the reference sequence name (RNAME) of the record. | |
| int32_t | getReferenceID () |
| Get the reference sequence id of the record (BAM format rid). | |
| int32_t | get1BasedPosition () |
| Get the 1-based(SAM) leftmost position (POS) of the record. | |
| int32_t | get0BasedPosition () |
| Get the 0-based(BAM) leftmost position of the record. | |
| uint8_t | getReadNameLength () |
| Get the length of the readname (QNAME) including the null. | |
| uint8_t | getMapQuality () |
| Get the mapping quality (MAPQ) of the record. | |
| uint16_t | getBin () |
| Get the BAM bin for the record. | |
| uint16_t | getCigarLength () |
| Get the length of the BAM formatted CIGAR. | |
| uint16_t | getFlag () |
| Get the flag (FLAG). | |
| int32_t | getReadLength () |
| Get the length of the read. | |
| const char * | getMateReferenceName () |
| Get the mate/next fragment's reference sequence name (RNEXT). | |
| const char * | getMateReferenceNameOrEqual () |
| Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned. | |
| int32_t | getMateReferenceID () |
| Get the mate reference id of the record (BAM format: mate_rid/next_refID). | |
| int32_t | get1BasedMatePosition () |
| Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT). | |
| int32_t | get0BasedMatePosition () |
| Get the 0-based(BAM) leftmost mate/next fragment's position. | |
| int32_t | getInsertSize () |
| Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN). | |
| int32_t | get0BasedAlignmentEnd () |
| Returns the 0-based inclusive rightmost position of the clipped sequence. | |
| int32_t | get1BasedAlignmentEnd () |
| Returns the 1-based inclusive rightmost position of the clipped sequence. | |
| int32_t | getAlignmentLength () |
| Returns the length of the clipped sequence, returning 0 if the cigar is '*'. | |
| int32_t | get0BasedUnclippedStart () |
| Returns the 0-based inclusive left-most position adjusted for clipped bases. | |
| int32_t | get1BasedUnclippedStart () |
| Returns the 1-based inclusive left-most position adjusted for clipped bases. | |
| int32_t | get0BasedUnclippedEnd () |
| Returns the 0-based inclusive right-most position adjusted for clipped bases. | |
| int32_t | get1BasedUnclippedEnd () |
| Returns the 1-based inclusive right-most position adjusted for clipped bases. | |
| const char * | getReadName () |
| Returns the SAM formatted Read Name (QNAME). | |
| const char * | getCigar () |
| Returns the SAM formatted CIGAR string. | |
| const char * | getSequence () |
| Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation. | |
| const char * | getSequence (SequenceTranslation translation) |
| Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation. | |
| const char * | getQuality () |
| Returns the SAM formatted quality string (QUAL). | |
| char | getSequence (int index) |
| Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation. | |
| char | getSequence (int index, SequenceTranslation translation) |
| Get the sequence base at the specified index into this sequence 0 to readLength - 1 performing the specified sequence translation. | |
| char | getQuality (int index) |
| Get the quality character at the specified index into the quality 0 to readLength - 1. | |
| Cigar * | getCigarInfo () |
| Returns a pointer to the Cigar object associated with this record. | |
| uint32_t | getNumOverlaps (int32_t start, int32_t end) |
| Return the number of bases in this read that overlap the passed in region. | |
| bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality) |
| Returns the values of all fields except the tags. | |
| bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality, SequenceTranslation translation) |
| Returns the values of all fields except the tags using the specified sequence translation. | |
| GenomeSequence * | getReference () |
| Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set). | |
Get Tag Methods | |
| uint32_t | getTagLength () |
| Returns the length of the BAM formatted tags. | |
| bool | getNextSamTag (char *tag, char &vtype, void **value) |
| Get the next tag from the record. | |
| void | resetTagIter () |
| Reset the tag iterator to the beginning of the tags. | |
| bool | isIntegerType (char vtype) const |
| Returns whether or not the specified vtype is an integer type. | |
| bool | isDoubleType (char vtype) const |
| Returns whether or not the specified vtype is a double type. | |
| bool | isCharType (char vtype) const |
| Returns whether or not the specified vtype is a char type. | |
| bool | isStringType (char vtype) const |
| Returns whether or not the specified vtype is a string type. | |
| bool | getTagsString (const char *tags, String &returnString, char delim= '\t') |
| Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE. | |
| String * | getStringTag (const char *tag) |
| Get the string value for the specified tag. | |
| int * | getIntegerTag (const char *tag) |
| Get the integer value for the specified tag. | |
| double * | getDoubleTag (const char *tag) |
| Get the double value for the specified tag. | |
| String & | getString (const char *tag) |
| Get the string value for the specified tag. | |
| int & | getInteger (const char *tag) |
| Get the integer value for the specified tag. | |
| double & | getDouble (const char *tag) |
| Get the double value for the specified tag. | |
| bool | checkString (const char *tag) |
| Check if the specified tag contains a string. | |
| bool | checkInteger (const char *tag) |
| Check if the specified tag contains an integer. | |
| bool | checkDouble (const char *tag) |
| Check if the specified tag contains a double. | |
| bool | checkTag (const char *tag, char type) |
| Check if the specified tag contains a value of the specified vtype. | |
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition at line 51 of file SamRecord.h.
Enum containing the settings on how to translate the sequence if a reference is available.
If no reference is available, no translation is done.
| NONE |
Leave the sequence as is. |
| EQUAL |
Translate bases that match the reference to '='. |
| BASES |
Translate '=' to the actual base. |
Definition at line 57 of file SamRecord.h.
| SamRecord::SamRecord | ( | ErrorHandler::HandlingType | errorHandlingType | ) |
Constructor that sets the error handling type.
| errorHandlingType | how to handle errors. |
Definition at line 53 of file SamRecord.cpp.
References resetRecord().
00054 : myStatus(errorHandlingType), 00055 myRefPtr(NULL), 00056 mySequenceTranslation(NONE) 00057 { 00058 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t); 00059 00060 myRecordPtr = 00061 (bamRecordStruct *) malloc(defaultAllocSize); 00062 00063 myCigarTempBuffer = NULL; 00064 myCigarTempBufferAllocatedSize = 0; 00065 00066 allocatedSize = defaultAllocSize; 00067 00068 resetRecord(); 00069 }
| bool SamRecord::addIntTag | ( | const char * | tag, | |
| int32_t | value | |||
| ) |
Add the specified integer tag to the record.
Internal processing handles switching between SAM/BAM formats when read/written and determining the type for BAM format. If the tag is already there this code will replace it if the specified value is different.
| tag | two character tag to be added to the SAM/BAM record. | |
| value | value for the specified tag. |
Definition at line 631 of file SamRecord.cpp.
References SamStatus::INVALID, SamStatus::setStatus(), and SamStatus::SUCCESS.
Referenced by addTag().
00632 { 00633 myStatus = SamStatus::SUCCESS; 00634 int key = 0; 00635 int index = 0; 00636 char bamvtype; 00637 00638 int tagBufferSize = 0; 00639 00640 // First check to see if the tags need to be synced to the buffer. 00641 if(myNeedToSetTagsFromBuffer) 00642 { 00643 if(!setTagsFromBuffer()) 00644 { 00645 // Failed to read tags from the buffer, so cannot add new ones. 00646 return(false); 00647 } 00648 } 00649 00650 // Ints come in as int. But it can be represented in fewer bits. 00651 // So determine a more specific type that is in line with the 00652 // types for BAM files. 00653 // First check to see if it is a negative. 00654 if(value < 0) 00655 { 00656 // The int is negative, so it will need to use a signed type. 00657 // See if it is greater than the min value for a char. 00658 if(value > std::numeric_limits<char>::min()) 00659 { 00660 // It can be stored in a signed char. 00661 bamvtype = 'c'; 00662 tagBufferSize += 4; 00663 } 00664 else if(value > std::numeric_limits<short>::min()) 00665 { 00666 // It fits in a signed short. 00667 bamvtype = 's'; 00668 tagBufferSize += 5; 00669 } 00670 else 00671 { 00672 // Just store it as a signed int. 00673 bamvtype = 'i'; 00674 tagBufferSize += 7; 00675 } 00676 } 00677 else 00678 { 00679 // It is positive, so an unsigned type can be used. 00680 if(value < std::numeric_limits<unsigned char>::max()) 00681 { 00682 // It is under the max of an unsigned char. 00683 bamvtype = 'C'; 00684 tagBufferSize += 4; 00685 } 00686 else if(value < std::numeric_limits<unsigned short>::max()) 00687 { 00688 // It is under the max of an unsigned short. 00689 bamvtype = 'S'; 00690 tagBufferSize += 5; 00691 } 00692 else 00693 { 00694 // Just store it as an unsigned int. 00695 bamvtype = 'I'; 00696 tagBufferSize += 7; 00697 } 00698 } 00699 00700 // Check to see if the tag is already there. 
00701 key = MAKEKEY(tag[0], tag[1], bamvtype); 00702 unsigned int hashIndex = extras.Find(key); 00703 if(hashIndex != LH_NOTFOUND) 00704 { 00705 // Tag was already found. 00706 index = extras[hashIndex]; 00707 00708 // First check to see if the value changed. 00709 if((integers[index] == value) && (intType[index] == bamvtype)) 00710 { 00711 // The value has not changed, so do nothing. 00712 return(true); 00713 } 00714 else 00715 { 00716 // Not the same value, so adjust the settings. 00717 // Subtract the size of the previous tag from tagBufferSize to get 00718 // the adjusted size. 00719 switch(intType[index]) 00720 { 00721 case 'c': 00722 case 'C': 00723 tagBufferSize -= 4; 00724 break; 00725 case 's': 00726 case 'S': 00727 tagBufferSize -= 5; 00728 break; 00729 case 'i': 00730 case 'I': 00731 tagBufferSize -= 7; 00732 break; 00733 default: 00734 myStatus.setStatus(SamStatus::INVALID, 00735 "unknown tag inttype type found.\n"); 00736 return(false); 00737 } 00738 00739 // Update the integer value and type. 00740 integers[index] = value; 00741 intType[index] = bamvtype; 00742 } 00743 } 00744 else 00745 { 00746 // Tag is not already there, so add it. 00747 index = integers.Length(); 00748 00749 integers.Push(value); 00750 intType.push_back(bamvtype); 00751 00752 extras.Add(key, index); 00753 } 00754 00755 // The buffer tags are now out of sync. 00756 myNeedToSetTagsInBuffer = true; 00757 myIsTagsBufferValid = false; 00758 myIsBufferSynced = false; 00759 myTagBufferSize += tagBufferSize; 00760 00761 return(true); 00762 }
| bool SamRecord::addTag | ( | const char * | tag, | |
| char | vtype, | |||
| const char * | value | |||
| ) |
Add the specified tag,vtype,value to the record.
Vtype can be SAM/BAM format. Internal processing handles switching between SAM/BAM formats when read/written. If the tag is already there this code will replace it if the specified value is different.
| tag | two character tag to be added to the SAM/BAM record. | |
| vtype | vtype of the specified value - either SAM/BAM vtypes. | |
| value | value as a string for the specified tag. |
Definition at line 768 of file SamRecord.cpp.
References addIntTag(), SamStatus::FAIL_PARSE, SamStatus::setStatus(), and SamStatus::SUCCESS.
00769 { 00770 if(vtype == 'i') 00771 { 00772 // integer type. Call addIntTag to handle it. 00773 int intVal = atoi(valuePtr); 00774 return(addIntTag(tag, intVal)); 00775 } 00776 00777 // Non-int type. 00778 myStatus = SamStatus::SUCCESS; 00779 bool status = true; // default to successful. 00780 int key = 0; 00781 int index = 0; 00782 00783 int tagBufferSize = 0; 00784 00785 // First check to see if the tags need to be synced to the buffer. 00786 if(myNeedToSetTagsFromBuffer) 00787 { 00788 if(!setTagsFromBuffer()) 00789 { 00790 // Failed to read tags from the buffer, so cannot add new ones. 00791 return(false); 00792 } 00793 } 00794 00795 // First check to see if the tag is already there. 00796 key = MAKEKEY(tag[0], tag[1], vtype); 00797 unsigned int hashIndex = extras.Find(key); 00798 if(hashIndex != LH_NOTFOUND) 00799 { 00800 // The key was found in the hash, so get the lookup index. 00801 index = extras[hashIndex]; 00802 00803 // Adjust the currently pointed to value to the new setting. 00804 switch (vtype) 00805 { 00806 case 'A' : 00807 // First check to see if the value changed. 00808 if(integers[index] == (const int)*(valuePtr)) 00809 { 00810 // The value has not changed, so do nothing. 00811 return(true); 00812 } 00813 else 00814 { 00815 // Tag buffer size doesn't change between different 'A' entries. 00816 integers[index] = (const int)*(valuePtr); 00817 intType[index] = vtype; 00818 } 00819 break; 00820 case 'Z' : 00821 // First check to see if the value changed. 00822 if(strings[index] == valuePtr) 00823 { 00824 // The value has not changed, so do nothing. 00825 return(true); 00826 } 00827 else 00828 { 00829 // Adjust the tagBufferSize by removing the size of the old string. 00830 tagBufferSize -= strings[index].Length(); 00831 strings[index] = valuePtr; 00832 // Adjust the tagBufferSize by adding the size of the new string. 
00833 tagBufferSize += strings[index].Length(); 00834 } 00835 break; 00836 case 'f' : 00837 // First check to see if the value changed. 00838 if(doubles[index] == atof(valuePtr)) 00839 { 00840 // The value has not changed, so do nothing. 00841 return(true); 00842 } 00843 else 00844 { 00845 // Tag buffer size doesn't change between different 'f' entries. 00846 doubles[index] = atof(valuePtr); 00847 } 00848 break; 00849 default : 00850 fprintf(stderr, 00851 "samFile::ReadSAM() - Unknown custom field of type %c\n", 00852 vtype); 00853 myStatus.setStatus(SamStatus::FAIL_PARSE, 00854 "Unknown custom field in a tag"); 00855 status = false; 00856 break; 00857 } 00858 } 00859 else 00860 { 00861 // The key was found not found in the hash, so add it. 00862 switch (vtype) 00863 { 00864 case 'A' : 00865 index = integers.Length(); 00866 integers.Push((const int)*(valuePtr)); 00867 intType.push_back(vtype); 00868 tagBufferSize += 4; 00869 break; 00870 case 'Z' : 00871 index = strings.Length(); 00872 strings.Push(valuePtr); 00873 tagBufferSize += 4 + strings.Last().Length(); 00874 break; 00875 case 'f' : 00876 index = doubles.Length(); 00877 doubles.Push(atof(valuePtr)); 00878 tagBufferSize += 7; 00879 break; 00880 default : 00881 fprintf(stderr, 00882 "samFile::ReadSAM() - Unknown custom field of type %c\n", 00883 vtype); 00884 myStatus.setStatus(SamStatus::FAIL_PARSE, 00885 "Unknown custom field in a tag"); 00886 status = false; 00887 break; 00888 } 00889 if(status) 00890 { 00891 // If successful, add the key to extras. 00892 extras.Add(key, index); 00893 } 00894 } 00895 00896 // Only add the tag if it has so far been successfully processed. 00897 if(status) 00898 { 00899 // The buffer tags are now out of sync. 00900 myNeedToSetTagsInBuffer = true; 00901 myIsTagsBufferValid = false; 00902 myIsBufferSynced = false; 00903 myTagBufferSize += tagBufferSize; 00904 } 00905 return(status); 00906 }
| bool SamRecord::checkDouble | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a double.
Does not set SamStatus.
| tag | SAM tag to check contents of. |
Definition at line 600 of file SamRecord.h.
References checkTag().
00600 { return checkTag(tag, 'f'); }
| bool SamRecord::checkInteger | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains an integer.
Does not set SamStatus.
| tag | SAM tag to check contents of. |
Definition at line 594 of file SamRecord.h.
References checkTag().
00594 { return checkTag(tag, 'i'); }
| bool SamRecord::checkString | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a string.
Does not set SamStatus.
| tag | SAM tag to check contents of. |
Definition at line 588 of file SamRecord.h.
References checkTag().
00588 { return checkTag(tag, 'Z'); }
| bool SamRecord::checkTag | ( | const char * | tag, | |
| char | type | |||
| ) |
Check if the specified tag contains a value of the specified vtype.
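Resets SamStatus to SUCCESS on entry; if the tags still need to be read from the buffer and that read fails, the status is set to the resulting error.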
| tag | SAM tag to check contents of. | |
| type | value type to check if the SAM tag matches. |
Definition at line 2285 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by checkDouble(), checkInteger(), and checkString().
02286 { 02287 // Init to success. 02288 myStatus = SamStatus::SUCCESS; 02289 // Parse the buffer if necessary. 02290 if(myNeedToSetTagsFromBuffer) 02291 { 02292 if(!setTagsFromBuffer()) 02293 { 02294 // Failed to read the tags from the buffer, so cannot 02295 // get tags. setTagsFromBuffer set the error. 02296 return(""); 02297 } 02298 } 02299 02300 int key = MAKEKEY(tag[0], tag[1], type); 02301 02302 return (extras.Find(key) != LH_NOTFOUND); 02303 }
| void SamRecord::clearTags | ( | ) |
Clear the tags in this record.
Does not set SamStatus.
Definition at line 909 of file SamRecord.cpp.
References resetTagIter().
Referenced by resetRecord().
00910 { 00911 if(extras.Entries() != 0) 00912 { 00913 extras.Clear(); 00914 } 00915 strings.Clear(); 00916 integers.Clear(); 00917 intType.clear(); 00918 doubles.Clear(); 00919 myTagBufferSize = 0; 00920 resetTagIter(); 00921 }
| int32_t SamRecord::get0BasedAlignmentEnd | ( | ) |
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition at line 1393 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by get0BasedUnclippedEnd(), get1BasedAlignmentEnd(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and SamFile::readIndexedRecord().
01394 { 01395 myStatus = SamStatus::SUCCESS; 01396 if(myAlignmentLength == -1) 01397 { 01398 // Alignment end has not been set, so calculate it. 01399 parseCigar(); 01400 } 01401 // If alignment length > 0, subtract 1 from it to get the end. 01402 if(myAlignmentLength == 0) 01403 { 01404 // Length is 0, just return the start position. 01405 return(myRecordPtr->myPosition); 01406 } 01407 return(myRecordPtr->myPosition + myAlignmentLength - 1); 01408 }
| int32_t SamRecord::get0BasedMatePosition | ( | ) |
Get the 0-based(BAM) leftmost mate/next fragment's position.
Definition at line 1378 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01379 { 01380 myStatus = SamStatus::SUCCESS; 01381 return myRecordPtr->myMatePosition; 01382 }
| int32_t SamRecord::get0BasedPosition | ( | ) |
Get the 0-based(BAM) leftmost position of the record.
Definition at line 1245 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by SamTags::createMDTag(), getNumOverlaps(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), SamFile::readIndexedRecord(), SamQuerySeqWithRefIter::reset(), SamFilter::softClip(), and SamFile::validateSortOrder().
01246 { 01247 myStatus = SamStatus::SUCCESS; 01248 return myRecordPtr->myPosition; 01249 }
| int32_t SamRecord::get0BasedUnclippedEnd | ( | ) |
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition at line 1452 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by get1BasedUnclippedEnd().
01453 { 01454 // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the 01455 // cigar has not yet been parsed, so no need to check it here. 01456 return(get0BasedAlignmentEnd() + myUnclippedEndOffset); 01457 }
| int32_t SamRecord::get0BasedUnclippedStart | ( | ) |
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition at line 1432 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by get1BasedUnclippedStart().
01433 { 01434 myStatus = SamStatus::SUCCESS; 01435 if(myUnclippedStartOffset == -1) 01436 { 01437 // Unclipped has not yet been calculated, so parse the cigar to get it 01438 parseCigar(); 01439 } 01440 return(myRecordPtr->myPosition - myUnclippedStartOffset); 01441 }
| int32_t SamRecord::get1BasedAlignmentEnd | ( | ) |
Returns the 1-based inclusive rightmost position of the clipped sequence.
Definition at line 1412 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by getBin().
01413 { 01414 return(get0BasedAlignmentEnd() + 1); 01415 }
| int32_t SamRecord::get1BasedMatePosition | ( | ) |
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
Definition at line 1371 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01372 { 01373 myStatus = SamStatus::SUCCESS; 01374 return (myRecordPtr->myMatePosition + 1); 01375 }
| int32_t SamRecord::get1BasedPosition | ( | ) |
Get the 1-based(SAM) leftmost position (POS) of the record.
Definition at line 1238 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by SamValidator::isValid().
01239 { 01240 myStatus = SamStatus::SUCCESS; 01241 return (myRecordPtr->myPosition + 1); 01242 }
| int32_t SamRecord::get1BasedUnclippedEnd | ( | ) |
Returns the 1-based inclusive right-most position adjusted for clipped bases.
Definition at line 1461 of file SamRecord.cpp.
References get0BasedUnclippedEnd().
01462 { 01463 return(get0BasedUnclippedEnd() + 1); 01464 }
| int32_t SamRecord::get1BasedUnclippedStart | ( | ) |
Returns the 1-based inclusive left-most position adjusted for clipped bases.
Definition at line 1445 of file SamRecord.cpp.
References get0BasedUnclippedStart().
01446 { 01447 return(get0BasedUnclippedStart() + 1); 01448 }
| int32_t SamRecord::getAlignmentLength | ( | ) |
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
Definition at line 1419 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01420 { 01421 myStatus = SamStatus::SUCCESS; 01422 if(myAlignmentLength == -1) 01423 { 01424 // Alignment end has not been set, so calculate it. 01425 parseCigar(); 01426 } 01427 // Return the alignment length. 01428 return(myAlignmentLength); 01429 }
| uint16_t SamRecord::getBin | ( | ) |
Get the BAM bin for the record.
Definition at line 1273 of file SamRecord.cpp.
References get1BasedAlignmentEnd(), and SamStatus::SUCCESS.
01274 { 01275 myStatus = SamStatus::SUCCESS; 01276 if(!myIsBinValid) 01277 { 01278 // The bin that is set in the record is not valid, so 01279 // reset it. 01280 myRecordPtr->myBin = 01281 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd()); 01282 myIsBinValid = true; 01283 } 01284 return(myRecordPtr->myBin); 01285 }
| int32_t SamRecord::getBlockSize | ( | ) |
Get the block size of the record (BAM format).
Definition at line 1207 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01208 { 01209 myStatus = SamStatus::SUCCESS; 01210 // If the buffer isn't synced, sync the buffer to determine the 01211 // block size. 01212 if(myIsBufferSynced == false) 01213 { 01214 // Since this just returns the block size, the translation of 01215 // the sequence does not matter, so just use the currently set 01216 // value. 01217 fixBuffer(myBufferSequenceTranslation); 01218 } 01219 return myRecordPtr->myBlockSize; 01220 }
| const char * SamRecord::getCigar | ( | ) |
Returns the SAM formatted CIGAR string.
Definition at line 1481 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by getFields(), and SamValidator::isValidCigar().
01482 { 01483 myStatus = SamStatus::SUCCESS; 01484 if(myCigar.Length() == 0) 01485 { 01486 // 0 Length, means that it is in the buffer, but has not yet 01487 // been synced to the string, so do the sync. 01488 parseCigarBinary(); 01489 } 01490 return myCigar.c_str(); 01491 }
| Cigar * SamRecord::getCigarInfo | ( | ) |
Returns a pointer to the Cigar object associated with this record.
The object is essentially read-only, only allowing modifications due to lazy evaluations.
Definition at line 1752 of file SamRecord.cpp.
Referenced by SamTags::createMDTag(), getSequence(), SamQuerySeqWithRefIter::reset(), and SamFilter::softClip().
01753 { 01754 // Check to see whether or not the Cigar has already been 01755 // set - this is determined by checking if alignment length 01756 // is set since alignment length and the cigar are set 01757 // at the same time. 01758 if(myAlignmentLength == -1) 01759 { 01760 // Not been set, so calculate it. 01761 parseCigar(); 01762 } 01763 return(&myCigarRoller); 01764 }
| uint16_t SamRecord::getCigarLength | ( | ) |
Get the length of the BAM formatted CIGAR.
Definition at line 1288 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01289 { 01290 myStatus = SamStatus::SUCCESS; 01291 // If the cigar buffer is valid 01292 // then get the length from there. 01293 if(myIsCigarBufferValid) 01294 { 01295 return myRecordPtr->myCigarLength; 01296 } 01297 01298 if(myCigarTempBufferLength == -1) 01299 { 01300 // The cigar buffer is not valid and the cigar temp buffer is not set, 01301 // so parse the string. 01302 parseCigarString(); 01303 } 01304 01305 // The temp buffer is now set, so return the size. 01306 return(myCigarTempBufferLength); 01307 }
| double * SamRecord::getDoubleTag | ( | const char * | tag | ) |
Get the double value for the specified tag.
| tag | tag to retrieve |
Definition at line 2161 of file SamRecord.cpp.
References SamStatus::SUCCESS.
02162 { 02163 // Init to success. 02164 myStatus = SamStatus::SUCCESS; 02165 // Parse the buffer if necessary. 02166 if(myNeedToSetTagsFromBuffer) 02167 { 02168 if(!setTagsFromBuffer()) 02169 { 02170 // Failed to read the tags from the buffer, so cannot 02171 // get tags. setTagsFromBuffer set the errors, 02172 // so just return null. 02173 return(NULL); 02174 } 02175 } 02176 02177 int key = MAKEKEY(tag[0], tag[1], 'f'); 02178 int offset = extras.Find(key); 02179 02180 int value; 02181 if (offset < 0) 02182 { 02183 // Failed to find the tag. 02184 return(NULL); 02185 } 02186 else 02187 value = extras[offset]; 02188 02189 return(&(doubles[value])); 02190 }
| bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, | |
| String & | readName, | |||
| String & | cigar, | |||
| String & | sequence, | |||
| String & | quality, | |||
| SequenceTranslation | translation | |||
| ) |
Returns the values of all fields except the tags using the specified sequence translation.
| recStruct | structure containing the contents of all non-variable length fields. | |
| readName | read name from the record (return param) | |
| cigar | cigar string from the record (return param) | |
| sequence | sequence string from the record (return param) | |
| quality | quality string from the record (return param) | |
| translation | type of sequence translation to use. |
Definition at line 1791 of file SamRecord.cpp.
References getCigar(), getQuality(), getReadName(), getSequence(), and SamStatus::SUCCESS.
01794 { 01795 myStatus = SamStatus::SUCCESS; 01796 if(myIsBufferSynced == false) 01797 { 01798 if(!fixBuffer(translation)) 01799 { 01800 // failed to set the buffer, return false. 01801 return(false); 01802 } 01803 } 01804 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct)); 01805 01806 readName = getReadName(); 01807 // Check the status. 01808 if(myStatus != SamStatus::SUCCESS) 01809 { 01810 // Failed to set the fields, return false. 01811 return(false); 01812 } 01813 cigar = getCigar(); 01814 // Check the status. 01815 if(myStatus != SamStatus::SUCCESS) 01816 { 01817 // Failed to set the fields, return false. 01818 return(false); 01819 } 01820 sequence = getSequence(translation); 01821 // Check the status. 01822 if(myStatus != SamStatus::SUCCESS) 01823 { 01824 // Failed to set the fields, return false. 01825 return(false); 01826 } 01827 quality = getQuality(); 01828 // Check the status. 01829 if(myStatus != SamStatus::SUCCESS) 01830 { 01831 // Failed to set the fields, return false. 01832 return(false); 01833 } 01834 return(true); 01835 }
| bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, | |
| String & | readName, | |||
| String & | cigar, | |||
| String & | sequence, | |||
| String & | quality | |||
| ) |
Returns the values of all fields except the tags.
| recStruct | structure containing the contents of all non-variable length fields. | |
| readName | read name from the record (return param) | |
| cigar | cigar string from the record (return param) | |
| sequence | sequence string from the record (return param) | |
| quality | quality string from the record (return param) |
Definition at line 1782 of file SamRecord.cpp.
01784 { 01785 return(getFields(recStruct, readName, cigar, sequence, quality, 01786 mySequenceTranslation)); 01787 }
| uint16_t SamRecord::getFlag | ( | ) |
Get the flag (FLAG).
Definition at line 1310 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by SamQuerySeqWithRefIter::getNextMatchMismatch(), and SamValidator::isValid().
01311 { 01312 myStatus = SamStatus::SUCCESS; 01313 return myRecordPtr->myFlag; 01314 }
| int32_t SamRecord::getInsertSize | ( | ) |
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
Definition at line 1385 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01386 { 01387 myStatus = SamStatus::SUCCESS; 01388 return myRecordPtr->myInsertSize; 01389 }
| int * SamRecord::getIntegerTag | ( | const char * | tag | ) |
Get the integer value for the specified tag.
| tag | tag to retrieve |
Returns a pointer to the tag's integer value if found, NULL if not found.
Definition at line 2129 of file SamRecord.cpp.
References SamStatus::SUCCESS.
02130 { 02131 // Init to success. 02132 myStatus = SamStatus::SUCCESS; 02133 // Parse the buffer if necessary. 02134 if(myNeedToSetTagsFromBuffer) 02135 { 02136 if(!setTagsFromBuffer()) 02137 { 02138 // Failed to read the tags from the buffer, so cannot 02139 // get tags. setTagsFromBuffer set the errors, 02140 // so just return null. 02141 return(NULL); 02142 } 02143 } 02144 02145 int key = MAKEKEY(tag[0], tag[1], 'i'); 02146 int offset = extras.Find(key); 02147 02148 int value; 02149 if (offset < 0) 02150 { 02151 // Failed to find the tag. 02152 return(NULL); 02153 } 02154 else 02155 value = extras[offset]; 02156 02157 return(&(integers[value])); 02158 }
| uint8_t SamRecord::getMapQuality | ( | ) |
Get the mapping quality (MAPQ) of the record.
Definition at line 1266 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by SamValidator::isValid().
01267 { 01268 myStatus = SamStatus::SUCCESS; 01269 return myRecordPtr->myMapQuality; 01270 }
| int32_t SamRecord::getMateReferenceID | ( | ) |
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
Definition at line 1364 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01365 { 01366 myStatus = SamStatus::SUCCESS; 01367 return myRecordPtr->myMateReferenceID; 01368 }
| const char * SamRecord::getMateReferenceName | ( | ) |
Get the mate/next fragment's reference sequence name (RNEXT).
If it is equal to the reference name, it still returns the reference name.
Definition at line 1336 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01337 { 01338 myStatus = SamStatus::SUCCESS; 01339 return myMateReferenceName.c_str(); 01340 }
| const char * SamRecord::getMateReferenceNameOrEqual | ( | ) |
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned.
Definition at line 1346 of file SamRecord.cpp.
References getReferenceName(), and SamStatus::SUCCESS.
01347 { 01348 myStatus = SamStatus::SUCCESS; 01349 if(myMateReferenceName == "*") 01350 { 01351 return(myMateReferenceName); 01352 } 01353 if(myMateReferenceName == getReferenceName()) 01354 { 01355 return(FIELD_ABSENT_STRING); 01356 } 01357 else 01358 { 01359 return(myMateReferenceName); 01360 } 01361 }
| bool SamRecord::getNextSamTag | ( | char * | tag, | |
| char & | vtype, | |||
| void ** | value | |||
| ) |
Get the next tag from the record.
Sets the Status to SUCCESS when a tag is successfully returned or when there are no more tags. Otherwise the status is set to describe why it failed (parsing, etc).
| tag | set to the tag when a tag is read. | |
| vtype | set to the vtype when a tag is read. | |
| value | pointer to the value of the tag (will need to cast to int, double, char, or string based on vtype). |
Definition at line 1878 of file SamRecord.cpp.
References SamStatus::FAIL_PARSE, SamStatus::setStatus(), and SamStatus::SUCCESS.
01879 { 01880 myStatus = SamStatus::SUCCESS; 01881 if(myNeedToSetTagsFromBuffer) 01882 { 01883 if(!setTagsFromBuffer()) 01884 { 01885 // Failed to read the tags from the buffer, so cannot 01886 // get tags. 01887 return(false); 01888 } 01889 } 01890 01891 // Increment the tag index to start looking at the next tag. 01892 // At the beginning, it is set to -1. 01893 myLastTagIndex++; 01894 int maxTagIndex = extras.Capacity(); 01895 if(myLastTagIndex >= maxTagIndex) 01896 { 01897 // Hit the end of the tags, return false, no more tags. 01898 // Status is still success since this is not an error, 01899 // it is just the end of the list. 01900 return(false); 01901 } 01902 01903 bool tagFound = false; 01904 // Loop until a tag is found or the end of extras is hit. 01905 while((tagFound == false) && (myLastTagIndex < maxTagIndex)) 01906 { 01907 if(extras.SlotInUse(myLastTagIndex)) 01908 { 01909 // Found a slot to use. 01910 int key = extras.GetKey(myLastTagIndex); 01911 getTag(key, tag); 01912 getTypeFromKey(key, vtype); 01913 tagFound = true; 01914 // Get the value associated with the key based on the vtype. 01915 switch (vtype) 01916 { 01917 case 'f' : 01918 *value = getDoublePtr(myLastTagIndex); 01919 break; 01920 case 'i' : 01921 *value = getIntegerPtr(myLastTagIndex, vtype); 01922 if(vtype != 'A') 01923 { 01924 // Convert all int types to 'i' 01925 vtype = 'i'; 01926 } 01927 break; 01928 case 'Z' : 01929 *value = getStringPtr(myLastTagIndex); 01930 break; 01931 default: 01932 myStatus.setStatus(SamStatus::FAIL_PARSE, 01933 "Unknown tag type"); 01934 tagFound = false; 01935 break; 01936 } 01937 } 01938 if(!tagFound) 01939 { 01940 // Increment the index since a tag was not found. 01941 myLastTagIndex++; 01942 } 01943 } 01944 return(tagFound); 01945 }
| uint32_t SamRecord::getNumOverlaps | ( | int32_t | start, | |
| int32_t | end | |||
| ) |
Return the number of bases in this read that overlap the passed in region.
Matches & mismatches between the read and the reference are counted as overlaps, but insertions, deletions, skips, clips, and pads are not counted.
| start | inclusive 0-based start position (reference position) of the region to check for overlaps in. (-1 indicates to start at the beginning of the reference.) | |
| end | exclusive 0-based end position (reference position) of the region to check for overlaps in. (-1 indicates to go to the end of the reference.) |
Definition at line 1769 of file SamRecord.cpp.
References get0BasedPosition(), and Cigar::getNumOverlaps().
Referenced by SamFile::GetNumOverlaps().
01770 { 01771 // Determine whether or not the cigar has been parsed, which sets up 01772 // the cigar roller. This is determined by checking the alignment length. 01773 if(myAlignmentLength == -1) 01774 { 01775 parseCigar(); 01776 } 01777 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition())); 01778 }
| char SamRecord::getQuality | ( | int | index | ) |
Get the quality character at the specified index into the quality string (valid indices are 0 to readLength - 1).
Throws an exception if index is out of range.
| index | index into the quality string (0 to readLength-1). |
Definition at line 1705 of file SamRecord.cpp.
References getReadLength(), and BaseUtilities::UNKNOWN_QUALITY_CHAR.
01706 { 01707 // Determine the read length. 01708 int32_t readLen = getReadLength(); 01709 01710 // If the read length is 0, return ' ' whose ascii code is below 01711 // the minimum ascii code for qualities. 01712 if(readLen == 0) 01713 { 01714 return(BaseUtilities::UNKNOWN_QUALITY_CHAR); 01715 } 01716 else if((index < 0) || (index >= readLen)) 01717 { 01718 // Only get here if the index was out of range, so thow an exception. 01719 String exceptionString = "SamRecord::getQuality("; 01720 exceptionString += index; 01721 exceptionString += ") is out of range. Index must be between 0 and "; 01722 exceptionString += (readLen - 1); 01723 throw std::runtime_error(exceptionString.c_str()); 01724 } 01725 01726 if(myQuality.Length() == 0) 01727 { 01728 // Parse BAM Quality. 01729 unsigned char * packedQuality = 01730 (unsigned char *)myRecordPtr->myData + 01731 myRecordPtr->myReadNameLength + 01732 myRecordPtr->myCigarLength * sizeof(int) + 01733 (myRecordPtr->myReadLength + 1) / 2; 01734 return(packedQuality[index] + 33); 01735 } 01736 else 01737 { 01738 // Already have string. 01739 if((myQuality.Length() == 1) && (myQuality[0] == '*')) 01740 { 01741 // Return the unknown quality character. 01742 return(BaseUtilities::UNKNOWN_QUALITY_CHAR); 01743 } 01744 else 01745 { 01746 return(myQuality[index]); 01747 } 01748 } 01749 }
| const char * SamRecord::getQuality | ( | ) |
Returns the SAM formatted quality string (QUAL).
Definition at line 1564 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by getFields(), and SamValidator::isValidQuality().
01565 { 01566 myStatus = SamStatus::SUCCESS; 01567 if(myQuality.Length() == 0) 01568 { 01569 // 0 Length, means that it is in the buffer, but has not yet 01570 // been synced to the string, so do the sync. 01571 setSequenceAndQualityFromBuffer(); 01572 } 01573 return myQuality.c_str(); 01574 }
| int32_t SamRecord::getReadLength | ( | ) |
Get the length of the read.
Definition at line 1317 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by SamQuerySeqWithRefIter::getNextMatchMismatch(), getQuality(), getSequence(), SamValidator::isValidCigar(), SamValidator::isValidQuality(), and SamQuerySeqWithRefIter::reset().
01318 { 01319 myStatus = SamStatus::SUCCESS; 01320 if(myIsSequenceBufferValid == false) 01321 { 01322 // If the sequence is "*", then return 0. 01323 if((mySequence.Length() == 1) && (mySequence[0] == '*')) 01324 { 01325 return(0); 01326 } 01327 // Do not add 1 since it is not null terminated. 01328 return(mySequence.Length()); 01329 } 01330 return(myRecordPtr->myReadLength); 01331 }
| const char * SamRecord::getReadName | ( | ) |
Returns the SAM formatted Read Name (QNAME).
Definition at line 1468 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by getFields(), SamValidator::isValid(), and SamFile::validateSortOrder().
01469 { 01470 myStatus = SamStatus::SUCCESS; 01471 if(myReadName.Length() == 0) 01472 { 01473 // 0 Length, means that it is in the buffer, but has not yet 01474 // been synced to the string, so do the sync. 01475 myReadName = (char*)&(myRecordPtr->myData); 01476 } 01477 return myReadName.c_str(); 01478 }
| uint8_t SamRecord::getReadNameLength | ( | ) |
Get the length of the readname (QNAME) including the null.
Definition at line 1252 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by SamValidator::isValid().
01253 { 01254 myStatus = SamStatus::SUCCESS; 01255 // If the buffer is valid, return the size from there, otherwise get the 01256 // size from the string length + 1 (ending null). 01257 if(myIsReadNameBufferValid) 01258 { 01259 return(myRecordPtr->myReadNameLength); 01260 } 01261 01262 return(myReadName.Length() + 1); 01263 }
| const void * SamRecord::getRecordBuffer | ( | SequenceTranslation | translation | ) |
Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence.
| translation | type of sequence translation to use. |
Definition at line 1137 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01138 { 01139 myStatus = SamStatus::SUCCESS; 01140 bool status = true; 01141 // If the buffer is not synced or the sequence in the buffer is not 01142 // properly translated, fix the buffer. 01143 if((myIsBufferSynced == false) || 01144 (myBufferSequenceTranslation != translation)) 01145 { 01146 status &= fixBuffer(translation); 01147 } 01148 // If the buffer is synced, check to see if the tags need to be synced. 01149 if(myNeedToSetTagsInBuffer) 01150 { 01151 status &= setTagsInBuffer(); 01152 } 01153 if(!status) 01154 { 01155 return(NULL); 01156 } 01157 return (const void *)myRecordPtr; 01158 }
| const void * SamRecord::getRecordBuffer | ( | ) |
Get a const pointer to the buffer that contains the BAM representation of the record.
Definition at line 1130 of file SamRecord.cpp.
01131 { 01132 return(getRecordBuffer(mySequenceTranslation)); 01133 }
| GenomeSequence * SamRecord::getReference | ( | ) |
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set).
Definition at line 1839 of file SamRecord.cpp.
Referenced by SamValidator::isValidTags().
| int32_t SamRecord::getReferenceID | ( | ) |
Get the reference sequence id of the record (BAM format rid).
Definition at line 1231 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), SamFile::readIndexedRecord(), and SamFile::validateSortOrder().
01232 { 01233 myStatus = SamStatus::SUCCESS; 01234 return myRecordPtr->myReferenceID; 01235 }
| const char * SamRecord::getReferenceName | ( | ) |
Get the reference sequence name (RNAME) of the record.
Definition at line 1224 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by SamTags::createMDTag(), getMateReferenceNameOrEqual(), getSequence(), SamValidator::isValid(), and SamQuerySeqWithRefIter::reset().
01225 { 01226 myStatus = SamStatus::SUCCESS; 01227 return myReferenceName.c_str(); 01228 }
| char SamRecord::getSequence | ( | int | index, | |
| SequenceTranslation | translation | |||
| ) |
Get the sequence base at the specified index into this sequence (valid indices are 0 to readLength - 1), performing the specified sequence translation.
Throws an exception if index is out of range.
| index | index into the sequence string (0 to readLength-1). | |
| translation | type of sequence translation to use. |
Definition at line 1583 of file SamRecord.cpp.
References EQUAL, getCigarInfo(), getReadLength(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().
01584 { 01585 static const char * asciiBases = "=AC.G...T......N"; 01586 01587 // Determine the read length. 01588 int32_t readLen = getReadLength(); 01589 01590 // If the read length is 0, this method should not be called. 01591 if(readLen == 0) 01592 { 01593 String exceptionString = "SamRecord::getSequence("; 01594 exceptionString += index; 01595 exceptionString += ") is not allowed since sequence = '*'"; 01596 throw std::runtime_error(exceptionString.c_str()); 01597 } 01598 else if((index < 0) || (index >= readLen)) 01599 { 01600 // Only get here if the index was out of range, so thow an exception. 01601 String exceptionString = "SamRecord::getSequence("; 01602 exceptionString += index; 01603 exceptionString += ") is out of range. Index must be between 0 and "; 01604 exceptionString += (readLen - 1); 01605 throw std::runtime_error(exceptionString.c_str()); 01606 } 01607 01608 // Determine if translation needs to be done. 01609 if((translation == NONE) || (myRefPtr == NULL)) 01610 { 01611 // No translation needs to be done. 01612 if(mySequence.Length() == 0) 01613 { 01614 // Parse BAM sequence. 01615 // TODO - maybe store this pointer - and use that to track when 01616 // valid? 01617 unsigned char * packedSequence = 01618 (unsigned char *)myRecordPtr->myData + 01619 myRecordPtr->myReadNameLength + 01620 myRecordPtr->myCigarLength * sizeof(int); 01621 01622 return(index & 1 ? 01623 asciiBases[packedSequence[index / 2] & 0xF] : 01624 asciiBases[packedSequence[index / 2] >> 4]); 01625 } 01626 // Already have string. 01627 return(mySequence[index]); 01628 } 01629 else 01630 { 01631 // Need to translate the sequence either to have '=' or to not 01632 // have it. 01633 // First check to see if the sequence has been set. 01634 if(mySequence.Length() == 0) 01635 { 01636 // 0 Length, means that it is in the buffer, but has not yet 01637 // been synced to the string, so do the sync. 
01638 setSequenceAndQualityFromBuffer(); 01639 } 01640 01641 // Check the type of translation. 01642 if(translation == EQUAL) 01643 { 01644 // Check whether or not the string has already been 01645 // retrieved that has the '=' in it. 01646 if(mySeqWithEq.length() == 0) 01647 { 01648 // The string with '=' has not yet been determined, 01649 // so get the string. 01650 // Check to see if the sequence is defined. 01651 if(mySequence == "*") 01652 { 01653 // Sequence is undefined, so no translation necessary. 01654 mySeqWithEq = '*'; 01655 } 01656 else 01657 { 01658 // Sequence defined, so translate it. 01659 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 01660 myRecordPtr->myPosition, 01661 *(getCigarInfo()), 01662 getReferenceName(), 01663 *myRefPtr, 01664 mySeqWithEq); 01665 } 01666 } 01667 // Sequence is set, so return it. 01668 return(mySeqWithEq[index]); 01669 } 01670 else 01671 { 01672 // translation == BASES 01673 // Check whether or not the string has already been 01674 // retrieved that does not have the '=' in it. 01675 if(mySeqWithoutEq.length() == 0) 01676 { 01677 // The string with '=' has not yet been determined, 01678 // so get the string. 01679 // Check to see if the sequence is defined. 01680 if(mySequence == "*") 01681 { 01682 // Sequence is undefined, so no translation necessary. 01683 mySeqWithoutEq = '*'; 01684 } 01685 else 01686 { 01687 // Sequence defined, so translate it. 01688 // The string without '=' has not yet been determined, 01689 // so get the string. 01690 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 01691 myRecordPtr->myPosition, 01692 *(getCigarInfo()), 01693 getReferenceName(), 01694 *myRefPtr, 01695 mySeqWithoutEq); 01696 } 01697 } 01698 // Sequence is set, so return it. 01699 return(mySeqWithoutEq[index]); 01700 } 01701 } 01702 }
| char SamRecord::getSequence | ( | int | index | ) |
Get the sequence base at the specified index into this sequence (valid indices are 0 to readLength - 1), translating the base as specified by setSequenceTranslation.
Throws an exception if index is out of range.
| index | index into the sequence string (0 to readLength-1). |
Definition at line 1577 of file SamRecord.cpp.
References getSequence().
01578 { 01579 return(getSequence(index, mySequenceTranslation)); 01580 }
| const char * SamRecord::getSequence | ( | SequenceTranslation | translation | ) |
Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation.
| translation | type of sequence translation to use. |
Definition at line 1500 of file SamRecord.cpp.
References EQUAL, getCigarInfo(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), SamQuerySeqWithRef::seqWithoutEquals(), and SamStatus::SUCCESS.
01501 { 01502 myStatus = SamStatus::SUCCESS; 01503 if(mySequence.Length() == 0) 01504 { 01505 // 0 Length, means that it is in the buffer, but has not yet 01506 // been synced to the string, so do the sync. 01507 setSequenceAndQualityFromBuffer(); 01508 } 01509 01510 // Determine if translation needs to be done. 01511 if((translation == NONE) || (myRefPtr == NULL)) 01512 { 01513 return mySequence.c_str(); 01514 } 01515 else if(translation == EQUAL) 01516 { 01517 if(mySeqWithEq.length() == 0) 01518 { 01519 // Check to see if the sequence is defined. 01520 if(mySequence == "*") 01521 { 01522 // Sequence is undefined, so no translation necessary. 01523 mySeqWithEq = '*'; 01524 } 01525 else 01526 { 01527 // Sequence defined, so translate it. 01528 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 01529 myRecordPtr->myPosition, 01530 *(getCigarInfo()), 01531 getReferenceName(), 01532 *myRefPtr, 01533 mySeqWithEq); 01534 } 01535 } 01536 return(mySeqWithEq.c_str()); 01537 } 01538 else 01539 { 01540 // translation == BASES 01541 if(mySeqWithoutEq.length() == 0) 01542 { 01543 if(mySequence == "*") 01544 { 01545 // Sequence is undefined, so no translation necessary. 01546 mySeqWithoutEq = '*'; 01547 } 01548 else 01549 { 01550 // Sequence defined, so translate it. 01551 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 01552 myRecordPtr->myPosition, 01553 *(getCigarInfo()), 01554 getReferenceName(), 01555 *myRefPtr, 01556 mySeqWithoutEq); 01557 } 01558 } 01559 return(mySeqWithoutEq.c_str()); 01560 } 01561 }
| const char * SamRecord::getSequence | ( | ) |
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation.
Definition at line 1494 of file SamRecord.cpp.
Referenced by SamTags::createMDTag(), getFields(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getSequence(), and shiftIndelsLeft().
01495 { 01496 return(getSequence(mySequenceTranslation)); 01497 }
| const SamStatus & SamRecord::getStatus | ( | ) |
Returns the status associated with the last method that sets the status.
Definition at line 2307 of file SamRecord.cpp.
| String * SamRecord::getStringTag | ( | const char * | tag | ) |
Get the string value for the specified tag.
| tag | tag to retrieve | |
Returns a pointer to the tag's string value if found, NULL if not found.
Definition at line 2099 of file SamRecord.cpp.
Referenced by SamTags::isMDTagCorrect(), and SamValidator::isValidTags().
02100 { 02101 // Parse the buffer if necessary. 02102 if(myNeedToSetTagsFromBuffer) 02103 { 02104 if(!setTagsFromBuffer()) 02105 { 02106 // Failed to read the tags from the buffer, so cannot 02107 // get tags. setTagsFromBuffer set the errors, 02108 // so just return null. 02109 return(NULL); 02110 } 02111 } 02112 02113 int key = MAKEKEY(tag[0], tag[1], 'Z'); 02114 int offset = extras.Find(key); 02115 02116 int value; 02117 if (offset < 0) 02118 { 02119 // Tag not found. 02120 return(NULL); 02121 } 02122 02123 // Offset is valid, so return the tag. 02124 value = extras[offset]; 02125 return(&(strings[value])); 02126 }
| uint32_t SamRecord::getTagLength | ( | ) |
Returns the length of the BAM formatted tags.
Definition at line 1845 of file SamRecord.cpp.
References SamStatus::SUCCESS.
01846 { 01847 myStatus = SamStatus::SUCCESS; 01848 if(myNeedToSetTagsFromBuffer) 01849 { 01850 // Tags are only set in the buffer, so the size of the tags is 01851 // the length of the record minus the starting location of the tags. 01852 unsigned char * tagStart = 01853 (unsigned char *)myRecordPtr->myData 01854 + myRecordPtr->myReadNameLength 01855 + myRecordPtr->myCigarLength * sizeof(int) 01856 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength; 01857 01858 // The non-tags take up from the start of the record to the tag start. 01859 // Do not include the block size part of the record since it is not 01860 // included in the size. 01861 uint32_t nonTagSize = 01862 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID); 01863 // Tags take up the size of the block minus the non-tag section. 01864 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize; 01865 return(tagSize); 01866 } 01867 01868 // Tags are stored outside the buffer, so myTagBufferSize is set. 01869 return(myTagBufferSize); 01870 }
| bool SamRecord::getTagsString | ( | const char * | tags, | |
| String & | returnString, | |||
| char | delim = '\t' | |||
| ) |
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE...
Sets the Status to SUCCESS when the tags are successfully returned or the tags were not found. If a different error occurred, the status is set appropriately.
| tags | the tags to retrieve, formatted as TAG:TYPE;TAG:TYPE... | |
| returnString | the String to set (this method first clears returnString) to TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... | |
| delim | delimiter to use to separate two tags, default is a tab. |
Definition at line 1997 of file SamRecord.cpp.
References SamStatus::INVALID, SamStatus::setStatus(), and SamStatus::SUCCESS.
01998 { 01999 const char* currentTagPtr = tags; 02000 02001 returnString.Clear(); 02002 myStatus = SamStatus::SUCCESS; 02003 if(myNeedToSetTagsFromBuffer) 02004 { 02005 if(!setTagsFromBuffer()) 02006 { 02007 // Failed to read the tags from the buffer, so cannot 02008 // get tags. 02009 return(false); 02010 } 02011 } 02012 02013 bool returnStatus = true; 02014 02015 while(*currentTagPtr != '\0') 02016 { 02017 // Tags are formatted as: XY:Z 02018 // Where X is [A-Za-z], Y is [A-Za-z], and 02019 // Z is A,i,f,Z,H (cCsSI are also excepted) 02020 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') || 02021 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0')) 02022 { 02023 myStatus.setStatus(SamStatus::INVALID, 02024 "getTagsString called with improperly formatted tags.\n"); 02025 returnStatus = false; 02026 break; 02027 } 02028 02029 // Construct the key. 02030 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1], 02031 currentTagPtr[3]); 02032 // Look to see if the key exsists in the hash. 02033 int offset = extras.Find(key); 02034 02035 if(offset >= 0) 02036 { 02037 // Offset is set, so the key was found. 02038 if(!returnString.IsEmpty()) 02039 { 02040 returnString += delim; 02041 } 02042 returnString += currentTagPtr[0]; 02043 returnString += currentTagPtr[1]; 02044 returnString += ':'; 02045 returnString += currentTagPtr[3]; 02046 returnString += ':'; 02047 02048 // First if it is an integer, determine the actual type of the int. 02049 char vtype; 02050 getTypeFromKey(key, vtype); 02051 02052 02053 // Offset is set, so recalculate the buffer size without this entry. 02054 // Do NOT remove from strings, integers, or doubles because then 02055 // extras would need to be updated for all entries with the new indexes 02056 // into those variables. 
02057 switch(vtype) 02058 { 02059 case 'i': 02060 returnString += *(int*)getIntegerPtr(offset, vtype); 02061 break; 02062 case 'f': 02063 returnString += *(double*)getDoublePtr(offset); 02064 break; 02065 case 'Z': 02066 returnString += *(String*)getStringPtr(offset); 02067 break; 02068 default: 02069 myStatus.setStatus(SamStatus::INVALID, 02070 "rmTag called with unknown type.\n"); 02071 returnStatus = false; 02072 break; 02073 }; 02074 } 02075 // Increment to the next tag. 02076 if(currentTagPtr[4] == ';') 02077 { 02078 // Increment once more. 02079 currentTagPtr += 5; 02080 } 02081 else if(currentTagPtr[4] != '\0') 02082 { 02083 // Invalid tag format. 02084 myStatus.setStatus(SamStatus::INVALID, 02085 "rmTags called with improperly formatted tags.\n"); 02086 returnStatus = false; 02087 break; 02088 } 02089 else 02090 { 02091 // Last Tag. 02092 currentTagPtr += 4; 02093 } 02094 } 02095 return(returnStatus); 02096 }
| bool SamRecord::isCharType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a char type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 1977 of file SamRecord.cpp.
| bool SamRecord::isDoubleType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a double type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 1967 of file SamRecord.cpp.
| bool SamRecord::isIntegerType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is an integer type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 1955 of file SamRecord.cpp.
| bool SamRecord::isStringType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a string type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 1987 of file SamRecord.cpp.
| bool SamRecord::isValid | ( | SamFileHeader & | header | ) |
Returns whether or not the record is valid, setting the status to indicate success or failure.
| header | SAM Header associated with the record. Used to perform some validation against the header. |
Definition at line 157 of file SamRecord.cpp.
References SamValidationErrors::getErrorString(), SamStatus::INVALID, SamValidator::isValid(), SamStatus::setStatus(), and SamStatus::SUCCESS.
00158 { 00159 myStatus = SamStatus::SUCCESS; 00160 SamValidationErrors invalidSamErrors; 00161 if(!SamValidator::isValid(header, *this, invalidSamErrors)) 00162 { 00163 // The record is not valid. 00164 std::string errorMessage = ""; 00165 invalidSamErrors.getErrorString(errorMessage); 00166 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str()); 00167 return(false); 00168 } 00169 // The record is valid. 00170 return(true); 00171 }
| void SamRecord::resetRecord | ( | ) |
Reset the fields of the record to a default value.
This is not necessary when you are reading a SAM/BAM file, but if you are setting fields, it is a good idea to clean out a record before reusing it. Clearing it allows you to not have to set any empty fields.
Definition at line 91 of file SamRecord.cpp.
References clearTags(), NONE, and SamStatus::SUCCESS.
Referenced by SamRecord(), setBuffer(), setBufferFromFile(), and ~SamRecord().
00092 { 00093 myIsBufferSynced = true; 00094 00095 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE; 00096 myRecordPtr->myReferenceID = -1; 00097 myRecordPtr->myPosition = -1; 00098 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH; 00099 myRecordPtr->myMapQuality = 0; 00100 myRecordPtr->myBin = DEFAULT_BIN; 00101 myRecordPtr->myCigarLength = 0; 00102 myRecordPtr->myFlag = 0; 00103 myRecordPtr->myReadLength = 0; 00104 myRecordPtr->myMateReferenceID = -1; 00105 myRecordPtr->myMatePosition = -1; 00106 myRecordPtr->myInsertSize = 0; 00107 00108 // Set the sam values for the variable length fields. 00109 // TODO - one way to speed this up might be to not set to "*" and just 00110 // clear them, and write out a '*' for SAM if it is empty. 00111 myReadName = DEFAULT_READ_NAME; 00112 myReferenceName = "*"; 00113 myMateReferenceName = "*"; 00114 myCigar = "*"; 00115 mySequence = "*"; 00116 mySeqWithEq.clear(); 00117 mySeqWithoutEq.clear(); 00118 myQuality = "*"; 00119 myNeedToSetTagsFromBuffer = false; 00120 myNeedToSetTagsInBuffer = false; 00121 00122 // Initialize the calculated alignment info to the uncalculated value. 00123 myAlignmentLength = -1; 00124 myUnclippedStartOffset = -1; 00125 myUnclippedEndOffset = -1; 00126 00127 clearTags(); 00128 00129 // Set the bam values for the variable length fields. 00130 // Only the read name needs to be set, the others are a length of 0. 00131 // Set the read name. The min size of myRecordPtr includes the size for 00132 // the default read name. 00133 memcpy(&(myRecordPtr->myData), myReadName.c_str(), 00134 myRecordPtr->myReadNameLength); 00135 00136 // Set that the variable length buffer fields are valid. 
00137 myIsReadNameBufferValid = true; 00138 myIsCigarBufferValid = true; 00139 myIsSequenceBufferValid = true; 00140 myBufferSequenceTranslation = NONE; 00141 myIsQualityBufferValid = true; 00142 myIsTagsBufferValid = true; 00143 myIsBinValid = true; 00144 00145 myCigarTempBufferLength = -1; 00146 00147 myStatus = SamStatus::SUCCESS; 00148 00149 NOT_FOUND_TAG_STRING = ""; 00150 NOT_FOUND_TAG_INT = -1; 00151 NOT_FOUND_TAG_DOUBLE = -1; 00152 }
| bool SamRecord::rmTag | ( | const char * | tag, | |
| char | type | |||
| ) |
Remove a tag.
| tag | tag to remove. | |
| type | type of the tag to be removed. |
Definition at line 924 of file SamRecord.cpp.
References getString(), SamStatus::INVALID, SamStatus::setStatus(), and SamStatus::SUCCESS.
00925 { 00926 // Check the length of tag. 00927 if(strlen(tag) != 2) 00928 { 00929 // Tag is the wrong length. 00930 myStatus.setStatus(SamStatus::INVALID, 00931 "rmTag called with tag that is not 2 characters\n"); 00932 return(false); 00933 } 00934 00935 myStatus = SamStatus::SUCCESS; 00936 if(myNeedToSetTagsFromBuffer) 00937 { 00938 if(!setTagsFromBuffer()) 00939 { 00940 // Failed to read the tags from the buffer, so cannot 00941 // get tags. 00942 return(false); 00943 } 00944 } 00945 00946 // Construct the key. 00947 int key = MAKEKEY(tag[0], tag[1], type); 00948 // Look to see if the key exsists in the hash. 00949 int offset = extras.Find(key); 00950 00951 if(offset < 0) 00952 { 00953 // Not found, so return true, successfully removed since 00954 // it is not in tag. 00955 return(true); 00956 } 00957 00958 // Offset is set, so the key was found. 00959 // First if it is an integer, determine the actual type of the int. 00960 char vtype; 00961 getTypeFromKey(key, vtype); 00962 if(vtype == 'i') 00963 { 00964 vtype = getIntegerType(offset); 00965 } 00966 00967 // Offset is set, so recalculate the buffer size without this entry. 00968 // Do NOT remove from strings, integers, or doubles because then 00969 // extras would need to be updated for all entries with the new indexes 00970 // into those variables. 00971 int rmBuffSize = 0; 00972 switch(vtype) 00973 { 00974 case 'A': 00975 case 'c': 00976 case 'C': 00977 rmBuffSize = 4; 00978 break; 00979 case 's': 00980 case 'S': 00981 rmBuffSize = 5; 00982 break; 00983 case 'i': 00984 case 'I': 00985 rmBuffSize = 7; 00986 break; 00987 case 'f': 00988 rmBuffSize = 7; 00989 break; 00990 case 'Z': 00991 rmBuffSize = 4 + getString(offset).Length(); 00992 break; 00993 default: 00994 myStatus.setStatus(SamStatus::INVALID, 00995 "rmTag called with unknown type.\n"); 00996 return(false); 00997 break; 00998 }; 00999 01000 // The buffer tags are now out of sync. 
01001 myNeedToSetTagsInBuffer = true; 01002 myIsTagsBufferValid = false; 01003 myIsBufferSynced = false; 01004 myTagBufferSize -= rmBuffSize; 01005 01006 // Remove from the hash. 01007 extras.Delete(offset); 01008 return(true); 01009 }
| bool SamRecord::rmTags | ( | const char * | tags | ) |
Remove tags.
| tags | tags to remove, formatted as Tag:Type;Tag:Type;Tag:Type... |
Definition at line 1012 of file SamRecord.cpp.
References getString(), SamStatus::INVALID, SamStatus::setStatus(), and SamStatus::SUCCESS.
01013 { 01014 const char* currentTagPtr = tags; 01015 01016 myStatus = SamStatus::SUCCESS; 01017 if(myNeedToSetTagsFromBuffer) 01018 { 01019 if(!setTagsFromBuffer()) 01020 { 01021 // Failed to read the tags from the buffer, so cannot 01022 // get tags. 01023 return(false); 01024 } 01025 } 01026 01027 bool returnStatus = true; 01028 01029 int rmBuffSize = 0; 01030 while(*currentTagPtr != '\0') 01031 { 01032 01033 // Tags are formatted as: XY:Z 01034 // Where X is [A-Za-z], Y is [A-Za-z], and 01035 // Z is A,i,f,Z,H (cCsSI are also excepted) 01036 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') || 01037 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0')) 01038 { 01039 myStatus.setStatus(SamStatus::INVALID, 01040 "rmTags called with improperly formatted tags.\n"); 01041 returnStatus = false; 01042 break; 01043 } 01044 01045 // Construct the key. 01046 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1], 01047 currentTagPtr[3]); 01048 // Look to see if the key exsists in the hash. 01049 int offset = extras.Find(key); 01050 01051 if(offset >= 0) 01052 { 01053 // Offset is set, so the key was found. 01054 // First if it is an integer, determine the actual type of the int. 01055 char vtype; 01056 getTypeFromKey(key, vtype); 01057 if(vtype == 'i') 01058 { 01059 vtype = getIntegerType(offset); 01060 } 01061 01062 // Offset is set, so recalculate the buffer size without this entry. 01063 // Do NOT remove from strings, integers, or doubles because then 01064 // extras would need to be updated for all entries with the new indexes 01065 // into those variables. 
01066 switch(vtype) 01067 { 01068 case 'A': 01069 case 'c': 01070 case 'C': 01071 rmBuffSize += 4; 01072 break; 01073 case 's': 01074 case 'S': 01075 rmBuffSize += 5; 01076 break; 01077 case 'i': 01078 case 'I': 01079 rmBuffSize += 7; 01080 break; 01081 case 'f': 01082 rmBuffSize += 7; 01083 break; 01084 case 'Z': 01085 rmBuffSize += 4 + getString(offset).Length(); 01086 break; 01087 default: 01088 myStatus.setStatus(SamStatus::INVALID, 01089 "rmTag called with unknown type.\n"); 01090 returnStatus = false; 01091 break; 01092 }; 01093 01094 // Remove from the hash. 01095 extras.Delete(offset); 01096 } 01097 // Increment to the next tag. 01098 if(currentTagPtr[4] == ';') 01099 { 01100 // Increment once more. 01101 currentTagPtr += 5; 01102 } 01103 else if(currentTagPtr[4] != '\0') 01104 { 01105 // Invalid tag format. 01106 myStatus.setStatus(SamStatus::INVALID, 01107 "rmTags called with improperly formatted tags.\n"); 01108 returnStatus = false; 01109 break; 01110 } 01111 else 01112 { 01113 // Last Tag. 01114 currentTagPtr += 4; 01115 } 01116 } 01117 01118 // The buffer tags are now out of sync. 01119 myNeedToSetTagsInBuffer = true; 01120 myIsTagsBufferValid = false; 01121 myIsBufferSynced = false; 01122 myTagBufferSize -= rmBuffSize; 01123 01124 01125 return(returnStatus); 01126 }
| bool SamRecord::set0BasedMatePosition | ( | int32_t | matePosition | ) |
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| matePosition | 0-based start position of the mate/next fragment |
Definition at line 324 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by set1BasedMatePosition().
00325 { 00326 myStatus = SamStatus::SUCCESS; 00327 myRecordPtr->myMatePosition = matePosition; 00328 return true; 00329 }
| bool SamRecord::set0BasedPosition | ( | int32_t | position | ) |
Set the leftmost position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| position | 0-based start position |
Definition at line 238 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by set1BasedPosition(), and SamFilter::softClip().
00239 { 00240 myStatus = SamStatus::SUCCESS; 00241 myRecordPtr->myPosition = position; 00242 myIsBinValid = false; 00243 return true; 00244 }
| bool SamRecord::set1BasedMatePosition | ( | int32_t | matePosition | ) |
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| matePosition | 1-based start position of the mate/next fragment |
Definition at line 318 of file SamRecord.cpp.
References set0BasedMatePosition().
00319 { 00320 return(set0BasedMatePosition(matePosition - 1)); 00321 }
| bool SamRecord::set1BasedPosition | ( | int32_t | position | ) |
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| position | 1-based start position |
Definition at line 232 of file SamRecord.cpp.
References set0BasedPosition().
00233 { 00234 return(set0BasedPosition(position - 1)); 00235 }
| SamStatus::Status SamRecord::setBuffer | ( | const char * | fromBuffer, | |
| uint32_t | fromBufferSize, | |||
| SamFileHeader & | header | |||
| ) |
Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
| fromBuffer | buffer to read the BAM record from. | |
| fromBufferSize | size of the buffer containing the BAM record. | |
| header | BAM header for the record. |
Definition at line 521 of file SamRecord.cpp.
References SamStatus::FAIL_MEM, SamStatus::FAIL_PARSE, resetRecord(), SamStatus::setStatus(), and SamStatus::SUCCESS.
00524 { 00525 myStatus = SamStatus::SUCCESS; 00526 if((fromBuffer == NULL) || (fromBufferSize == 0)) 00527 { 00528 // Buffer is empty. 00529 myStatus.setStatus(SamStatus::FAIL_PARSE, 00530 "Cannot parse an empty file."); 00531 return(SamStatus::FAIL_PARSE); 00532 } 00533 00534 // Clear the record. 00535 resetRecord(); 00536 00537 // allocate space for the record size. 00538 if(!allocateRecordStructure(fromBufferSize)) 00539 { 00540 // Failed to allocate space. 00541 return(SamStatus::FAIL_MEM); 00542 } 00543 00544 memcpy(myRecordPtr, fromBuffer, fromBufferSize); 00545 00546 setVariablesForNewBuffer(header); 00547 00548 // Return the status of the record. 00549 return(SamStatus::SUCCESS); 00550 }
| SamStatus::Status SamRecord::setBufferFromFile | ( | IFILE | filePtr, | |
| SamFileHeader & | header | |||
| ) |
Read the BAM record from a file.
| filePtr | file to read the buffer from. | |
| header | BAM header for the record. |
Definition at line 554 of file SamRecord.cpp.
References SamStatus::FAIL_IO, SamStatus::FAIL_MEM, SamStatus::FAIL_ORDER, SamStatus::FAIL_PARSE, ifeof(), ifread(), InputFile::isOpen(), SamStatus::NO_MORE_RECS, resetRecord(), SamStatus::setStatus(), and SamStatus::SUCCESS.
00556 { 00557 myStatus = SamStatus::SUCCESS; 00558 if((filePtr == NULL) || (filePtr->isOpen() == false)) 00559 { 00560 // File is not open, return failure. 00561 myStatus.setStatus(SamStatus::FAIL_ORDER, 00562 "Can't read from an unopened file."); 00563 return(SamStatus::FAIL_ORDER); 00564 } 00565 00566 // Clear the record. 00567 resetRecord(); 00568 00569 // read the record size. 00570 int numBytes = 00571 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t)); 00572 00573 // Check to see if the end of the file was hit and no bytes were read. 00574 if(ifeof(filePtr) && (numBytes == 0)) 00575 { 00576 // End of file, nothing was read, no more records. 00577 myStatus.setStatus(SamStatus::NO_MORE_RECS, 00578 "No more records left to read."); 00579 return(SamStatus::NO_MORE_RECS); 00580 } 00581 00582 if(numBytes != sizeof(int32_t)) 00583 { 00584 // Failed to read the entire block size. Either the end of the file 00585 // was reached early or there was an error. 00586 if(ifeof(filePtr)) 00587 { 00588 // Error: end of the file reached prior to reading the rest of the 00589 // record. 00590 myStatus.setStatus(SamStatus::FAIL_PARSE, 00591 "EOF reached in the middle of a record."); 00592 return(SamStatus::FAIL_PARSE); 00593 } 00594 else 00595 { 00596 // Error reading. 00597 myStatus.setStatus(SamStatus::FAIL_IO, 00598 "Failed to read the record size."); 00599 return(SamStatus::FAIL_IO); 00600 } 00601 } 00602 00603 // allocate space for the record size. 00604 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t))) 00605 { 00606 // Failed to allocate space. 00607 // Status is set by allocateRecordStructure. 00608 return(SamStatus::FAIL_MEM); 00609 } 00610 00611 // Read the rest of the alignment block, starting at the reference id. 00612 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize) 00613 != (unsigned int)myRecordPtr->myBlockSize) 00614 { 00615 // Error reading the record. Reset it and return failure. 
00616 resetRecord(); 00617 myStatus.setStatus(SamStatus::FAIL_IO, 00618 "Failed to read the record"); 00619 return(SamStatus::FAIL_IO); 00620 } 00621 00622 setVariablesForNewBuffer(header); 00623 00624 // Return the status of the record. 00625 return(SamStatus::SUCCESS); 00626 }
| bool SamRecord::setCigar | ( | const Cigar & | cigar | ) |
Set the CIGAR to the specified Cigar object.
Internal processing handles the switching between SAM/BAM formats when read/written.
| cigar | object to set this record's cigar to have. |
Definition at line 274 of file SamRecord.cpp.
References Cigar::getCigarString(), and SamStatus::SUCCESS.
00275 { 00276 myStatus = SamStatus::SUCCESS; 00277 cigar.getCigarString(myCigar); 00278 00279 myIsBufferSynced = false; 00280 myIsCigarBufferValid = false; 00281 myCigarTempBufferLength = -1; 00282 myIsBinValid = false; 00283 00284 // Initialize the calculated alignment info to the uncalculated value. 00285 myAlignmentLength = -1; 00286 myUnclippedStartOffset = -1; 00287 myUnclippedEndOffset = -1; 00288 00289 return true; 00290 }
| bool SamRecord::setCigar | ( | const char * | cigar | ) |
Set the CIGAR to the specified SAM formatted cigar string.
Internal processing handles the switching between SAM/BAM formats when read/written.
| cigar | string containing the SAM formatted cigar. |
Definition at line 255 of file SamRecord.cpp.
References SamStatus::SUCCESS.
Referenced by shiftIndelsLeft(), and SamFilter::softClip().
00256 { 00257 myStatus = SamStatus::SUCCESS; 00258 myCigar = cigar; 00259 00260 myIsBufferSynced = false; 00261 myIsCigarBufferValid = false; 00262 myCigarTempBufferLength = -1; 00263 myIsBinValid = false; 00264 00265 // Initialize the calculated alignment info to the uncalculated value. 00266 myAlignmentLength = -1; 00267 myUnclippedStartOffset = -1; 00268 myUnclippedEndOffset = -1; 00269 00270 return true; 00271 }
| bool SamRecord::setFlag | ( | uint16_t | flag | ) |
Set the bitwise FLAG to the specified value.
| flag | integer flag to use. |
Definition at line 211 of file SamRecord.cpp.
References SamStatus::SUCCESS.
00212 { 00213 myStatus = SamStatus::SUCCESS; 00214 myRecordPtr->myFlag = flag; 00215 return true; 00216 }
| bool SamRecord::setInsertSize | ( | int32_t | insertSize | ) |
Sets the inferred insert size (ISIZE)/observed template length (TLEN).
| insertSize | inferred insert size/observed template length. |
Definition at line 332 of file SamRecord.cpp.
References SamStatus::SUCCESS.
00333 { 00334 myStatus = SamStatus::SUCCESS; 00335 myRecordPtr->myInsertSize = insertSize; 00336 return true; 00337 }
| bool SamRecord::setMapQuality | ( | uint8_t | mapQuality | ) |
Set the mapping quality (MAPQ).
| mapQuality | map quality to set in the record. |
Definition at line 247 of file SamRecord.cpp.
References SamStatus::SUCCESS.
00248 { 00249 myStatus = SamStatus::SUCCESS; 00250 myRecordPtr->myMapQuality = mapQuality; 00251 return true; 00252 }
| bool SamRecord::setMateReferenceName | ( | SamFileHeader & | header, | |
| const char * | mateReferenceName | |||
| ) |
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id.
| header | SAM/BAM header to use to determine the mate reference id. | |
| mateReferenceName | mate reference name to use. |
Definition at line 293 of file SamRecord.cpp.
References SamFileHeader::getReferenceID(), and SamStatus::SUCCESS.
00295 { 00296 myStatus = SamStatus::SUCCESS; 00297 // Set the mate reference, if it is "=", set it to be equal 00298 // to myReferenceName. This assumes that myReferenceName has already 00299 // been called. 00300 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0) 00301 { 00302 myMateReferenceName = myReferenceName; 00303 } 00304 else 00305 { 00306 myMateReferenceName = mateReferenceName; 00307 } 00308 00309 // Set the Mate Reference ID. 00310 // If the reference ID does not already exist, add it (pass true) 00311 myRecordPtr->myMateReferenceID = 00312 header.getReferenceID(myMateReferenceName, true); 00313 00314 return true; 00315 }
| bool SamRecord::setQuality | ( | const char * | quality | ) |
Sets the quality (QUAL) to the specified SAM formatted quality string.
Internal processing handles switching between SAM/BAM formats when read/written.
| quality | SAM quality string. |
Definition at line 353 of file SamRecord.cpp.
References SamStatus::SUCCESS.
00354 { 00355 myStatus = SamStatus::SUCCESS; 00356 myQuality = quality; 00357 myIsBufferSynced = false; 00358 myIsQualityBufferValid = false; 00359 return true; 00360 }
| bool SamRecord::setReadName | ( | const char * | readName | ) |
Set QNAME to the passed in name.
| readName | the readname to set the QNAME to. |
Definition at line 189 of file SamRecord.cpp.
References SamStatus::INVALID, SamStatus::setStatus(), and SamStatus::SUCCESS.
00190 { 00191 myReadName = readName; 00192 myIsBufferSynced = false; 00193 myIsReadNameBufferValid = false; 00194 myStatus = SamStatus::SUCCESS; 00195 00196 // The read name must at least have some length, otherwise this is a parsing 00197 // error. 00198 if(myReadName.Length() == 0) 00199 { 00200 // Invalid - reset ReadName return false. 00201 myReadName = DEFAULT_READ_NAME; 00202 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH; 00203 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name."); 00204 return(false); 00205 } 00206 00207 return true; 00208 }
| void SamRecord::setReference | ( | GenomeSequence * | reference | ) |
Set the reference to the specified genome sequence object.
| reference | pointer to the GenomeSequence object. |
Definition at line 174 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::readIndexedRecord(), SamFile::ReadRecord(), SamFile::validateSortOrder(), and SamFile::WriteRecord().
| bool SamRecord::setReferenceName | ( | SamFileHeader & | header, | |
| const char * | referenceName | |||
| ) |
Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id.
| header | SAM/BAM header to use to determine the reference id. | |
| referenceName | reference name to use. |
Definition at line 219 of file SamRecord.cpp.
References SamFileHeader::getReferenceID(), and SamStatus::SUCCESS.
00221 { 00222 myStatus = SamStatus::SUCCESS; 00223 00224 myReferenceName = referenceName; 00225 // If the reference ID does not already exist, add it (pass true) 00226 myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true); 00227 00228 return true; 00229 }
| bool SamRecord::setSequence | ( | const char * | seq | ) |
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
Internal processing handles switching between SAM/BAM formats when read/written.
| seq | SAM sequence string. May contain '='. |
Definition at line 340 of file SamRecord.cpp.
References SamStatus::SUCCESS.
00341 { 00342 myStatus = SamStatus::SUCCESS; 00343 mySequence = seq; 00344 mySeqWithEq.clear(); 00345 mySeqWithoutEq.clear(); 00346 00347 myIsBufferSynced = false; 00348 myIsSequenceBufferValid = false; 00349 return true; 00350 }
| void SamRecord::setSequenceTranslation | ( | SequenceTranslation | translation | ) |
Set the type of sequence translation to use when getting the sequence.
The default type (if this method is never called) is NONE (the sequence is left as-is). This can be overridden by using the accessors that take a SequenceTranslation parameter.
| translation | type of sequence translation to use. |
Definition at line 183 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::readIndexedRecord(), SamFile::ReadRecord(), and SamFile::validateSortOrder().
| bool SamRecord::shiftIndelsLeft | ( | ) |
Shift the indels (if any) to the left by updating the CIGAR.
Definition at line 364 of file SamRecord.cpp.
References BASES, Cigar::foundInQuery(), getSequence(), CigarRoller::IncrementCount(), Cigar::insert, Cigar::isMatchOrMismatch(), CigarRoller::Remove(), setCigar(), Cigar::size(), and CigarRoller::Update().
00365 { 00366 // Check to see whether or not the Cigar has already been 00367 // set - this is determined by checking if alignment length 00368 // is set since alignment length and the cigar are set 00369 // at the same time. 00370 if(myAlignmentLength == -1) 00371 { 00372 // Not been set, so calculate it. 00373 parseCigar(); 00374 } 00375 00376 // Track whether or not there was a shift. 00377 bool shifted = false; 00378 00379 // Cigar is set, so now myCigarRoller can be used. 00380 // Track where in the read we are. 00381 uint32_t currentPos = 0; 00382 00383 // Since the loop starts at 1 because the first operation can't be shifted, 00384 // increment the currentPos past the first operation. 00385 if(Cigar::foundInQuery(myCigarRoller[0])) 00386 { 00387 // This op was found in the read, increment the current position. 00388 currentPos += myCigarRoller[0].count; 00389 } 00390 00391 int numOps = myCigarRoller.size(); 00392 00393 // Loop through the cigar operations from the 2nd operation since 00394 // the first operation is already on the end and can't shift. 00395 for(int currentOp = 1; currentOp < numOps; currentOp++) 00396 { 00397 if(myCigarRoller[currentOp].operation == Cigar::insert) 00398 { 00399 // For now, only shift a max of 1 operation. 00400 int prevOpIndex = currentOp-1; 00401 // Track the next op for seeing if it is the same as the 00402 // previous for merging reasons. 00403 int nextOpIndex = currentOp+1; 00404 if(nextOpIndex == numOps) 00405 { 00406 // There is no next op, so set it equal to the current one. 00407 nextOpIndex = currentOp; 00408 } 00409 // The start of the previous operation, so we know when we hit it 00410 // so we don't shift past it. 
00411 uint32_t prevOpStart = 00412 currentPos - myCigarRoller[prevOpIndex].count; 00413 00414 // We can only shift if the previous operation 00415 if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex])) 00416 { 00417 // TODO - shift past pads 00418 // An insert is in the read, so increment the position. 00419 currentPos += myCigarRoller[currentOp].count; 00420 // Not a match/mismatch, so can't shift into it. 00421 continue; 00422 } 00423 00424 // It is a match or mismatch, so check to see if we can 00425 // shift into it. 00426 00427 // The end of the insert is calculated by adding the size 00428 // of this insert minus 1 to the start of the insert. 00429 uint32_t insertEndPos = 00430 currentPos + myCigarRoller[currentOp].count - 1; 00431 00432 // The insert starts at the current position. 00433 uint32_t insertStartPos = currentPos; 00434 00435 // Loop as long as the position before the insert start 00436 // matches the last character in the insert. If they match, 00437 // the insert can be shifted one index left because the 00438 // implied reference will not change. If they do not match, 00439 // we can't shift because the implied reference would change. 00440 // Stop loop when insertStartPos = prevOpStart, because we 00441 // don't want to move past that. 00442 while((insertStartPos > prevOpStart) && 00443 (getSequence(insertEndPos,BASES) == 00444 getSequence(insertStartPos - 1, BASES))) 00445 { 00446 // We can shift, so move the insert start & end one left. 00447 --insertEndPos; 00448 --insertStartPos; 00449 } 00450 00451 // Determine if a shift has occurred. 00452 int shiftLen = currentPos - insertStartPos; 00453 if(shiftLen > 0) 00454 { 00455 // Shift occured, so adjust the cigar if the cigar will 00456 // not become more operations. 00457 // If the next operation is the same as the previous or 00458 // if the insert and the previous operation switch positions 00459 // then the cigar has the same number of operations. 
00460 // If the next operation is different, and the shift splits 00461 // the previous operation in 2, then the cigar would 00462 // become longer, so we do not want to shift. 00463 if(myCigarRoller[nextOpIndex].operation == 00464 myCigarRoller[prevOpIndex].operation) 00465 { 00466 // The operations are the same, so merge them by adding 00467 // the length of the shift to the next operation. 00468 myCigarRoller.IncrementCount(nextOpIndex, shiftLen); 00469 myCigarRoller.IncrementCount(prevOpIndex, -shiftLen); 00470 00471 // If the previous op length is 0, just remove that 00472 // operation. 00473 if(myCigarRoller[prevOpIndex].count == 0) 00474 { 00475 myCigarRoller.Remove(prevOpIndex); 00476 } 00477 shifted = true; 00478 } 00479 else 00480 { 00481 // Can only shift if the insert shifts past the 00482 // entire previous operation, otherwise an operation 00483 // would need to be added. 00484 if(insertStartPos == prevOpStart) 00485 { 00486 // Swap the positions of the insert and the 00487 // previous operation. 00488 myCigarRoller.Update(currentOp, 00489 myCigarRoller[prevOpIndex].operation, 00490 myCigarRoller[prevOpIndex].count); 00491 // Size of the previous op is the entire 00492 // shift length. 00493 myCigarRoller.Update(prevOpIndex, 00494 Cigar::insert, 00495 shiftLen); 00496 shifted = true; 00497 } 00498 } 00499 } 00500 // An insert is in the read, so increment the position. 00501 currentPos += myCigarRoller[currentOp].count; 00502 } 00503 else if(Cigar::foundInQuery(myCigarRoller[currentOp])) 00504 { 00505 // This op was found in the read, increment the current position. 00506 currentPos += myCigarRoller[currentOp].count; 00507 } 00508 } 00509 if(shifted) 00510 { 00511 // TODO - setCigar is currently inefficient because later the cigar 00512 // roller will be recalculated, but for now it will work. 00513 setCigar(myCigarRoller); 00514 } 00515 return(shifted); 00516 }
| SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr, | |
| SequenceTranslation | translation | |||
| ) |
Write the record in BAM format into the specified, already opened file, applying the specified translation to the sequence.
| filePtr | file to write the BAM record into. | |
| translation | type of sequence translation to use. |
Definition at line 1170 of file SamRecord.cpp.
References SamStatus::FAIL_IO, SamStatus::FAIL_ORDER, SamStatus::getStatus(), ifwrite(), InputFile::isOpen(), SamStatus::setStatus(), and SamStatus::SUCCESS.
01172 { 01173 myStatus = SamStatus::SUCCESS; 01174 if((filePtr == NULL) || (filePtr->isOpen() == false)) 01175 { 01176 // File is not open, return failure. 01177 myStatus.setStatus(SamStatus::FAIL_ORDER, 01178 "Can't write to an unopened file."); 01179 return(SamStatus::FAIL_ORDER); 01180 } 01181 01182 if((myIsBufferSynced == false) || 01183 (myBufferSequenceTranslation != translation)) 01184 { 01185 if(!fixBuffer(translation)) 01186 { 01187 return(myStatus.getStatus()); 01188 } 01189 } 01190 01191 // Write the record. 01192 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t); 01193 unsigned int numBytesWritten = 01194 ifwrite(filePtr, myRecordPtr, numBytesToWrite); 01195 01196 // Return status based on if the correct number of bytes were written. 01197 if(numBytesToWrite == numBytesWritten) 01198 { 01199 return(SamStatus::SUCCESS); 01200 } 01201 // The correct number of bytes were not written. 01202 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record."); 01203 return(SamStatus::FAIL_IO); 01204 }
| SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr | ) |
Write the record in BAM format into the specified, already opened file, using the sequence translation currently set on the record.
| filePtr | file to write the BAM record into. |
Definition at line 1163 of file SamRecord.cpp.
01164 { 01165 return(writeRecordBuffer(filePtr, mySequenceTranslation)); 01166 }
1.6.3