|
libStatGen Software
1
|
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record. More...
#include <SamRecord.h>
Public Types | |
| enum | SequenceTranslation { NONE, EQUAL, BASES } |
| Enum containing the settings on how to translate the sequence if a reference is available. More... | |
Public Member Functions | |
| SamRecord () | |
| Default Constructor. | |
| SamRecord (ErrorHandler::HandlingType errorHandlingType) | |
| Constructor that sets the error handling type. | |
| ~SamRecord () | |
| Destructor. | |
| void | resetRecord () |
| Reset the fields of the record to a default value. | |
| bool | isValid (SamFileHeader &header) |
| Returns whether or not the record is valid, setting the status to indicate success or failure. | |
| void | setReference (GenomeSequence *reference) |
| Set the reference to the specified genome sequence object. | |
| void | setSequenceTranslation (SequenceTranslation translation) |
| Set the type of sequence translation to use when getting the sequence. | |
| const SamStatus & | getStatus () |
| Returns the status associated with the last method that sets the status. | |
Set Alignment Data | |
Set methods for record fields. All of the "set" methods set the status to indicate success or the failure reason. | |
| bool | setReadName (const char *readName) |
| Set QNAME to the passed in name. | |
| bool | setFlag (uint16_t flag) |
| Set the bitwise FLAG to the specified value. | |
| bool | setReferenceName (SamFileHeader &header, const char *referenceName) |
| Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id. | |
| bool | set1BasedPosition (int32_t position) |
| Set the leftmost position (POS) using the specified 1-based (SAM format) value. | |
| bool | set0BasedPosition (int32_t position) |
| Set the leftmost position using the specified 0-based (BAM format) value. | |
| bool | setMapQuality (uint8_t mapQuality) |
| Set the mapping quality (MAPQ). | |
| bool | setCigar (const char *cigar) |
| Set the CIGAR to the specified SAM formatted cigar string. | |
| bool | setCigar (const Cigar &cigar) |
| Set the CIGAR to the specified Cigar object. | |
| bool | setMateReferenceName (SamFileHeader &header, const char *mateReferenceName) |
| Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id. | |
| bool | set1BasedMatePosition (int32_t matePosition) |
| Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value. | |
| bool | set0BasedMatePosition (int32_t matePosition) |
| Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value. | |
| bool | setInsertSize (int32_t insertSize) |
| Sets the inferred insert size (ISIZE)/observed template length (TLEN). | |
| bool | setSequence (const char *seq) |
| Sets the sequence (SEQ) to the specified SAM formatted sequence string. | |
| bool | setQuality (const char *quality) |
| Sets the quality (QUAL) to the specified SAM formatted quality string. | |
| bool | shiftIndelsLeft () |
| Shift the indels (if any) to the left by updating the CIGAR. | |
| SamStatus::Status | setBuffer (const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header) |
| Sets the SamRecord to contain the information in the BAM formatted fromBuffer. | |
| SamStatus::Status | setBufferFromFile (IFILE filePtr, SamFileHeader &header) |
| Read the BAM record from a file. | |
Set Tag Data | |
Set methods for tags. | |
| bool | addIntTag (const char *tag, int32_t value) |
| Add the specified integer tag to the record. | |
| bool | addTag (const char *tag, char vtype, const char *value) |
| Add the specified tag,vtype,value to the record. | |
| void | clearTags () |
| Clear the tags in this record. | |
| bool | rmTag (const char *tag, char type) |
| Remove a tag. | |
| bool | rmTags (const char *tags) |
| Remove tags. | |
Get Alignment Data | |
Get methods for record fields. All of the "get" methods set the status to indicate success or the failure reason. | |
| const void * | getRecordBuffer () |
| Get a const pointer to the buffer that contains the BAM representation of the record. | |
| const void * | getRecordBuffer (SequenceTranslation translation) |
| Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence. | |
| SamStatus::Status | writeRecordBuffer (IFILE filePtr) |
| Write the record as a BAM into the specified already opened file. | |
| SamStatus::Status | writeRecordBuffer (IFILE filePtr, SequenceTranslation translation) |
| Write the record as a BAM into the specified already opened file using the specified translation on the sequence. | |
| int32_t | getBlockSize () |
| Get the block size of the record (BAM format). | |
| const char * | getReferenceName () |
| Get the reference sequence name (RNAME) of the record. | |
| int32_t | getReferenceID () |
| Get the reference sequence id of the record (BAM format rid). | |
| int32_t | get1BasedPosition () |
| Get the 1-based(SAM) leftmost position (POS) of the record. | |
| int32_t | get0BasedPosition () |
| Get the 0-based(BAM) leftmost position of the record. | |
| uint8_t | getReadNameLength () |
| Get the length of the readname (QNAME) including the null. | |
| uint8_t | getMapQuality () |
| Get the mapping quality (MAPQ) of the record. | |
| uint16_t | getBin () |
| Get the BAM bin for the record. | |
| uint16_t | getCigarLength () |
| Get the length of the BAM formatted CIGAR. | |
| uint16_t | getFlag () |
| Get the flag (FLAG). | |
| int32_t | getReadLength () |
| Get the length of the read. | |
| const char * | getMateReferenceName () |
| Get the mate/next fragment's reference sequence name (RNEXT). | |
| const char * | getMateReferenceNameOrEqual () |
| Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned. | |
| int32_t | getMateReferenceID () |
| Get the mate reference id of the record (BAM format: mate_rid/next_refID). | |
| int32_t | get1BasedMatePosition () |
| Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT). | |
| int32_t | get0BasedMatePosition () |
| Get the 0-based(BAM) leftmost mate/next fragment's position. | |
| int32_t | getInsertSize () |
| Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN). | |
| int32_t | get0BasedAlignmentEnd () |
| Returns the 0-based inclusive rightmost position of the clipped sequence. | |
| int32_t | get1BasedAlignmentEnd () |
| Returns the 1-based inclusive rightmost position of the clipped sequence. | |
| int32_t | getAlignmentLength () |
| Returns the length of the clipped sequence, returning 0 if the cigar is '*'. | |
| int32_t | get0BasedUnclippedStart () |
| Returns the 0-based inclusive left-most position adjusted for clipped bases. | |
| int32_t | get1BasedUnclippedStart () |
| Returns the 1-based inclusive left-most position adjusted for clipped bases. | |
| int32_t | get0BasedUnclippedEnd () |
| Returns the 0-based inclusive right-most position adjusted for clipped bases. | |
| int32_t | get1BasedUnclippedEnd () |
| Returns the 1-based inclusive right-most position adjusted for clipped bases. | |
| const char * | getReadName () |
| Returns the SAM formatted Read Name (QNAME). | |
| const char * | getCigar () |
| Returns the SAM formatted CIGAR string. | |
| const char * | getSequence () |
| Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation. | |
| const char * | getSequence (SequenceTranslation translation) |
| Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation. | |
| const char * | getQuality () |
| Returns the SAM formatted quality string (QUAL). | |
| char | getSequence (int index) |
| Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation. | |
| char | getSequence (int index, SequenceTranslation translation) |
| Get the sequence base at the specified index into this sequence 0 to readLength - 1 performing the specified sequence translation. | |
| char | getQuality (int index) |
| Get the quality character at the specified index into the quality 0 to readLength - 1. | |
| Cigar * | getCigarInfo () |
| Returns a pointer to the Cigar object associated with this record. | |
| uint32_t | getNumOverlaps (int32_t start, int32_t end) |
| Return the number of bases in this read that overlap the passed in region. | |
| bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality) |
| Returns the values of all fields except the tags. | |
| bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality, SequenceTranslation translation) |
| Returns the values of all fields except the tags using the specified sequence translation. | |
| GenomeSequence * | getReference () |
| Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set). | |
Get Tag Methods | |
| uint32_t | getTagLength () |
| Returns the length of the BAM formatted tags. | |
| bool | getNextSamTag (char *tag, char &vtype, void **value) |
| Get the next tag from the record. | |
| void | resetTagIter () |
| Reset the tag iterator to the beginning of the tags. | |
| bool | getTagsString (const char *tags, String &returnString, char delim= '\t') |
| Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... | |
| const String * | getStringTag (const char *tag) |
| Get the string value for the specified tag. | |
| int * | getIntegerTag (const char *tag) |
| Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure). | |
| bool | getIntegerTag (const char *tag, int &tagVal) |
| Get the integer value for the specified tag. | |
| bool | getFloatTag (const char *tag, float &tagVal) |
| Get the float value for the specified tag. | |
| const String & | getString (const char *tag) |
| Get the string value for the specified tag. | |
| int & | getInteger (const char *tag) |
| Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool. | |
| bool | checkString (const char *tag) |
| Check if the specified tag contains a string. | |
| bool | checkInteger (const char *tag) |
| Check if the specified tag contains an integer. | |
| bool | checkFloat (const char *tag) |
| Check if the specified tag contains a float. | |
| bool | checkTag (const char *tag, char type) |
| Check if the specified tag contains a value of the specified vtype. | |
| static bool | isIntegerType (char vtype) |
| Returns whether or not the specified vtype is an integer type. | |
| static bool | isFloatType (char vtype) |
| Returns whether or not the specified vtype is a float type. | |
| static bool | isCharType (char vtype) |
| Returns whether or not the specified vtype is a char type. | |
| static bool | isStringType (char vtype) |
| Returns whether or not the specified vtype is a string type. | |
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition at line 51 of file SamRecord.h.
Enum containing the settings on how to translate the sequence if a reference is available.
If no reference is available, no translation is done.
| NONE |
Leave the sequence as is. |
| EQUAL |
Translate bases that match the reference to '='. |
| BASES |
Translate '=' to the actual base. |
Definition at line 57 of file SamRecord.h.
| SamRecord::SamRecord | ( | ErrorHandler::HandlingType | errorHandlingType | ) |
Constructor that sets the error handling type.
| errorHandlingType | how to handle errors. |
Definition at line 53 of file SamRecord.cpp.
References resetRecord().
: myStatus(errorHandlingType),
myRefPtr(NULL),
mySequenceTranslation(NONE)
{
    // Initial allocation: the default block size plus sizeof(int32_t) extra
    // bytes (presumably for the leading block-size field - confirm).
    int32_t initialBytes = DEFAULT_BLOCK_SIZE + sizeof(int32_t);

    // No temporary cigar buffer exists yet.
    myCigarTempBufferAllocatedSize = 0;
    myCigarTempBuffer = NULL;

    // Allocate the record buffer and remember how large it is.
    allocatedSize = initialBytes;
    myRecordPtr = (bamRecordStruct *) malloc(initialBytes);

    // Put every field into its default state.
    resetRecord();
}
| bool SamRecord::addIntTag | ( | const char * | tag, |
| int32_t | value | ||
| ) |
Add the specified integer tag to the record.
Internal processing handles switching between SAM/BAM formats when read/written and determining the type for BAM format. If the tag is already there this code will replace it if the specified value is different.
| tag | two character tag to be added to the SAM/BAM record. |
| value | value for the specified tag. |
Definition at line 635 of file SamRecord.cpp.
References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
Referenced by addTag().
{
// Adds (or overwrites) an integer tag.  Picks the smallest BAM integer
// vtype the strict comparisons below allow, updates the tag containers,
// marks the buffer as out of sync and adjusts myTagBufferSize by the net
// change.  Returns false if the existing tags could not be read from the
// buffer or if an existing tag has an unknown integer type.
myStatus = SamStatus::SUCCESS;
int key = 0;
int index = 0;
char bamvtype;
// Net change in the tag buffer size caused by this call.
int tagBufferSize = 0;
// First check to see if the tags need to be synced to the buffer.
if(myNeedToSetTagsFromBuffer)
{
if(!setTagsFromBuffer())
{
// Failed to read tags from the buffer, so cannot add new ones.
return(false);
}
}
// Ints come in as int. But it can be represented in fewer bits.
// So determine a more specific type that is in line with the
// types for BAM files.
// The sizes added below (4/5/7) are presumably 2 tag chars + 1 type char
// + a 1/2/4 byte value - TODO confirm against the BAM writer.
// NOTE(review): all limit comparisons are strict, so a value exactly equal
// to a limit (e.g. the min of char or the max of unsigned char) falls
// through to the next larger type.
// First check to see if it is a negative.
if(value < 0)
{
// The int is negative, so it will need to use a signed type.
// See if it is greater than the min value for a char.
// NOTE(review): std::numeric_limits<char>::min is 0 on platforms where
// plain char is unsigned, in which case no negative value selects 'c'.
if(value > ((std::numeric_limits<char>::min)()))
{
// It can be stored in a signed char.
bamvtype = 'c';
tagBufferSize += 4;
}
else if(value > ((std::numeric_limits<short>::min)()))
{
// It fits in a signed short.
bamvtype = 's';
tagBufferSize += 5;
}
else
{
// Just store it as a signed int.
bamvtype = 'i';
tagBufferSize += 7;
}
}
else
{
// It is positive, so an unsigned type can be used.
if(value < ((std::numeric_limits<unsigned char>::max)()))
{
// It is under the max of an unsigned char.
bamvtype = 'C';
tagBufferSize += 4;
}
else if(value < ((std::numeric_limits<unsigned short>::max)()))
{
// It is under the max of an unsigned short.
bamvtype = 'S';
tagBufferSize += 5;
}
else
{
// Just store it as an unsigned int.
bamvtype = 'I';
tagBufferSize += 7;
}
}
// Check to see if the tag is already there.
key = MAKEKEY(tag[0], tag[1], bamvtype);
unsigned int hashIndex = extras.Find(key);
if(hashIndex != LH_NOTFOUND)
{
// Tag was already found.
index = extras[hashIndex];
// Since the tagBufferSize was already updated with the new value,
// subtract the size for the previous tag (even if they are the same).
switch(intType[index])
{
case 'c':
case 'C':
case 'A':
tagBufferSize -= 4;
break;
case 's':
case 'S':
tagBufferSize -= 5;
break;
case 'i':
case 'I':
tagBufferSize -= 7;
break;
default:
myStatus.setStatus(SamStatus::INVALID,
"unknown tag inttype type found.\n");
return(false);
}
// Tag already existed, print message about overwriting.
// WARN about dropping duplicate tags.
// The counter is incremented on every duplicate, but the message is
// only printed for the first myMaxWarns of them.
if(myNumWarns++ < myMaxWarns)
{
String newVal;
String origVal;
appendIntArrayValue(index, origVal);
appendIntArrayValue(bamvtype, value, newVal);
fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
tag[0], tag[1], intType[index], origVal.c_str(), tag[0], tag[1], bamvtype, newVal.c_str());
if(myNumWarns == myMaxWarns)
{
fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
}
}
// Update the integer value and type.
integers[index] = value;
intType[index] = bamvtype;
}
else
{
// Tag is not already there, so add it.
// integers and intType are appended to together, so the same index
// addresses the value and its type.
index = integers.Length();
integers.Push(value);
intType.push_back(bamvtype);
extras.Add(key, index);
}
// The buffer tags are now out of sync.
myNeedToSetTagsInBuffer = true;
myIsTagsBufferValid = false;
myIsBufferSynced = false;
// Apply the net size change computed above.
myTagBufferSize += tagBufferSize;
return(true);
}
| bool SamRecord::addTag | ( | const char * | tag, |
| char | vtype, | ||
| const char * | value | ||
| ) |
Add the specified tag,vtype,value to the record.
Vtype can be SAM/BAM format. Internal processing handles switching between SAM/BAM formats when read/written. If the tag is already there this code will replace it if the specified value is different.
| tag | two character tag to be added to the SAM/BAM record. |
| vtype | vtype of the specified value - either SAM/BAM vtypes. |
| value | value as a string for the specified tag. |
Definition at line 779 of file SamRecord.cpp.
References addIntTag(), StatGenStatus::FAIL_PARSE, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
// Adds (or overwrites) a tag whose value is supplied as a string.
// 'i' values are parsed with atoi and forwarded to addIntTag; 'A', 'Z',
// 'B' and 'f' are handled here.  Any other vtype sets FAIL_PARSE and
// returns false.  On success the buffer is marked out of sync and
// myTagBufferSize is adjusted by the net change.
// NOTE(review): the body refers to the value argument as valuePtr, while
// the signature shown in this documentation names it value - presumably
// the definition uses valuePtr; confirm.
if(vtype == 'i')
{
// integer type. Call addIntTag to handle it.
int intVal = atoi(valuePtr);
return(addIntTag(tag, intVal));
}
// Non-int type.
myStatus = SamStatus::SUCCESS;
bool status = true; // default to successful.
int key = 0;
int index = 0;
// Net change in the tag buffer size caused by this call.
int tagBufferSize = 0;
// First check to see if the tags need to be synced to the buffer.
if(myNeedToSetTagsFromBuffer)
{
if(!setTagsFromBuffer())
{
// Failed to read tags from the buffer, so cannot add new ones.
return(false);
}
}
// First check to see if the tag is already there.
key = MAKEKEY(tag[0], tag[1], vtype);
unsigned int hashIndex = extras.Find(key);
if(hashIndex != LH_NOTFOUND)
{
// The key was found in the hash, so get the lookup index.
index = extras[hashIndex];
// origTag/origType hold the previous value and type for the warning
// message printed below.
String origTag;
char origType = vtype;
// Adjust the currently pointed to value to the new setting.
switch (vtype)
{
case 'A' :
// First check to see if the value changed.
if((integers[index] == (const int)*(valuePtr)) &&
(intType[index] == vtype))
{
// The value & type has not changed, so do nothing.
return(true);
}
else
{
// Tag buffer size changes if type changes, so subtract & add.
origType = intType[index];
appendIntArrayValue(index, origTag);
tagBufferSize -= getNumericTagTypeSize(intType[index]);
tagBufferSize += getNumericTagTypeSize(vtype);
// Only the first character of the value string is stored.
integers[index] = (const int)*(valuePtr);
intType[index] = vtype;
}
break;
case 'Z' :
// First check to see if the value changed.
if(strings[index] == valuePtr)
{
// The value has not changed, so do nothing.
return(true);
}
else
{
// Adjust the tagBufferSize by removing the size of the old string.
origTag = strings[index];
tagBufferSize -= strings[index].Length();
strings[index] = valuePtr;
// Adjust the tagBufferSize by adding the size of the new string.
tagBufferSize += strings[index].Length();
}
break;
case 'B' :
// First check to see if the value changed.
if(strings[index] == valuePtr)
{
// The value has not changed, so do nothing.
return(true);
}
else
{
// Adjust the tagBufferSize by removing the size of the old field.
origTag = strings[index];
tagBufferSize -= getBtagBufferSize(strings[index]);
strings[index] = valuePtr;
// Adjust the tagBufferSize by adding the size of the new field.
tagBufferSize += getBtagBufferSize(strings[index]);
}
break;
case 'f' :
// First check to see if the value changed.
if(floats[index] == (float)atof(valuePtr))
{
// The value has not changed, so do nothing.
return(true);
}
else
{
// Tag buffer size doesn't change between different 'f' entries.
origTag.appendFullFloat(floats[index]);
floats[index] = (float)atof(valuePtr);
}
break;
default :
fprintf(stderr,
"samRecord::addTag() - Unknown custom field of type %c\n",
vtype);
myStatus.setStatus(SamStatus::FAIL_PARSE,
"Unknown custom field in a tag");
status = false;
break;
}
// Duplicate tag in this record.
// Tag already existed, print message about overwriting.
// WARN about dropping duplicate tags.
// NOTE(review): this warning is also printed (and myNumWarns incremented)
// when the default case above set status to false, i.e. when nothing was
// actually overwritten.
if(myNumWarns++ < myMaxWarns)
{
fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
tag[0], tag[1], origType, origTag.c_str(), tag[0], tag[1], vtype, valuePtr);
if(myNumWarns == myMaxWarns)
{
fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
}
}
}
else
{
// The key was not found in the hash, so add it.
// Each case appends to the container for its type and records the new
// element's position in index.
switch (vtype)
{
case 'A' :
index = integers.Length();
integers.Push((const int)*(valuePtr));
intType.push_back(vtype);
tagBufferSize += 4;
break;
case 'Z' :
index = strings.Length();
strings.Push(valuePtr);
tagBufferSize += 4 + strings.Last().Length();
break;
case 'B' :
index = strings.Length();
strings.Push(valuePtr);
tagBufferSize += 3 + getBtagBufferSize(strings[index]);
break;
case 'f' :
index = floats.size();
floats.push_back((float)atof(valuePtr));
tagBufferSize += 7;
break;
default :
fprintf(stderr,
"samRecord::addTag() - Unknown custom field of type %c\n",
vtype);
myStatus.setStatus(SamStatus::FAIL_PARSE,
"Unknown custom field in a tag");
status = false;
break;
}
if(status)
{
// If successful, add the key to extras.
extras.Add(key, index);
}
}
// Only add the tag if it has so far been successfully processed.
if(status)
{
// The buffer tags are now out of sync.
myNeedToSetTagsInBuffer = true;
myIsTagsBufferValid = false;
myIsBufferSynced = false;
// Apply the net size change computed above.
myTagBufferSize += tagBufferSize;
}
return(status);
}
| bool SamRecord::checkFloat | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a float.
Does not set SamStatus.
| tag | SAM tag to check contents of. |
Definition at line 613 of file SamRecord.h.
References checkTag().
{
    // Delegate to checkTag using the 'f' value type.
    return checkTag(tag, 'f');
}
| bool SamRecord::checkInteger | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains an integer.
Does not set SamStatus.
| tag | SAM tag to check contents of. |
Definition at line 607 of file SamRecord.h.
References checkTag().
{
    // Delegate to checkTag using the 'i' value type.
    return checkTag(tag, 'i');
}
| bool SamRecord::checkString | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a string.
Does not set SamStatus.
| tag | SAM tag to check contents of. |
Definition at line 600 of file SamRecord.h.
References checkTag().
| bool SamRecord::checkTag | ( | const char * | tag, |
| char | type | ||
| ) |
Check if the specified tag contains a value of the specified vtype.
Does not set SamStatus.
| tag | SAM tag to check contents of. |
| type | value type to check if the SAM tag matches. |
Definition at line 2369 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by checkFloat(), checkInteger(), and checkString().
{
    // Reports whether a tag with the given two-character name and value
    // type is present in this record.
    //
    // tag  - tag name; tag[0] and tag[1] are read.
    // type - value type to look for.
    // Returns true if a matching tag is found, false if it is not found or
    // if the tags could not be read from the buffer.
    //
    // NOTE(review): the accompanying documentation says this method does not
    // set SamStatus, but the status is reset to SUCCESS here.

    // Init to success.
    myStatus = SamStatus::SUCCESS;

    // Parse the buffer if necessary.
    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read the tags from the buffer, so the tag cannot be
            // looked up.  setTagsFromBuffer set the error.
            // This used to be return(""), which converts to true in a bool
            // function and wrongly reported the tag as present.
            return(false);
        }
    }

    int key = MAKEKEY(tag[0], tag[1], type);
    return (extras.Find(key) != LH_NOTFOUND);
}
| void SamRecord::clearTags | ( | ) |
Clear the tags in this record.
Does not set SamStatus.
Definition at line 965 of file SamRecord.cpp.
References resetTagIter().
Referenced by resetRecord().
{
    // Drop the tag lookup table, but only when it actually holds entries.
    if(0 != extras.Entries())
    {
        extras.Clear();
    }

    // Empty every per-type value container.
    floats.clear();
    intType.clear();
    integers.Clear();
    strings.Clear();

    // No tags remain, so they take up no buffer space; restart iteration.
    myTagBufferSize = 0;
    resetTagIter();
}
| int32_t SamRecord::get0BasedAlignmentEnd | ( | ) |
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition at line 1455 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by get0BasedUnclippedEnd(), get1BasedAlignmentEnd(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and CigarHelper::softClipEndByRefPos().
{
    myStatus = SamStatus::SUCCESS;

    // An alignment length of -1 means the cigar has not been parsed yet.
    const bool lengthUnknown = (myAlignmentLength == -1);
    if(lengthUnknown)
    {
        parseCigar();
    }

    // Start from the leftmost position; a zero-length alignment ends there.
    int32_t endPosition = myRecordPtr->myPosition;
    if(myAlignmentLength != 0)
    {
        // Inclusive end, so it sits one before start + length.
        endPosition = endPosition + myAlignmentLength - 1;
    }
    return(endPosition);
}
| int32_t SamRecord::get0BasedMatePosition | ( | ) |
Get the 0-based(BAM) leftmost mate/next fragment's position.
Definition at line 1440 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Plain field read; always reports success.
    myStatus = SamStatus::SUCCESS;
    const int32_t matePosition0 = myRecordPtr->myMatePosition;
    return matePosition0;
}
| int32_t SamRecord::get0BasedPosition | ( | ) |
Get the 0-based(BAM) leftmost position of the record.
Definition at line 1307 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamCoordOutput::add(), PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getNumOverlaps(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), SamQuerySeqWithRefIter::reset(), SamFilter::softClip(), CigarHelper::softClipBeginByRefPos(), CigarHelper::softClipEndByRefPos(), and SamFile::validateSortOrder().
{
    // Plain field read; always reports success.
    myStatus = SamStatus::SUCCESS;
    const int32_t position0 = myRecordPtr->myPosition;
    return position0;
}
| int32_t SamRecord::get0BasedUnclippedEnd | ( | ) |
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition at line 1514 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by get1BasedUnclippedEnd().
{
    // Returns the 0-based inclusive alignment end plus the unclipped end
    // offset.
    //
    // get0BasedAlignmentEnd() sets myUnclippedEndOffset if the cigar has not
    // yet been parsed.  The operands of '+' are evaluated in an unspecified
    // order, so the call is sequenced first here to guarantee the offset is
    // read only after it has been computed.
    const int32_t alignmentEnd = get0BasedAlignmentEnd();
    return(alignmentEnd + myUnclippedEndOffset);
}
| int32_t SamRecord::get0BasedUnclippedStart | ( | ) |
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition at line 1494 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by get1BasedUnclippedStart().
{
    myStatus = SamStatus::SUCCESS;

    // An offset of -1 means it has not been calculated yet; parsing the
    // cigar fills it in.
    const bool offsetUnknown = (myUnclippedStartOffset == -1);
    if(offsetUnknown)
    {
        parseCigar();
    }

    // Step back from the aligned start by the unclipped start offset.
    return(myRecordPtr->myPosition - myUnclippedStartOffset);
}
| int32_t SamRecord::get1BasedAlignmentEnd | ( | ) |
Returns the 1-based inclusive rightmost position of the clipped sequence.
Definition at line 1474 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by getBin().
{
    // Shift the 0-based inclusive end up by one to make it 1-based.
    const int32_t zeroBasedEnd = get0BasedAlignmentEnd();
    return(zeroBasedEnd + 1);
}
| int32_t SamRecord::get1BasedMatePosition | ( | ) |
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
Definition at line 1433 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // The stored mate position is 0-based; add one for the 1-based value.
    myStatus = SamStatus::SUCCESS;
    return (1 + myRecordPtr->myMatePosition);
}
| int32_t SamRecord::get1BasedPosition | ( | ) |
Get the 1-based(SAM) leftmost position (POS) of the record.
Definition at line 1300 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamValidator::isValid().
{
    // The stored position is 0-based; add one for the 1-based value.
    myStatus = SamStatus::SUCCESS;
    return (1 + myRecordPtr->myPosition);
}
| int32_t SamRecord::get1BasedUnclippedEnd | ( | ) |
Returns the 1-based inclusive right-most position adjusted for clipped bases.
Definition at line 1523 of file SamRecord.cpp.
References get0BasedUnclippedEnd().
{
    // Shift the 0-based unclipped end up by one to make it 1-based.
    const int32_t zeroBasedEnd = get0BasedUnclippedEnd();
    return(zeroBasedEnd + 1);
}
| int32_t SamRecord::get1BasedUnclippedStart | ( | ) |
Returns the 1-based inclusive left-most position adjusted for clipped bases.
Definition at line 1507 of file SamRecord.cpp.
References get0BasedUnclippedStart().
{
    // Shift the 0-based unclipped start up by one to make it 1-based.
    const int32_t zeroBasedStart = get0BasedUnclippedStart();
    return(zeroBasedStart + 1);
}
| int32_t SamRecord::getAlignmentLength | ( | ) |
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
Definition at line 1481 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // An alignment length of -1 means the cigar has not been parsed yet.
    const bool lengthUnknown = (myAlignmentLength == -1);
    if(lengthUnknown)
    {
        parseCigar();
    }

    return(myAlignmentLength);
}
| uint16_t SamRecord::getBin | ( | ) |
Get the BAM bin for the record.
Definition at line 1335 of file SamRecord.cpp.
References get1BasedAlignmentEnd(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // Fast path: the bin stored in the record is still valid.
    if(myIsBinValid)
    {
        return(myRecordPtr->myBin);
    }

    // The stored bin is stale, so recompute it from the start position and
    // the 1-based alignment end, then mark it valid.
    myRecordPtr->myBin =
        bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
    myIsBinValid = true;

    return(myRecordPtr->myBin);
}
| int32_t SamRecord::getBlockSize | ( | ) |
Get the block size of the record (BAM format).
Definition at line 1269 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // The block size is only known once the buffer is synced.  The sequence
    // translation does not matter for the size, so the buffer's currently
    // set translation is reused.
    if(!myIsBufferSynced)
    {
        fixBuffer(myBufferSequenceTranslation);
    }

    const int32_t blockSize = myRecordPtr->myBlockSize;
    return blockSize;
}
| const char * SamRecord::getCigar | ( | ) |
Returns the SAM formatted CIGAR string.
Definition at line 1543 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by getFields(), SamValidator::isValidCigar(), CigarHelper::softClipBeginByRefPos(), and CigarHelper::softClipEndByRefPos().
{
    myStatus = SamStatus::SUCCESS;

    // An empty string means the cigar is in the buffer but has not yet
    // been synced into the string, so sync it now.
    const bool stringNotSynced = (myCigar.Length() == 0);
    if(stringNotSynced)
    {
        parseCigarBinary();
    }

    return myCigar.c_str();
}
| Cigar * SamRecord::getCigarInfo | ( | ) |
Returns a pointer to the Cigar object associated with this record.
The object is essentially read-only, only allowing modifications due to lazy evaluations.
Definition at line 1824 of file SamRecord.cpp.
Referenced by PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getSequence(), SamQuerySeqWithRefIter::reset(), SamFilter::softClip(), CigarHelper::softClipBeginByRefPos(), and CigarHelper::softClipEndByRefPos().
{
    // The cigar object and the alignment length are set together, so an
    // alignment length of -1 indicates the cigar has not been parsed yet.
    const bool cigarNotParsed = (myAlignmentLength == -1);
    if(cigarNotParsed)
    {
        parseCigar();
    }

    return(&myCigarRoller);
}
| uint16_t SamRecord::getCigarLength | ( | ) |
Get the length of the BAM formatted CIGAR.
Definition at line 1350 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    if(!myIsCigarBufferValid)
    {
        // The cigar buffer cannot be used.  A temp buffer length of -1
        // means the temp buffer is not set either, so parse the string.
        if(myCigarTempBufferLength == -1)
        {
            parseCigarString();
        }
        // The temp buffer is now set, so report its size.
        return(myCigarTempBufferLength);
    }

    // The cigar buffer is valid, so take the length straight from the record.
    return myRecordPtr->myCigarLength;
}
| bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, |
| String & | readName, | ||
| String & | cigar, | ||
| String & | sequence, | ||
| String & | quality | ||
| ) |
Returns the values of all fields except the tags.
| recStruct | structure containing the contents of all non-variable length fields. |
| readName | read name from the record (return param) |
| cigar | cigar string from the record (return param) |
| sequence | sequence string from the record (return param) |
| quality | quality string from the record (return param) |
Definition at line 1854 of file SamRecord.cpp.
{
    // Forward to the overload that takes an explicit translation, using
    // the translation currently configured on this record.
    const SequenceTranslation configuredTranslation = mySequenceTranslation;
    return getFields(recStruct, readName, cigar, sequence, quality,
                     configuredTranslation);
}
| bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, |
| String & | readName, | ||
| String & | cigar, | ||
| String & | sequence, | ||
| String & | quality, | ||
| SequenceTranslation | translation | ||
| ) |
Returns the values of all fields except the tags using the specified sequence translation.
| recStruct | structure containing the contents of all non-variable length fields. |
| readName | read name from the record (return param) |
| cigar | cigar string from the record (return param) |
| sequence | sequence string from the record (return param) |
| quality | quality string from the record (return param) |
| translation | type of sequence translation to use. |
Definition at line 1863 of file SamRecord.cpp.
References getCigar(), getQuality(), getReadName(), getSequence(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // Sync the buffer first if it is out of date; give up if that fails.
    if(!myIsBufferSynced && !fixBuffer(translation))
    {
        return(false);
    }

    // Copy the record structure out of the buffer.
    memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));

    // Fetch each variable-length field in turn.  Every getter updates
    // myStatus, so stop at the first one that does not report success.
    readName = getReadName();
    if(myStatus != SamStatus::SUCCESS)
    {
        return(false);
    }

    cigar = getCigar();
    if(myStatus != SamStatus::SUCCESS)
    {
        return(false);
    }

    sequence = getSequence(translation);
    if(myStatus != SamStatus::SUCCESS)
    {
        return(false);
    }

    // The outcome of the last getter decides the overall result.
    quality = getQuality();
    return(!(myStatus != SamStatus::SUCCESS));
}
| uint16_t SamRecord::getFlag | ( | ) |
Get the flag (FLAG).
Definition at line 1372 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::filterRead(), SamQuerySeqWithRefIter::getNextMatchMismatch(), SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processFile(), and SamFile::ReadRecord().
{
    // This getter always reports success; the flag is read straight from
    // the fixed-length part of the record.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myFlag);
}
| bool SamRecord::getFloatTag | ( | const char * | tag, |
| float & | tagVal | ||
| ) |
Get the float value for the specified tag.
| tag | tag to retrieve |
| tagVal | return parameter with float value for the tag |
Definition at line 2269 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Start from a clean status.
    myStatus = SamStatus::SUCCESS;

    // The tags may still only exist in the record buffer.  If parsing them
    // fails, setTagsFromBuffer has already recorded the error.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return false;
    }

    // Float tags are hashed under the two tag characters plus type 'f'.
    int floatKey = MAKEKEY(tag[0], tag[1], 'f');
    int slot = extras.Find(floatKey);
    if(slot < 0)
    {
        // No such tag on this record.
        return false;
    }

    // The hash entry holds the index of the value within floats.
    tagVal = floats[extras[slot]];
    return true;
}
| int32_t SamRecord::getInsertSize | ( | ) |
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
Definition at line 1447 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // This getter always reports success; the value comes directly from
    // the fixed-length part of the record.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myInsertSize);
}
| int * SamRecord::getIntegerTag | ( | const char * | tag | ) |
Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure).
| tag | tag to retrieve. Returns a pointer to the tag's integer value if found, NULL if not found. |
Definition at line 2204 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Start from a clean status.
    myStatus = SamStatus::SUCCESS;

    // The tags may still only exist in the record buffer.  If parsing them
    // fails, setTagsFromBuffer has already recorded the error.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return NULL;
    }

    // Integer tags are hashed under the two tag characters plus type 'i'.
    int intKey = MAKEKEY(tag[0], tag[1], 'i');
    int slot = extras.Find(intKey);
    if(slot < 0)
    {
        // No such tag on this record.
        return NULL;
    }

    // The hash entry holds the index of the value within integers; hand
    // back the address of that stored value.
    return(&(integers[extras[slot]]));
}
| bool SamRecord::getIntegerTag | ( | const char * | tag, |
| int & | tagVal | ||
| ) |
Get the integer value for the specified tag.
| tag | tag to retrieve |
| tagVal | return parameter with integer value for the tag. Returns true if the integer tag was found and tagVal was set, false if not. |
Definition at line 2236 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Start from a clean status.
    myStatus = SamStatus::SUCCESS;

    // The tags may still only exist in the record buffer.  If parsing them
    // fails, setTagsFromBuffer has already recorded the error.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return false;
    }

    // Integer tags are hashed under the two tag characters plus type 'i'.
    int intKey = MAKEKEY(tag[0], tag[1], 'i');
    int slot = extras.Find(intKey);
    if(slot < 0)
    {
        // No such tag on this record; tagVal is left untouched.
        return false;
    }

    // The hash entry holds the index of the value within integers.
    tagVal = integers[extras[slot]];
    return true;
}
| uint8_t SamRecord::getMapQuality | ( | ) |
Get the mapping quality (MAPQ) of the record.
Definition at line 1328 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamValidator::isValid().
{
    // This getter always reports success; the value comes directly from
    // the fixed-length part of the record.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myMapQuality);
}
| int32_t SamRecord::getMateReferenceID | ( | ) |
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
Definition at line 1426 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // This getter always reports success; the value comes directly from
    // the fixed-length part of the record.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myMateReferenceID);
}
| const char * SamRecord::getMateReferenceName | ( | ) |
Get the mate/next fragment's reference sequence name (RNEXT).
If it is equal to the reference name, it still returns the reference name.
Definition at line 1398 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // The stored mate reference name is returned as-is, with no
    // comparison against the record's own reference name.
    myStatus = SamStatus::SUCCESS;
    return(myMateReferenceName.c_str());
}
| const char * SamRecord::getMateReferenceNameOrEqual | ( | ) |
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned.
Definition at line 1408 of file SamRecord.cpp.
References getReferenceName(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // The abbreviated form is only used when the mate name is something
    // other than "*" and matches this record's reference name.  The
    // reference name is only looked up when the mate name is not "*".
    const bool mateIsStar = (myMateReferenceName == "*");
    if(!mateIsStar && (myMateReferenceName == getReferenceName()))
    {
        return(FIELD_ABSENT_STRING);
    }

    // Either "*" or a name that differs from the reference name.
    return(myMateReferenceName);
}
| bool SamRecord::getNextSamTag | ( | char * | tag, |
| char & | vtype, | ||
| void ** | value | ||
| ) |
Get the next tag from the record.
Sets the Status to SUCCESS when a tag is successfully returned or when there are no more tags. Otherwise the status is set to describe why it failed (parsing, etc).
| tag | set to the tag when a tag is read. |
| vtype | set to the vtype when a tag is read. |
| value | pointer to the value of the tag (will need to cast to int, float, char, or string based on vtype). |
Definition at line 1950 of file SamRecord.cpp.
References StatGenStatus::FAIL_PARSE, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
Referenced by SamRecordHelper::genSamTagsString().
{
// Iterator-style accessor: every call advances myLastTagIndex to the next
// in-use slot of the extras hash and reports the tag stored there.
// Returns true when a tag was written to tag/vtype/value, false when the
// tags could not be parsed or there are no further tags.
myStatus = SamStatus::SUCCESS;
// The tags may still only exist in the record buffer, so parse them first.
if(myNeedToSetTagsFromBuffer)
{
if(!setTagsFromBuffer())
{
// Failed to read the tags from the buffer, so cannot
// get tags.
return(false);
}
}
// Increment the tag index to start looking at the next tag.
// At the beginning, it is set to -1.
myLastTagIndex++;
// Every slot of the hash is a candidate; unused slots are skipped below.
int maxTagIndex = extras.Capacity();
if(myLastTagIndex >= maxTagIndex)
{
// Hit the end of the tags, return false, no more tags.
// Status is still success since this is not an error,
// it is just the end of the list.
return(false);
}
bool tagFound = false;
// Loop until a tag is found or the end of extras is hit.
while((tagFound == false) && (myLastTagIndex < maxTagIndex))
{
if(extras.SlotInUse(myLastTagIndex))
{
// Found a slot to use.
int key = extras.GetKey(myLastTagIndex);
getTag(key, tag);
getTypeFromKey(key, vtype);
tagFound = true;
// Get the value associated with the key based on the vtype.
switch (vtype)
{
case 'f' :
*value = getFloatPtr(myLastTagIndex);
break;
case 'i' :
// NOTE(review): getIntegerPtr is passed vtype and vtype is inspected
// afterwards, so it presumably updates vtype to the stored integer
// subtype - confirm against getIntegerPtr.
*value = getIntegerPtr(myLastTagIndex, vtype);
if(vtype != 'A')
{
// Convert all int types to 'i'
vtype = 'i';
}
break;
case 'Z' :
case 'B' :
*value = getStringPtr(myLastTagIndex);
break;
default:
// Unknown type: record the parse failure and keep scanning.
// NOTE(review): the FAIL_PARSE status is not reset if a later slot
// yields a valid tag within this same call.
myStatus.setStatus(SamStatus::FAIL_PARSE,
"Unknown tag type");
tagFound = false;
break;
}
}
if(!tagFound)
{
// Increment the index since a tag was not found.
myLastTagIndex++;
}
}
return(tagFound);
}
| uint32_t SamRecord::getNumOverlaps | ( | int32_t | start, |
| int32_t | end | ||
| ) |
Return the number of bases in this read that overlap the passed in region.
Matches & mismatches between the read and the reference are counted as overlaps, but insertions, deletions, skips, clips, and pads are not counted.
| start | inclusive 0-based start position (reference position) of the region to check for overlaps in. (-1 indicates to start at the beginning of the reference.) |
| end | exclusive 0-based end position (reference position) of the region to check for overlaps in. (-1 indicates to go to the end of the reference.) |
Definition at line 1841 of file SamRecord.cpp.
References get0BasedPosition(), and Cigar::getNumOverlaps().
Referenced by SamFile::GetNumOverlaps().
{
    // An alignment length of -1 is the "not yet calculated" marker, which
    // means the cigar has not been parsed and the cigar roller is not set
    // up yet.
    const bool cigarAlreadyParsed = (myAlignmentLength != -1);
    if(!cigarAlreadyParsed)
    {
        parseCigar();
    }

    // The roller does the counting, given where this read starts.
    return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
}
| const char * SamRecord::getQuality | ( | ) |
Returns the SAM formatted quality string (QUAL).
Definition at line 1626 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by PileupElementBaseQual::addEntry(), getFields(), SamValidator::isValidQuality(), and SamFilter::sumMismatchQuality().
{
    myStatus = SamStatus::SUCCESS;

    // An empty quality string means the value is still only held in the
    // record buffer, so pull sequence and quality out of the buffer first.
    const bool needSync = (myQuality.Length() == 0);
    if(needSync)
    {
        setSequenceAndQualityFromBuffer();
    }
    return(myQuality.c_str());
}
| char SamRecord::getQuality | ( | int | index | ) |
Get the quality character at the specified index into the quality string (0 to readLength - 1).
Throws an exception if index is out of range.
| index | index into the quality string (0 to readLength-1). |
Definition at line 1770 of file SamRecord.cpp.
References getReadLength(), and BaseUtilities::UNKNOWN_QUALITY_CHAR.
{
    int32_t readLen = getReadLength();

    // With no read there is no quality either: return the unknown-quality
    // character rather than treating this as an error.
    if(readLen == 0)
    {
        return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
    }

    // Reject an index outside the read.
    if((index < 0) || (index >= readLen))
    {
        String exceptionString = "SamRecord::getQuality(";
        exceptionString += index;
        exceptionString += ") is out of range. Index must be between 0 and ";
        exceptionString += (readLen - 1);
        throw std::runtime_error(exceptionString.c_str());
    }

    // No quality string yet: read directly from the packed BAM quality,
    // which is usable because readLen != 0.  Adding 33 converts the stored
    // value to its SAM character.
    if(myQuality.Length() == 0)
    {
        return(myPackedQuality[index] + 33);
    }

    // A quality string exists.  A lone '*' means the quality is unknown.
    if((myQuality.Length() == 1) && (myQuality[0] == '*'))
    {
        return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
    }

    // The quality string is not guaranteed to be as long as the sequence,
    // so the index still has to be checked against the string itself.
    if(index >= myQuality.Length())
    {
        String exceptionString = "SamRecord::getQuality(";
        exceptionString += index;
        exceptionString += ") is out of range. Index must be between 0 and ";
        exceptionString += (myQuality.Length() - 1);
        throw std::runtime_error(exceptionString.c_str());
    }

    return(myQuality[index]);
}
| int32_t SamRecord::getReadLength | ( | ) |
Get the length of the read.
Definition at line 1379 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::clipOnMismatchThreshold(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getQuality(), getSequence(), SamValidator::isValidCigar(), SamValidator::isValidQuality(), SamQuerySeqWithRefIter::reset(), and CigarHelper::softClipEndByRefPos().
{
    myStatus = SamStatus::SUCCESS;

    // When the sequence in the buffer is valid, the length stored in the
    // record can be used directly.
    if(myIsSequenceBufferValid)
    {
        return(myRecordPtr->myReadLength);
    }

    // Otherwise derive it from the sequence string: "*" means there is no
    // sequence, so the length is 0.
    if((mySequence.Length() == 1) && (mySequence[0] == '*'))
    {
        return(0);
    }

    // The string length is used as-is (no terminating null is counted).
    return(mySequence.Length());
}
| const char * SamRecord::getReadName | ( | ) |
Returns the SAM formatted Read Name (QNAME).
Definition at line 1530 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by getFields(), SamValidator::isValid(), and SamFile::validateSortOrder().
{
    myStatus = SamStatus::SUCCESS;

    // An empty name string means the name is still only held in the record
    // buffer; it is read from the start of the variable-length data.
    const bool needSync = (myReadName.Length() == 0);
    if(needSync)
    {
        myReadName = (char*)&(myRecordPtr->myData);
    }
    return(myReadName.c_str());
}
| uint8_t SamRecord::getReadNameLength | ( | ) |
Get the length of the readname (QNAME) including the null.
Definition at line 1314 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamValidator::isValid().
{
    myStatus = SamStatus::SUCCESS;

    if(!myIsReadNameBufferValid)
    {
        // The buffer is stale, so compute the length from the string and
        // add one for the terminating null.
        return(myReadName.Length() + 1);
    }

    // The buffer is valid, so its stored length can be used.
    return(myRecordPtr->myReadNameLength);
}
| const void * SamRecord::getRecordBuffer | ( | ) |
Get a const pointer to the buffer that contains the BAM representation of the record.
Definition at line 1192 of file SamRecord.cpp.
{
    // Use the sequence translation currently configured on this record.
    const void* recordBuffer = getRecordBuffer(mySequenceTranslation);
    return recordBuffer;
}
| const void * SamRecord::getRecordBuffer | ( | SequenceTranslation | translation | ) |
Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence.
| translation | type of sequence translation to use. |
Definition at line 1199 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;
    bool bufferOk = true;

    // Rebuild the buffer when it is out of sync, or when the sequence it
    // holds was written with a different translation than requested.
    const bool needsFix = (!myIsBufferSynced) ||
                          (myBufferSequenceTranslation != translation);
    if(needsFix)
    {
        bufferOk = fixBuffer(translation);
    }

    // Tags are written separately.  This step is attempted even if the
    // fix above failed.
    if(myNeedToSetTagsInBuffer)
    {
        if(!setTagsInBuffer())
        {
            bufferOk = false;
        }
    }

    if(bufferOk)
    {
        return (const void *)myRecordPtr;
    }
    // At least one of the steps failed.
    return(NULL);
}
| GenomeSequence * SamRecord::getReference | ( | ) |
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set).
Definition at line 1911 of file SamRecord.cpp.
Referenced by SamValidator::isValidTags().
{
    // Plain accessor for the stored reference pointer; the status is not
    // touched here.
    return myRefPtr;
}
| int32_t SamRecord::getReferenceID | ( | ) |
Get the reference sequence id of the record (BAM format rid).
Definition at line 1293 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamCoordOutput::add(), SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and SamFile::validateSortOrder().
{
    // This getter always reports success; the value comes directly from
    // the fixed-length part of the record.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myReferenceID);
}
| const char * SamRecord::getReferenceName | ( | ) |
Get the reference sequence name (RNAME) of the record.
Definition at line 1286 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by PileupElement::addEntry(), SamTags::createMDTag(), getMateReferenceNameOrEqual(), getSequence(), SamValidator::isValid(), and SamQuerySeqWithRefIter::reset().
{
    // This getter always reports success and returns the stored name.
    myStatus = SamStatus::SUCCESS;
    return(myReferenceName.c_str());
}
| const char * SamRecord::getSequence | ( | ) |
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation.
Definition at line 1556 of file SamRecord.cpp.
Referenced by PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getFields(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getSequence(), and shiftIndelsLeft().
{
    // Use the sequence translation currently configured on this record.
    const char* sequenceString = getSequence(mySequenceTranslation);
    return sequenceString;
}
| const char * SamRecord::getSequence | ( | SequenceTranslation | translation | ) |
Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation.
| translation | type of sequence translation to use. |
Definition at line 1562 of file SamRecord.cpp.
References EQUAL, getCigarInfo(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), SamQuerySeqWithRef::seqWithoutEquals(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // An empty sequence string means the value is still only held in the
    // record buffer, so pull it out first.
    if(mySequence.Length() == 0)
    {
        setSequenceAndQualityFromBuffer();
    }

    // Without a reference, or when no translation is requested, the stored
    // sequence is returned unchanged.
    if((myRefPtr == NULL) || (translation == NONE))
    {
        return(mySequence.c_str());
    }

    if(translation == EQUAL)
    {
        // The '=' form is cached; it is only built while the cache is empty.
        if(mySeqWithEq.length() == 0)
        {
            if(mySequence == "*")
            {
                // An undefined sequence has nothing to translate.
                mySeqWithEq = '*';
            }
            else
            {
                SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
                                                  myRecordPtr->myPosition,
                                                  *(getCigarInfo()),
                                                  getReferenceName(),
                                                  *myRefPtr,
                                                  mySeqWithEq);
            }
        }
        return(mySeqWithEq.c_str());
    }

    // Remaining case: translation == BASES.
    // The form without '=' is cached in the same way.
    if(mySeqWithoutEq.length() == 0)
    {
        if(mySequence == "*")
        {
            // An undefined sequence has nothing to translate.
            mySeqWithoutEq = '*';
        }
        else
        {
            SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
                                                 myRecordPtr->myPosition,
                                                 *(getCigarInfo()),
                                                 getReferenceName(),
                                                 *myRefPtr,
                                                 mySeqWithoutEq);
        }
    }
    return(mySeqWithoutEq.c_str());
}
| char SamRecord::getSequence | ( | int | index | ) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation.
Throws an exception if index is out of range.
| index | index into the sequence string (0 to readLength-1). |
Definition at line 1639 of file SamRecord.cpp.
References getSequence().
{
    // Use the sequence translation currently configured on this record.
    const char base = getSequence(index, mySequenceTranslation);
    return base;
}
| char SamRecord::getSequence | ( | int | index, |
| SequenceTranslation | translation | ||
| ) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1 performing the specified sequence translation.
Throws an exception if index is out of range.
| index | index into the sequence string (0 to readLength-1). |
| translation | type of sequence translation to use. |
Definition at line 1645 of file SamRecord.cpp.
References EQUAL, getCigarInfo(), getReadLength(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().
{
    // Lookup table from a 4-bit packed BAM base code to its character.
    static const char * asciiBases = "=AC.G...T......N";

    // Determine the read length.
    int32_t readLen = getReadLength();

    // If the read length is 0, this method should not be called.
    if(readLen == 0)
    {
        String exceptionString = "SamRecord::getSequence(";
        exceptionString += index;
        exceptionString += ") is not allowed since sequence = '*'";
        throw std::runtime_error(exceptionString.c_str());
    }
    else if((index < 0) || (index >= readLen))
    {
        // Only get here if the index was out of range, so throw an exception.
        String exceptionString = "SamRecord::getSequence(";
        exceptionString += index;
        exceptionString += ") is out of range. Index must be between 0 and ";
        exceptionString += (readLen - 1);
        throw std::runtime_error(exceptionString.c_str());
    }

    // Determine if translation needs to be done.
    if((translation == NONE) || (myRefPtr == NULL))
    {
        // No translation needs to be done.
        if(mySequence.Length() == 0)
        {
            // No string yet, so decode the base from the packed BAM
            // sequence, which holds two bases per byte: odd indexes use
            // the low 4 bits, even indexes the high 4 bits.
            if(myIsSequenceBufferValid)
            {
                return(index & 1 ?
                       asciiBases[myPackedSequence[index / 2] & 0xF] :
                       asciiBases[myPackedSequence[index / 2] >> 4]);
            }
            else
            {
                String exceptionString = "SamRecord::getSequence(";
                exceptionString += index;
                exceptionString += ") called with no sequence set";
                throw std::runtime_error(exceptionString.c_str());
            }
        }
        // Already have string.
        return(mySequence[index]);
    }

    // A translation (EQUAL or BASES) is required and a reference is set.
    // The string overload syncs the sequence from the buffer if needed,
    // builds and caches the translated string, and returns it, so index
    // into its result instead of repeating that logic here.
    return(getSequence(translation)[index]);
}
| const SamStatus & SamRecord::getStatus | ( | ) |
Returns the status associated with the last method that sets the status.
Definition at line 2391 of file SamRecord.cpp.
{
    // Plain accessor: hands back the stored status without modifying it.
    return myStatus;
}
| const String * SamRecord::getStringTag | ( | const char * | tag | ) |
Get the string value for the specified tag.
| tag | tag to retrieve |
| Returns | pointer to the tag's string value if found, NULL if not found. |
Definition at line 2168 of file SamRecord.cpp.
Referenced by SamTags::isMDTagCorrect(), and SamValidator::isValidTags().
{
    // Init to success, as the other tag getters do, so that getStatus()
    // does not report a stale result from an earlier call.
    myStatus = SamStatus::SUCCESS;

    // Parse the buffer if necessary.
    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read the tags from the buffer, so cannot
            // get tags.  setTagsFromBuffer set the errors,
            // so just return null.
            return(NULL);
        }
    }

    // Look for the tag as a 'Z' (string) tag first.
    int key = MAKEKEY(tag[0], tag[1], 'Z');
    int offset = extras.Find(key);
    if(offset < 0)
    {
        // Not stored as 'Z', so check for a 'B' tag with the same name.
        key = MAKEKEY(tag[0], tag[1], 'B');
        offset = extras.Find(key);
        if(offset < 0)
        {
            // Tag not found.
            return(NULL);
        }
    }

    // Offset is valid.  The hash entry holds the index of the value within
    // strings, so return the address of that stored string.
    int value = extras[offset];
    return(&(strings[value]));
}
| uint32_t SamRecord::getTagLength | ( | ) |
Returns the length of the BAM formatted tags.
Definition at line 1917 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // When the tags are held outside the buffer, their size is already
    // tracked in myTagBufferSize.
    if(!myNeedToSetTagsFromBuffer)
    {
        return(myTagBufferSize);
    }

    // The tags only exist in the buffer.  They start after the read name,
    // the cigar, the packed sequence ((readLength + 1) / 2 bytes) and the
    // quality (readLength bytes).
    unsigned char * firstTagByte =
        (unsigned char *)myRecordPtr->myData
        + myRecordPtr->myReadNameLength
        + myRecordPtr->myCigarLength * sizeof(int)
        + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;

    // Everything from the reference id up to the first tag is non-tag data.
    // The block size field itself is excluded because it is not counted in
    // the block size.
    uint32_t bytesBeforeTags =
        firstTagByte - (unsigned char*)&(myRecordPtr->myReferenceID);

    // Whatever is left of the block belongs to the tags.
    uint32_t tagBytes = myRecordPtr->myBlockSize - bytesBeforeTags;
    return(tagBytes);
}
| bool SamRecord::getTagsString | ( | const char * | tags, |
| String & | returnString, | ||
| char | delim = '\t' |
||
| ) |
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE...
Sets the Status to SUCCESS when the tags are successfully returned or the tags were not found. If a different error occurred, the status is set appropriately. The delimiter between the tags to retrieve is ',' or ';'. ',' was added since the original delimiter, ';', requires the string to be quoted on the command-line.
| tags | the tags to retrieve, formatted as TAG:TYPE,TAG:TYPE... |
| returnString | the String to set (this method first clears returnString) to TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... |
| delim | delimiter to use to separate two tags, default is a tab. |
Definition at line 2070 of file SamRecord.cpp.
References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Walks the requested "XY:T" entries in tags and appends every one that
    // is present on this record to returnString as XY:T:VALUE.
    const char* currentTagPtr = tags;
    returnString.Clear();
    myStatus = SamStatus::SUCCESS;
    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read the tags from the buffer, so cannot
            // get tags.
            return(false);
        }
    }

    bool returnStatus = true;

    while(*currentTagPtr != '\0')
    {
        // Tags are formatted as: XY:Z
        // Where X is [A-Za-z], Y is [A-Za-z], and
        // Z is A,i,f,Z,H (cCsSI are also accepted)
        if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
           (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
        {
            myStatus.setStatus(SamStatus::INVALID,
                               "getTagsString called with improperly formatted tags.\n");
            returnStatus = false;
            break;
        }

        // Construct the key.
        int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
                          currentTagPtr[3]);
        // Look to see if the key exists in the hash.
        int offset = extras.Find(key);

        if(offset >= 0)
        {
            // Offset is set, so the key was found.
            // Separate this entry from any previously appended one.
            if(!returnString.IsEmpty())
            {
                returnString += delim;
            }
            returnString += currentTagPtr[0];
            returnString += currentTagPtr[1];
            returnString += ':';
            returnString += currentTagPtr[3];
            returnString += ':';

            // Append the value according to the type stored in the key.
            char vtype;
            getTypeFromKey(key, vtype);

            switch(vtype)
            {
                case 'i':
                    returnString += *(int*)getIntegerPtr(offset, vtype);
                    break;
                case 'f':
                    returnString += *(float*)getFloatPtr(offset);
                    break;
                case 'Z':
                case 'B':
                    returnString += *(String*)getStringPtr(offset);
                    break;
                default:
                    // Unknown type: flag the failure but keep processing
                    // the remaining requested tags.
                    myStatus.setStatus(SamStatus::INVALID,
                                       "getTagsString called with unknown type.\n");
                    returnStatus = false;
                    break;
            }
        }

        // Increment to the next tag.
        if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
        {
            // Skip the 4-character entry and the delimiter after it.
            currentTagPtr += 5;
        }
        else if(currentTagPtr[4] != '\0')
        {
            // Invalid tag format.
            myStatus.setStatus(SamStatus::INVALID,
                               "getTagsString called with improperly formatted tags.\n");
            returnStatus = false;
            break;
        }
        else
        {
            // Last Tag: land on the terminating null so the loop ends.
            currentTagPtr += 4;
        }
    }
    return(returnStatus);
}
| bool SamRecord::isCharType | ( | char | vtype | ) | [static] |
Returns whether or not the specified vtype is a char type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 2050 of file SamRecord.cpp.
Referenced by SamRecordHelper::genSamTagString().
{
    // 'A' is the only value type treated as a character.
    return(vtype == 'A');
}
| bool SamRecord::isFloatType | ( | char | vtype | ) | [static] |
Returns whether or not the specified vtype is a float type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 2040 of file SamRecord.cpp.
Referenced by SamRecordHelper::genSamTagString().
{
    // 'f' is the only value type treated as a float.
    return(vtype == 'f');
}
| bool SamRecord::isIntegerType | ( | char | vtype | ) | [static] |
Returns whether or not the specified vtype is an integer type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 2028 of file SamRecord.cpp.
Referenced by SamRecordHelper::genSamTagString().
{
    // The integer value types are c, C, s, S, i and I.
    switch(vtype)
    {
        case 'c':
        case 'C':
        case 's':
        case 'S':
        case 'i':
        case 'I':
            return(true);
        default:
            return(false);
    }
}
| bool SamRecord::isStringType | ( | char | vtype | ) | [static] |
Returns whether or not the specified vtype is a string type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 2060 of file SamRecord.cpp.
Referenced by SamRecordHelper::genSamTagString().
{
    // Both 'Z' and 'B' values are treated as strings.
    const bool stringLike = (vtype == 'Z') || (vtype == 'B');
    return(stringLike);
}
| bool SamRecord::isValid | ( | SamFileHeader & | header | ) |
Returns whether or not the record is valid, setting the status to indicate success or failure.
| header | SAM Header associated with the record. Used to perform some validation against the header. |
Definition at line 161 of file SamRecord.cpp.
References SamValidationErrors::getErrorString(), StatGenStatus::INVALID, SamValidator::isValid(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // The validator collects any problems it finds into this container.
    SamValidationErrors invalidSamErrors;
    if(SamValidator::isValid(header, *this, invalidSamErrors))
    {
        // The record is valid; the status stays at success.
        return(true);
    }

    // The record is not valid: report the collected errors via the status.
    std::string errorMessage = "";
    invalidSamErrors.getErrorString(errorMessage);
    myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
    return(false);
}
| void SamRecord::resetRecord | ( | ) |
Reset the fields of the record to a default value.
This is not necessary when you are reading a SAM/BAM file, but if you are setting fields, it is a good idea to clean out a record before reusing it. Clearing it allows you to not have to set any empty fields.
Definition at line 91 of file SamRecord.cpp.
References clearTags(), NONE, and StatGenStatus::SUCCESS.
Referenced by SamRecord(), setBuffer(), setBufferFromFile(), and ~SamRecord().
{
// Puts the record into its default state: fixed-length fields get their
// default values, variable-length string fields are set to "*" (the read
// name to DEFAULT_READ_NAME), tags are cleared, and the buffer and its
// validity flags are marked as consistent with those values.
myIsBufferSynced = true;
// Fixed-length fields of the record.
myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
myRecordPtr->myReferenceID = -1;
myRecordPtr->myPosition = -1;
myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
myRecordPtr->myMapQuality = 0;
myRecordPtr->myBin = DEFAULT_BIN;
myRecordPtr->myCigarLength = 0;
myRecordPtr->myFlag = 0;
myRecordPtr->myReadLength = 0;
myRecordPtr->myMateReferenceID = -1;
myRecordPtr->myMatePosition = -1;
myRecordPtr->myInsertSize = 0;
// Set the sam values for the variable length fields.
// TODO - one way to speed this up might be to not set to "*" and just
// clear them, and write out a '*' for SAM if it is empty.
myReadName = DEFAULT_READ_NAME;
myReferenceName = "*";
myMateReferenceName = "*";
myCigar = "*";
mySequence = "*";
// Drop the cached translated sequences so they are rebuilt on demand.
mySeqWithEq.clear();
mySeqWithoutEq.clear();
myQuality = "*";
// Tags are cleared below, so nothing needs to be copied in either
// direction between the buffer and the tag containers.
myNeedToSetTagsFromBuffer = false;
myNeedToSetTagsInBuffer = false;
// Initialize the calculated alignment info to the uncalculated value.
myAlignmentLength = -1;
myUnclippedStartOffset = -1;
myUnclippedEndOffset = -1;
clearTags();
// Set the bam values for the variable length fields.
// Only the read name needs to be set, the others are a length of 0.
// Set the read name. The min size of myRecordPtr includes the size for
// the default read name.
memcpy(&(myRecordPtr->myData), myReadName.c_str(),
myRecordPtr->myReadNameLength);
// Set that the variable length buffer fields are valid.
myIsReadNameBufferValid = true;
myIsCigarBufferValid = true;
// The packed sequence follows the read name and the cigar in the buffer.
myPackedSequence =
(unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
myRecordPtr->myCigarLength * sizeof(int);
myIsSequenceBufferValid = true;
myBufferSequenceTranslation = NONE;
// The read length was reset to 0 above, so the quality pointer is set to
// the same location as the sequence pointer.
myPackedQuality = myPackedSequence;
myIsQualityBufferValid = true;
myIsTagsBufferValid = true;
myIsBinValid = true;
// -1 marks the cigar temp buffer as not set.
myCigarTempBufferLength = -1;
myStatus = SamStatus::SUCCESS;
NOT_FOUND_TAG_STRING = "";
NOT_FOUND_TAG_INT = -1; // TODO - deprecate
}
| bool SamRecord::rmTag | ( | const char * | tag, |
| char | type | ||
| ) |
Remove a tag.
| tag | tag to remove. |
| type | type of the tag to be removed. |
Definition at line 980 of file SamRecord.cpp.
References getString(), StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Only tags of exactly two characters are accepted.
    if(strlen(tag) != 2)
    {
        myStatus.setStatus(SamStatus::INVALID,
                           "rmTag called with tag that is not 2 characters\n");
        return(false);
    }

    myStatus = SamStatus::SUCCESS;

    // The tags may still only exist in the record buffer; they have to be
    // parsed before one can be removed.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return(false);
    }

    // Locate the tag in the hash.
    int tagKey = MAKEKEY(tag[0], tag[1], type);
    int slot = extras.Find(tagKey);
    if(slot < 0)
    {
        // The tag is not on the record, so there is nothing to remove;
        // that counts as success.
        return(true);
    }

    // Work out the stored type.  Integers are all keyed as 'i', so ask for
    // the actual integer type of this entry.
    char storedType;
    getTypeFromKey(tagKey, storedType);
    if(storedType == 'i')
    {
        storedType = getIntegerType(slot);
    }

    // Number of buffer bytes this tag accounts for.  The value itself is
    // NOT removed from strings, integers, or floats, because that would
    // shift the indexes held by every other entry of extras.
    // NOTE(review): the sizes look like 2-byte tag + 1-byte type + value
    // bytes - confirm against the BAM layout.
    int bytesFreed = 0;
    switch(storedType)
    {
        case 'A':
        case 'c':
        case 'C':
            bytesFreed = 4;
            break;
        case 's':
        case 'S':
            bytesFreed = 5;
            break;
        case 'i':
        case 'I':
        case 'f':
            bytesFreed = 7;
            break;
        case 'Z':
            bytesFreed = 4 + getString(slot).Length();
            break;
        case 'B':
            bytesFreed = 3 + getBtagBufferSize(getString(slot));
            break;
        default:
            myStatus.setStatus(SamStatus::INVALID,
                               "rmTag called with unknown type.\n");
            return(false);
    }

    // The tags in the buffer no longer match the tag containers.
    myNeedToSetTagsInBuffer = true;
    myIsTagsBufferValid = false;
    myIsBufferSynced = false;
    myTagBufferSize -= bytesFreed;

    // Finally drop the entry from the hash.
    extras.Delete(slot);
    return(true);
}
| bool SamRecord::rmTags | ( | const char * | tags | ) |
Remove tags.
The delimiter between the tags is ',' or ';'. ',' was added since the original delimiter, ';', requires the string to be quoted on the command-line.
| tags | tags to remove, formatted as Tag:Type,Tag:Type,Tag:Type... |
Definition at line 1071 of file SamRecord.cpp.
References getString(), StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Remove every tag named in 'tags', a list of Tag:Type entries
    // separated by ',' or ';' (for example "XY:Z,AB:i").
    // Returns true when the whole list was well formed and every matching
    // tag had a recognized type; otherwise returns false with myStatus
    // describing the problem.  Tags that are not present are skipped.
    myStatus = SamStatus::SUCCESS;

    // A NULL list cannot be parsed; report it rather than dereferencing it.
    if(tags == NULL)
    {
        myStatus.setStatus(SamStatus::INVALID,
                           "rmTags called with improperly formatted tags.\n");
        return(false);
    }

    const char* currentTagPtr = tags;

    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read the tags from the buffer, so the tags cannot
            // be looked up.
            return(false);
        }
    }

    bool returnStatus = true;
    // Tracks whether any entry was actually removed, so an unchanged record
    // is not needlessly marked as out of sync with its buffer.
    bool removedAny = false;
    int rmBuffSize = 0;

    while(*currentTagPtr != '\0')
    {
        // Tags are formatted as: XY:Z
        // Where X is [A-Za-z], Y is [A-Za-z], and
        // Z is A,i,f,Z,H (cCsSI are also accepted).
        // The checks short-circuit, so no character past the terminating
        // '\0' is ever read.
        if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
           (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
        {
            myStatus.setStatus(SamStatus::INVALID,
                               "rmTags called with improperly formatted tags.\n");
            returnStatus = false;
            break;
        }

        // Construct the key and look it up in the hash.
        int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
                          currentTagPtr[3]);
        int offset = extras.Find(key);
        if(offset >= 0)
        {
            // The key was found.  If it is an integer, determine the
            // actual integer type so the correct size is used below.
            char vtype;
            getTypeFromKey(key, vtype);
            if(vtype == 'i')
            {
                vtype = getIntegerType(offset);
            }

            // Work out how much smaller the tag buffer becomes without
            // this entry.
            // Do NOT remove from strings, integers, or floats because then
            // extras would need to be updated for all entries with the new
            // indexes into those variables.
            int entrySize = 0;
            bool knownType = true;
            switch(vtype)
            {
                case 'A':
                case 'c':
                case 'C':
                    entrySize = 4;
                    break;
                case 's':
                case 'S':
                    entrySize = 5;
                    break;
                case 'i':
                case 'I':
                    entrySize = 7;
                    break;
                case 'f':
                    entrySize = 7;
                    break;
                case 'Z':
                    entrySize = 4 + getString(offset).Length();
                    break;
                case 'B':
                    entrySize = 3 + getBtagBufferSize(getString(offset));
                    break;
                default:
                    // Unknown type: its size cannot be computed, so keep
                    // the entry (as rmTag does) and report the failure.
                    myStatus.setStatus(SamStatus::INVALID,
                                       "rmTags called with unknown type.\n");
                    returnStatus = false;
                    knownType = false;
                    break;
            }

            if(knownType)
            {
                // Account for the removed bytes and remove from the hash.
                rmBuffSize += entrySize;
                extras.Delete(offset);
                removedAny = true;
            }
        }

        // Advance to the next tag.
        if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
        {
            // Skip the entry and its delimiter.
            currentTagPtr += 5;
        }
        else if(currentTagPtr[4] != '\0')
        {
            // Invalid tag format.
            myStatus.setStatus(SamStatus::INVALID,
                               "rmTags called with improperly formatted tags.\n");
            returnStatus = false;
            break;
        }
        else
        {
            // Last tag: land on the terminating '\0' to end the loop.
            currentTagPtr += 4;
        }
    }

    if(removedAny)
    {
        // The buffer tags are now out of sync.
        myNeedToSetTagsInBuffer = true;
        myIsTagsBufferValid = false;
        myIsBufferSynced = false;
        myTagBufferSize -= rmBuffSize;
    }

    return(returnStatus);
}
| bool SamRecord::set0BasedMatePosition | ( | int32_t | matePosition | ) |
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| matePosition | 0-based start position of the mate/next fragment |
Definition at line 328 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by set1BasedMatePosition().
{
    // Store the 0-based mate position directly in the record structure.
    // No validation is performed, so the status is always success.
    myRecordPtr->myMatePosition = matePosition;
    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::set0BasedPosition | ( | int32_t | position | ) |
Set the leftmost position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| position | 0-based start position |
Definition at line 242 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by set1BasedPosition(), and SamFilter::softClip().
{
    // Store the 0-based leftmost position in the record structure and flag
    // the bin as no longer valid for the new position.
    // No validation is performed, so the status is always success.
    myRecordPtr->myPosition = position;
    myIsBinValid = false;
    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::set1BasedMatePosition | ( | int32_t | matePosition | ) |
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| matePosition | 1-based start position of the mate/next fragment |
Definition at line 322 of file SamRecord.cpp.
References set0BasedMatePosition().
{
    // Convert the 1-based (SAM) value to 0-based (BAM) and delegate to the
    // 0-based setter, which also sets the status.
    const int32_t zeroBasedMatePosition = matePosition - 1;
    return(set0BasedMatePosition(zeroBasedMatePosition));
}
| bool SamRecord::set1BasedPosition | ( | int32_t | position | ) |
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| position | 1-based start position |
Definition at line 236 of file SamRecord.cpp.
References set0BasedPosition().
{
    // Convert the 1-based (SAM) value to 0-based (BAM) and delegate to the
    // 0-based setter, which also sets the status.
    const int32_t zeroBasedPosition = position - 1;
    return(set0BasedPosition(zeroBasedPosition));
}
| SamStatus::Status SamRecord::setBuffer | ( | const char * | fromBuffer, |
| uint32_t | fromBufferSize, | ||
| SamFileHeader & | header | ||
| ) |
Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
| fromBuffer | buffer to read the BAM record from. |
| fromBufferSize | size of the buffer containing the BAM record. |
| header | BAM header for the record. |
Definition at line 525 of file SamRecord.cpp.
References StatGenStatus::FAIL_MEM, StatGenStatus::FAIL_PARSE, resetRecord(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Load this record from an in-memory BAM formatted record.
    // Returns FAIL_PARSE for a NULL or zero-length buffer, FAIL_MEM when
    // the record structure cannot be allocated, otherwise SUCCESS.
    myStatus = SamStatus::SUCCESS;

    const bool emptyInput = (fromBuffer == NULL) || (fromBufferSize == 0);
    if(emptyInput)
    {
        // There is nothing to parse.
        myStatus.setStatus(SamStatus::FAIL_PARSE,
                           "Cannot parse an empty file.");
        return(SamStatus::FAIL_PARSE);
    }

    // Start from a clean record before copying in the new contents.
    resetRecord();

    // Make sure the record structure can hold the entire buffer.
    if(allocateRecordStructure(fromBufferSize))
    {
        // Copy the raw record, then refresh the member variables that
        // depend on the buffer contents.
        memcpy(myRecordPtr, fromBuffer, fromBufferSize);
        setVariablesForNewBuffer(header);
        return(SamStatus::SUCCESS);
    }

    // Failed to allocate space.
    return(SamStatus::FAIL_MEM);
}
| SamStatus::Status SamRecord::setBufferFromFile | ( | IFILE | filePtr, |
| SamFileHeader & | header | ||
| ) |
Read the BAM record from a file.
| filePtr | file to read the buffer from. |
| header | BAM header for the record. |
Definition at line 558 of file SamRecord.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_MEM, StatGenStatus::FAIL_ORDER, StatGenStatus::FAIL_PARSE, ifeof(), ifread(), InputFile::isOpen(), StatGenStatus::NO_MORE_RECS, resetRecord(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Read one BAM record from filePtr into this record.
    // Returns:
    //   FAIL_ORDER   - the file is NULL or not open.
    //   NO_MORE_RECS - end of file reached before any byte was read.
    //   FAIL_PARSE   - end of file in the middle of the block size, or a
    //                  negative block size.
    //   FAIL_IO      - a read failed or returned too few bytes.
    //   FAIL_MEM     - the record structure could not be allocated.
    //   SUCCESS      - the record was read.
    myStatus = SamStatus::SUCCESS;
    if((filePtr == NULL) || (filePtr->isOpen() == false))
    {
        // File is not open, return failure.
        myStatus.setStatus(SamStatus::FAIL_ORDER,
                           "Can't read from an unopened file.");
        return(SamStatus::FAIL_ORDER);
    }

    // Clear the record.
    resetRecord();

    // Read the record size.
    int numBytes =
        ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));

    // Check to see if the end of the file was hit and no bytes were read.
    if(ifeof(filePtr) && (numBytes == 0))
    {
        // End of file, nothing was read, no more records.
        myStatus.setStatus(SamStatus::NO_MORE_RECS,
                           "No more records left to read.");
        return(SamStatus::NO_MORE_RECS);
    }

    if(numBytes != sizeof(int32_t))
    {
        // Failed to read the entire block size.  Either the end of the file
        // was reached early or there was an error.
        if(ifeof(filePtr))
        {
            // Error: end of the file reached prior to reading the rest of
            // the record.
            myStatus.setStatus(SamStatus::FAIL_PARSE,
                               "EOF reached in the middle of a record.");
            return(SamStatus::FAIL_PARSE);
        }
        else
        {
            // Error reading.
            myStatus.setStatus(SamStatus::FAIL_IO,
                               "Failed to read the record size.");
            return(SamStatus::FAIL_IO);
        }
    }

    // A negative block size can only come from a corrupt or non-BAM file.
    // It must be rejected before it is used: cast to unsigned below it
    // would become a huge read length and overrun the record buffer.
    if(myRecordPtr->myBlockSize < 0)
    {
        // Reset first so the bogus size does not linger in the record; the
        // status is set afterwards, as in the read-failure path below.
        resetRecord();
        myStatus.setStatus(SamStatus::FAIL_PARSE,
                           "Invalid negative record block size.");
        return(SamStatus::FAIL_PARSE);
    }

    // Allocate space for the record: the block plus its size field.
    if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
    {
        // Failed to allocate space.
        // Status is set by allocateRecordStructure.
        return(SamStatus::FAIL_MEM);
    }

    // Read the rest of the alignment block, starting at the reference id.
    if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
       != (unsigned int)myRecordPtr->myBlockSize)
    {
        // Error reading the record.  Reset it and return failure.
        resetRecord();
        myStatus.setStatus(SamStatus::FAIL_IO,
                           "Failed to read the record");
        return(SamStatus::FAIL_IO);
    }

    // Refresh the member variables that depend on the buffer contents.
    setVariablesForNewBuffer(header);

    // Return the status of the record.
    return(SamStatus::SUCCESS);
}
| bool SamRecord::setCigar | ( | const char * | cigar | ) |
Set the CIGAR to the specified SAM formatted cigar string.
Internal processing handles the switching between SAM/BAM formats when read/written.
| cigar | string containing the SAM formatted cigar. |
Definition at line 259 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::filterRead(), shiftIndelsLeft(), and SamFilter::softClip().
{
    // Replace the CIGAR with the given SAM formatted string and invalidate
    // everything that was derived from the previous CIGAR.
    myCigar = cigar;

    // The buffer no longer reflects the CIGAR, and the bin is flagged as
    // no longer valid.
    myIsCigarBufferValid = false;
    myIsBufferSynced = false;
    myIsBinValid = false;
    myCigarTempBufferLength = -1;

    // -1 marks the calculated alignment info as not yet calculated.
    myUnclippedEndOffset = -1;
    myUnclippedStartOffset = -1;
    myAlignmentLength = -1;

    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::setCigar | ( | const Cigar & | cigar | ) |
Set the CIGAR to the specified Cigar object.
Internal processing handles the switching between SAM/BAM formats when read/written.
| cigar | object to set this record's cigar to have. |
Definition at line 278 of file SamRecord.cpp.
References Cigar::getCigarString(), and StatGenStatus::SUCCESS.
{
    // Replace the CIGAR with the string form of the given Cigar object and
    // invalidate everything that was derived from the previous CIGAR.
    cigar.getCigarString(myCigar);

    // The buffer no longer reflects the CIGAR, and the bin is flagged as
    // no longer valid.
    myIsCigarBufferValid = false;
    myIsBufferSynced = false;
    myIsBinValid = false;
    myCigarTempBufferLength = -1;

    // -1 marks the calculated alignment info as not yet calculated.
    myUnclippedEndOffset = -1;
    myUnclippedStartOffset = -1;
    myAlignmentLength = -1;

    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::setFlag | ( | uint16_t | flag | ) |
Set the bitwise FLAG to the specified value.
| flag | integer flag to use. |
Definition at line 215 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::filterRead().
{
    // Store the bitwise flag directly in the record structure.
    // No validation is performed, so the status is always success.
    myRecordPtr->myFlag = flag;
    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::setInsertSize | ( | int32_t | insertSize | ) |
Sets the inferred insert size (ISIZE)/observed template length (TLEN).
| insertSize | inferred insert size/observed template length. |
Definition at line 336 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Store the insert size/template length directly in the record
    // structure.  No validation is performed, so the status is always
    // success.
    myRecordPtr->myInsertSize = insertSize;
    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::setMapQuality | ( | uint8_t | mapQuality | ) |
Set the mapping quality (MAPQ).
| mapQuality | map quality to set in the record. |
Definition at line 251 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::filterRead().
{
    // Store the mapping quality directly in the record structure.
    // No validation is performed, so the status is always success.
    myRecordPtr->myMapQuality = mapQuality;
    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::setMateReferenceName | ( | SamFileHeader & | header, |
| const char * | mateReferenceName | ||
| ) |
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id.
| header | SAM/BAM header to use to determine the mate reference id. |
| mateReferenceName | mate reference name to use. |
Definition at line 297 of file SamRecord.cpp.
References SamFileHeader::getReferenceID(), and StatGenStatus::SUCCESS.
{
    // Set the mate reference name and look up (or add) its reference id
    // in the header.
    myStatus = SamStatus::SUCCESS;

    if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) != 0)
    {
        // An explicit mate reference name was supplied.
        myMateReferenceName = mateReferenceName;
    }
    else
    {
        // The name is "=", meaning the mate is on the same reference as
        // this record.  This assumes the reference name has already been
        // set.
        myMateReferenceName = myReferenceName;
    }

    // Resolve the mate reference id; passing true adds the reference to
    // the header if it does not already exist.
    const bool addIfMissing = true;
    myRecordPtr->myMateReferenceID =
        header.getReferenceID(myMateReferenceName, addIfMissing);

    return(true);
}
| bool SamRecord::setQuality | ( | const char * | quality | ) |
Sets the quality (QUAL) to the specified SAM formatted quality string.
Internal processing handles switching between SAM/BAM formats when read/written.
| quality | SAM quality string. |
Definition at line 357 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Replace the quality string; the buffer copy of the quality is now
    // stale and must be regenerated before the buffer is used.
    myQuality = quality;
    myIsQualityBufferValid = false;
    myIsBufferSynced = false;
    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::setReadName | ( | const char * | readName | ) |
Set QNAME to the passed in name.
| readName | the readname to set the QNAME to. |
Definition at line 193 of file SamRecord.cpp.
References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Set the read name (QNAME).  An empty name is invalid: the name is
    // reset to the default, the status is set to INVALID and false is
    // returned.
    myStatus = SamStatus::SUCCESS;
    myReadName = readName;

    // The buffer copy of the read name is now stale.
    myIsReadNameBufferValid = false;
    myIsBufferSynced = false;

    if(myReadName.Length() != 0)
    {
        // The name has some length, so it is accepted.
        return(true);
    }

    // Zero length name: fall back to the default read name and report it.
    myReadName = DEFAULT_READ_NAME;
    myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
    myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
    return(false);
}
| void SamRecord::setReference | ( | GenomeSequence * | reference | ) |
Set the reference to the specified genome sequence object.
| reference | pointer to the GenomeSequence object. |
Definition at line 178 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::ReadRecord(), SamFile::validateSortOrder(), and SamFile::WriteRecord().
{
    // Remember the caller's genome sequence pointer.  Only the pointer is
    // stored here; nothing else in the record is changed.
    this->myRefPtr = reference;
}
| bool SamRecord::setReferenceName | ( | SamFileHeader & | header, |
| const char * | referenceName | ||
| ) |
Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id.
| header | SAM/BAM header to use to determine the reference id. |
| referenceName | reference name to use. |
Definition at line 223 of file SamRecord.cpp.
References SamFileHeader::getReferenceID(), and StatGenStatus::SUCCESS.
{
    // Set the reference name and look up (or add) its reference id in the
    // header.
    myReferenceName = referenceName;

    // Passing true adds the reference to the header if it does not already
    // exist.
    const bool addIfMissing = true;
    myRecordPtr->myReferenceID =
        header.getReferenceID(referenceName, addIfMissing);

    myStatus = SamStatus::SUCCESS;
    return(true);
}
| bool SamRecord::setSequence | ( | const char * | seq | ) |
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
Internal processing handles switching between SAM/BAM formats when read/written.
| seq | SAM sequence string. May contain '='. |
Definition at line 344 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Replace the sequence string.  The new value is stored before the
    // other sequence copies are cleared.
    myStatus = SamStatus::SUCCESS;
    mySequence = seq;

    // NOTE(review): these look like derived forms of the sequence (with
    // and without '=') - confirm; either way they are emptied here.
    mySeqWithEq.clear();
    mySeqWithoutEq.clear();

    // The buffer copy of the sequence is now stale.
    myIsSequenceBufferValid = false;
    myIsBufferSynced = false;
    return(true);
}
| void SamRecord::setSequenceTranslation | ( | SequenceTranslation | translation | ) |
Set the type of sequence translation to use when getting the sequence.
The default type (if this method is never called) is NONE (the sequence is left as-is). Can be over-ridden by using the accessors that take a SequenceTranslation parameter.
| translation | type of sequence translation to use. |
Definition at line 187 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::ReadRecord(), and SamFile::validateSortOrder().
{
    // Record the translation setting; writeRecordBuffer(IFILE) passes this
    // value on as its translation.
    this->mySequenceTranslation = translation;
}
| bool SamRecord::shiftIndelsLeft | ( | ) |
Shift the indels (if any) to the left by updating the CIGAR.
Definition at line 368 of file SamRecord.cpp.
References BASES, Cigar::foundInQuery(), getSequence(), CigarRoller::IncrementCount(), Cigar::insert, Cigar::isMatchOrMismatch(), CigarRoller::Remove(), setCigar(), Cigar::size(), and CigarRoller::Update().
{
    // Shift each insertion in the CIGAR as far left as it can go without
    // changing the implied reference and without adding CIGAR operations.
    // Returns true if the CIGAR was changed, false otherwise.

    // Check to see whether or not the Cigar has already been
    // set - this is determined by checking if alignment length
    // is set since alignment length and the cigar are set
    // at the same time.
    if(myAlignmentLength == -1)
    {
        // Not been set, so calculate it.
        parseCigar();
    }

    // Cigar is set, so now myCigarRoller can be used.
    int numOps = myCigarRoller.size();
    if(numOps == 0)
    {
        // No operations, so there is nothing to shift (and no first
        // operation to look at below).
        return(false);
    }

    // Track whether or not there was a shift.
    bool shifted = false;

    // Track where in the read we are: the read position at which the
    // operation being examined starts.
    uint32_t currentPos = 0;

    // Since the loop starts at 1 because the first operation can't be
    // shifted, increment the currentPos past the first operation.
    if(Cigar::foundInQuery(myCigarRoller[0]))
    {
        // This op was found in the read, increment the current position.
        currentPos += myCigarRoller[0].count;
    }

    // Loop through the cigar operations from the 2nd operation since
    // the first operation is already on the end and can't shift.
    for(int currentOp = 1; currentOp < numOps; currentOp++)
    {
        if(myCigarRoller[currentOp].operation == Cigar::insert)
        {
            // For now, only shift a max of 1 operation.
            int prevOpIndex = currentOp-1;
            // Track the next op for seeing if it is the same as the
            // previous for merging reasons.
            int nextOpIndex = currentOp+1;
            if(nextOpIndex == numOps)
            {
                // There is no next op, so set it equal to the current one.
                nextOpIndex = currentOp;
            }

            // The start of the previous operation, so we know when we hit
            // it so we don't shift past it.
            uint32_t prevOpStart =
                currentPos - myCigarRoller[prevOpIndex].count;

            // We can only shift into a match/mismatch operation.
            if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
            {
                // TODO - shift past pads
                // An insert is in the read, so increment the position.
                currentPos += myCigarRoller[currentOp].count;
                // Not a match/mismatch, so can't shift into it.
                continue;
            }

            // Remember the insert length now: the operation at currentOp
            // may be overwritten below when the operations are swapped.
            const uint32_t insertLen = myCigarRoller[currentOp].count;

            // The end of the insert is calculated by adding the size
            // of this insert minus 1 to the start of the insert.
            uint32_t insertEndPos = currentPos + insertLen - 1;

            // The insert starts at the current position.
            uint32_t insertStartPos = currentPos;

            // Loop as long as the position before the insert start
            // matches the last character in the insert.  If they match,
            // the insert can be shifted one index left because the
            // implied reference will not change.  If they do not match,
            // we can't shift because the implied reference would change.
            // Stop loop when insertStartPos = prevOpStart, because we
            // don't want to move past that.
            while((insertStartPos > prevOpStart) &&
                  (getSequence(insertEndPos,BASES) ==
                   getSequence(insertStartPos - 1, BASES)))
            {
                // We can shift, so move the insert start & end one left.
                --insertEndPos;
                --insertStartPos;
            }

            // Determine if a shift has occurred.
            int shiftLen = currentPos - insertStartPos;
            if(shiftLen > 0)
            {
                // Shift occurred, so adjust the cigar if the cigar will
                // not become more operations.
                // If the next operation is the same as the previous or
                // if the insert and the previous operation switch positions
                // then the cigar has the same number of operations.
                // If the next operation is different, and the shift splits
                // the previous operation in 2, then the cigar would
                // become longer, so we do not want to shift.
                if(myCigarRoller[nextOpIndex].operation ==
                   myCigarRoller[prevOpIndex].operation)
                {
                    // The operations are the same, so merge them by moving
                    // the length of the shift from the previous operation
                    // to the next operation.
                    myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
                    myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);

                    // The insert now starts shiftLen bases earlier.
                    currentPos = insertStartPos;

                    // If the previous op length is 0, just remove that
                    // operation.
                    if(myCigarRoller[prevOpIndex].count == 0)
                    {
                        myCigarRoller.Remove(prevOpIndex);
                        // Everything after the removed operation moved
                        // down one index, so the insert is now one lower
                        // and there is one less operation to visit.
                        --currentOp;
                        --numOps;
                    }
                    shifted = true;
                }
                else if(insertStartPos == prevOpStart)
                {
                    // Can only shift if the insert shifts past the
                    // entire previous operation, otherwise an operation
                    // would need to be added.
                    // Swap the positions of the insert and the
                    // previous operation, each keeping its own length.
                    myCigarRoller.Update(currentOp,
                                         myCigarRoller[prevOpIndex].operation,
                                         myCigarRoller[prevOpIndex].count);
                    myCigarRoller.Update(prevOpIndex,
                                         Cigar::insert,
                                         insertLen);

                    // The operation now at currentOp (the former previous
                    // operation) starts right after the moved insert.
                    currentPos = insertStartPos + insertLen;
                    shifted = true;
                }
            }

            // Move past the operation that is now at currentOp.  It is in
            // the read: either the insert itself or, after a swap, the
            // match/mismatch it changed places with.
            currentPos += myCigarRoller[currentOp].count;
        }
        else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
        {
            // This op was found in the read, increment the current position.
            currentPos += myCigarRoller[currentOp].count;
        }
    }

    if(shifted)
    {
        // TODO - setCigar is currently inefficient because later the cigar
        // roller will be recalculated, but for now it will work.
        setCigar(myCigarRoller);
    }
    return(shifted);
}
| SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr | ) |
Write the record as a BAM into the specified already opened file.
| filePtr | file to write the BAM record into. |
Definition at line 1225 of file SamRecord.cpp.
{
    // Write using the record's own translation setting by delegating to
    // the overload that takes an explicit translation.
    const SequenceTranslation defaultTranslation = mySequenceTranslation;
    return(writeRecordBuffer(filePtr, defaultTranslation));
}
| SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr, |
| SequenceTranslation | translation | ||
| ) |
Write the record as a BAM into the specified already opened file using the specified translation on the sequence.
| filePtr | file to write the BAM record into. |
| translation | type of sequence translation to use. |
Definition at line 1232 of file SamRecord.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_ORDER, StatGenStatus::getStatus(), ifwrite(), InputFile::isOpen(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Write this record, in BAM format, to an already opened file using
    // the requested sequence translation.
    // Returns FAIL_ORDER if the file is not open, the status left by
    // fixBuffer if the buffer cannot be brought up to date, FAIL_IO on a
    // short write, otherwise SUCCESS.
    myStatus = SamStatus::SUCCESS;

    const bool fileUsable = (filePtr != NULL) && (filePtr->isOpen() != false);
    if(!fileUsable)
    {
        // File is not open, return failure.
        myStatus.setStatus(SamStatus::FAIL_ORDER,
                           "Can't write to an unopened file.");
        return(SamStatus::FAIL_ORDER);
    }

    // The buffer can be written as-is only if it is in sync with the
    // record fields and was built with the requested translation.
    const bool bufferCurrent = (myIsBufferSynced != false) &&
        (myBufferSequenceTranslation == translation);
    if(!bufferCurrent)
    {
        if(!fixBuffer(translation))
        {
            // The buffer could not be fixed; report the status that
            // fixBuffer left behind.
            return(myStatus.getStatus());
        }
    }

    // Write the record: the block plus its leading size field.
    unsigned int bytesExpected = myRecordPtr->myBlockSize + sizeof(int32_t);
    unsigned int bytesWritten = ifwrite(filePtr, myRecordPtr, bytesExpected);

    if(bytesWritten != bytesExpected)
    {
        // The correct number of bytes were not written.
        myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
        return(SamStatus::FAIL_IO);
    }

    return(SamStatus::SUCCESS);
}