CigarRoller.cpp

00001 /*
00002  *  Copyright (C) 2010-2011  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include "CigarRoller.h"
00021 
00022 ////////////////////////////////////////////////////////////////////////
00023 //
00024 // Cigar Roller Class
00025 //
00026 
00027 
00028 CigarRoller & CigarRoller::operator += (CigarRoller &rhs)
00029 {
00030     std::vector<CigarOperator>::iterator i;
00031     for (i = rhs.cigarOperations.begin(); i != rhs.cigarOperations.end(); i++)
00032     {
00033         (*this) += *i;
00034     }
00035     return *this;
00036 }
00037 
00038 
00039 //
00040 // Append a new operator at the end of the sequence.
00041 //
00042 CigarRoller & CigarRoller::operator += (const CigarOperator &rhs)
00043 {
00044     // Adding to the cigar, so the query & reference indexes would be
00045     // incomplete, so just clear them.
00046     clearQueryAndReferenceIndexes();
00047 
00048     if (rhs.count==0)
00049     {
00050         // nothing to do
00051     }
00052     else if (cigarOperations.empty() || cigarOperations.back() != rhs)
00053     {
00054         cigarOperations.push_back(rhs);
00055     }
00056     else
00057     {
00058         // last stored operation is the same as the new one, so just add it in
00059         cigarOperations.back().count += rhs.count;
00060     }
00061     return *this;
00062 }
00063 
00064 
00065 CigarRoller & CigarRoller::operator = (CigarRoller &rhs)
00066 {
00067     clear();
00068 
00069     (*this) += rhs;
00070     
00071     return *this;
00072 }
00073 
00074 
00075 //
00076 void CigarRoller::Add(Operation operation, int count)
00077 {
00078     CigarOperator rhs(operation, count);
00079     (*this) += rhs;
00080 }
00081 
00082 
00083 void CigarRoller::Add(char operation, int count)
00084 {
00085     switch (operation)
00086     {
00087         case 0:
00088         case 'M':
00089             Add(match, count);
00090             break;
00091         case 1:
00092         case 'I':
00093             Add(insert, count);
00094             break;
00095         case 2:
00096         case 'D':
00097             Add(del, count);
00098             break;
00099         case 3:
00100         case 'N':
00101             Add(skip, count);
00102             break;
00103         case 4:
00104         case 'S':
00105             Add(softClip, count);
00106             break;
00107         case 5:
00108         case 'H':
00109             Add(hardClip, count);
00110             break;
00111         case 6:
00112         case 'P':
00113             Add(pad, count);
00114             break;
00115         default:
00116             // Hmmm... what to do?
00117             std::cerr << "ERROR "
00118                       << "(" << __FILE__ << ":" << __LINE__ <<"): "
00119                       << "Parsing CIGAR - invalid character found "
00120                       << "with parameter " << operation << " and " << count
00121                       << std::endl;
00122             break;
00123     }
00124 }
00125 
00126 
00127 void CigarRoller::Add(const char *cigarString)
00128 {
00129     int operationCount = 0;
00130     while (*cigarString)
00131     {
00132         if (isdigit(*cigarString))
00133         {
00134             char *endPtr;
00135             operationCount = strtol((char *) cigarString, &endPtr, 10);
00136             cigarString = endPtr;
00137         }
00138         else
00139         {
00140             Add(*cigarString, operationCount);
00141             cigarString++;
00142         }
00143     }
00144 }
00145 
00146 
00147 bool CigarRoller::Remove(int index)
00148 {
00149     if((index < 0) || ((unsigned int)index >= cigarOperations.size()))
00150     {
00151         // can't remove, out of range, return false.
00152         return(false);
00153     }
00154     cigarOperations.erase(cigarOperations.begin() + index);
00155     // Modifying the cigar, so the query & reference indexes are out of date,
00156     // so clear them.
00157     clearQueryAndReferenceIndexes();
00158     return(true);
00159 }
00160 
00161 
00162 bool CigarRoller::IncrementCount(int index, int increment)
00163 {
00164     if((index < 0) || ((unsigned int)index >= cigarOperations.size()))
00165     {
00166         // can't update, out of range, return false.
00167         return(false);
00168     }
00169     cigarOperations[index].count += increment;
00170 
00171     // Modifying the cigar, so the query & reference indexes are out of date,
00172     // so clear them.
00173     clearQueryAndReferenceIndexes();
00174     return(true);
00175 }
00176 
00177 
00178 bool CigarRoller::Update(int index, Operation op, int count)
00179 {
00180     if((index < 0) || ((unsigned int)index >= cigarOperations.size()))
00181     {
00182         // can't update, out of range, return false.
00183         return(false);
00184     }
00185     cigarOperations[index].operation = op;
00186     cigarOperations[index].count = count;
00187 
00188     // Modifying the cigar, so the query & reference indexes are out of date,
00189     // so clear them.
00190     clearQueryAndReferenceIndexes();
00191     return(true);
00192 }
00193 
00194 
00195 void CigarRoller::Set(const char *cigarString)
00196 {
00197     clear();
00198     Add(cigarString);
00199 }
00200 
00201 
00202 void CigarRoller::Set(const uint32_t* cigarBuffer, uint16_t bufferLen)
00203 {
00204     clear();
00205 
00206     // Parse the buffer.
00207     for (int i = 0; i < bufferLen; i++)
00208     {
00209         int opLen = cigarBuffer[i] >> 4;
00210 
00211         Add(cigarBuffer[i] & 0xF, opLen);
00212     }
00213 }
00214 
00215 
00216 //
00217 // when we examine CIGAR strings, we need to know how
00218 // many cumulative insert and delete positions there are
00219 // so that we can adjust the read location appropriately.
00220 //
00221 // Here, we iterate over the vector of CIGAR operations,
00222 // summaring the count for each insert or delete (insert
00223 // increases the offset, delete decreases it).
00224 //
00225 // The use case for this is when we have a genome match
00226 // position based on an index word other than the first one,
00227 // and there is also a insert or delete between the beginning
00228 // of the read and the index word.  We can't simply report
00229 // the match position without taking into account the indels,
00230 // otherwise we'll be off by N where N is the sum of this
00231 // indel count.
00232 //
00233 // DEPRECATED - do not use.  There are better ways to accomplish that by using
00234 // read lengths, reference lengths, span of the read, etc.
00235 int CigarRoller::getMatchPositionOffset()
00236 {
00237     int offset = 0;
00238     std::vector<CigarOperator>::iterator i;
00239 
00240     for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00241     {
00242         switch (i->operation)
00243         {
00244             case insert:
00245                 offset += i->count;
00246                 break;
00247             case del:
00248                 offset -= i->count;
00249                 break;
00250                 // TODO anything for case skip:????
00251             default:
00252                 break;
00253         }
00254     }
00255     return offset;
00256 }
00257 
00258 
00259 //
00260 // Get the string reprentation of the Cigar operations in this object.
00261 // Caller must delete the returned value.
00262 //
00263 const char * CigarRoller::getString()
00264 {
00265     // NB: the exact size of the string is not important, it just needs to be guaranteed
00266     // larger than the largest number of characters we could put into it.
00267 
00268     // we do not explicitly manage memory usage, and we expect when program exits, the memory used here will be freed
00269     static char *ret = NULL;
00270     static unsigned int retSize = 0;
00271 
00272     if (ret == NULL)
00273     {
00274         retSize = cigarOperations.size() * 12 + 1;  // 12 == a magic number -> > 1 + log base 10 of MAXINT
00275         ret = (char*) malloc(sizeof(char) * retSize);
00276         assert(ret != NULL);
00277 
00278     }
00279     else
00280     {
00281         // currently, ret pointer has enough memory to use
00282         if (retSize > cigarOperations.size() * 12 + 1)
00283         {
00284         }
00285         else
00286         {
00287             retSize = cigarOperations.size() * 12 + 1;
00288             free(ret);
00289             ret = (char*) malloc(sizeof(char) * retSize);
00290         }
00291         assert(ret != NULL);
00292     }
00293 
00294     char *ptr = ret;
00295     char buf[12];   // > 1 + log base 10 of MAXINT
00296 
00297     std::vector<CigarOperator>::iterator i;
00298 
00299     // Progressively append the character representations of the operations to
00300     // the cigar string we allocated above.
00301 
00302     *ptr = '\0';    // clear result string
00303     for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00304     {
00305         sprintf(buf, "%d%c", (*i).count, (*i).getChar());
00306         strcat(ptr, buf);
00307         while (*ptr)
00308         {
00309             ptr++;    // limit the cost of strcat above
00310         }
00311     }
00312     return ret;
00313 }
00314 
00315 
00316 void CigarRoller::clear()
00317 {
00318     // Clearing the cigar, so the query & reference indexes are out of
00319     // date, so clear them.
00320     clearQueryAndReferenceIndexes();
00321     cigarOperations.clear();
00322 }
Generated on Tue Aug 23 18:19:05 2011 for libStatGen Software by  doxygen 1.6.3