libStatGen Software  1
CigarRoller.cpp
00001 /*
00002  *  Copyright (C) 2010-2011  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <ctype.h>
00021 #include "CigarRoller.h"
00022 
00023 ////////////////////////////////////////////////////////////////////////
00024 //
00025 // Cigar Roller Class
00026 //
00027 
00028 
00029 CigarRoller & CigarRoller::operator += (CigarRoller &rhs)
00030 {
00031     std::vector<CigarOperator>::iterator i;
00032     for (i = rhs.cigarOperations.begin(); i != rhs.cigarOperations.end(); i++)
00033     {
00034         (*this) += *i;
00035     }
00036     return *this;
00037 }
00038 
00039 
00040 //
00041 // Append a new operator at the end of the sequence.
00042 //
00043 CigarRoller & CigarRoller::operator += (const CigarOperator &rhs)
00044 {
00045     // Adding to the cigar, so the query & reference indexes would be
00046     // incomplete, so just clear them.
00047     clearQueryAndReferenceIndexes();
00048 
00049     if (rhs.count==0)
00050     {
00051         // nothing to do
00052     }
00053     else if (cigarOperations.empty() || cigarOperations.back() != rhs)
00054     {
00055         cigarOperations.push_back(rhs);
00056     }
00057     else
00058     {
00059         // last stored operation is the same as the new one, so just add it in
00060         cigarOperations.back().count += rhs.count;
00061     }
00062     return *this;
00063 }
00064 
00065 
00066 CigarRoller & CigarRoller::operator = (CigarRoller &rhs)
00067 {
00068     clear();
00069 
00070     (*this) += rhs;
00071     
00072     return *this;
00073 }
00074 
00075 
00076 //
00077 void CigarRoller::Add(Operation operation, int count)
00078 {
00079     CigarOperator rhs(operation, count);
00080     (*this) += rhs;
00081 }
00082 
00083 
00084 void CigarRoller::Add(char operation, int count)
00085 {
00086     switch (operation)
00087     {
00088         case 0:
00089         case 'M':
00090             Add(match, count);
00091             break;
00092         case 1:
00093         case 'I':
00094             Add(insert, count);
00095             break;
00096         case 2:
00097         case 'D':
00098             Add(del, count);
00099             break;
00100         case 3:
00101         case 'N':
00102             Add(skip, count);
00103             break;
00104         case 4:
00105         case 'S':
00106             Add(softClip, count);
00107             break;
00108         case 5:
00109         case 'H':
00110             Add(hardClip, count);
00111             break;
00112         case 6:
00113         case 'P':
00114             Add(pad, count);
00115             break;
00116         case 7:
00117         case '=':
00118             Add(match, count);
00119             break;
00120         case 8:
00121         case 'X':
00122             Add(match, count);
00123             break;
00124         default:
00125             // Hmmm... what to do?
00126             std::cerr << "ERROR "
00127                       << "(" << __FILE__ << ":" << __LINE__ <<"): "
00128                       << "Parsing CIGAR - invalid character found "
00129                       << "with parameter " << operation << " and " << count
00130                       << std::endl;
00131             break;
00132     }
00133 }
00134 
00135 
00136 void CigarRoller::Add(const char *cigarString)
00137 {
00138     int operationCount = 0;
00139     while (*cigarString)
00140     {
00141         if (isdigit(*cigarString))
00142         {
00143             char *endPtr;
00144             operationCount = strtol((char *) cigarString, &endPtr, 10);
00145             cigarString = endPtr;
00146         }
00147         else
00148         {
00149             Add(*cigarString, operationCount);
00150             cigarString++;
00151         }
00152     }
00153 }
00154 
00155 
00156 bool CigarRoller::Remove(int index)
00157 {
00158     if((index < 0) || ((unsigned int)index >= cigarOperations.size()))
00159     {
00160         // can't remove, out of range, return false.
00161         return(false);
00162     }
00163     cigarOperations.erase(cigarOperations.begin() + index);
00164     // Modifying the cigar, so the query & reference indexes are out of date,
00165     // so clear them.
00166     clearQueryAndReferenceIndexes();
00167     return(true);
00168 }
00169 
00170 
00171 bool CigarRoller::IncrementCount(int index, int increment)
00172 {
00173     if((index < 0) || ((unsigned int)index >= cigarOperations.size()))
00174     {
00175         // can't update, out of range, return false.
00176         return(false);
00177     }
00178     cigarOperations[index].count += increment;
00179 
00180     // Modifying the cigar, so the query & reference indexes are out of date,
00181     // so clear them.
00182     clearQueryAndReferenceIndexes();
00183     return(true);
00184 }
00185 
00186 
00187 bool CigarRoller::Update(int index, Operation op, int count)
00188 {
00189     if((index < 0) || ((unsigned int)index >= cigarOperations.size()))
00190     {
00191         // can't update, out of range, return false.
00192         return(false);
00193     }
00194     cigarOperations[index].operation = op;
00195     cigarOperations[index].count = count;
00196 
00197     // Modifying the cigar, so the query & reference indexes are out of date,
00198     // so clear them.
00199     clearQueryAndReferenceIndexes();
00200     return(true);
00201 }
00202 
00203 
00204 void CigarRoller::Set(const char *cigarString)
00205 {
00206     clear();
00207     Add(cigarString);
00208 }
00209 
00210 
00211 void CigarRoller::Set(const uint32_t* cigarBuffer, uint16_t bufferLen)
00212 {
00213     clear();
00214 
00215     // Parse the buffer.
00216     for (int i = 0; i < bufferLen; i++)
00217     {
00218         int opLen = cigarBuffer[i] >> 4;
00219 
00220         Add(cigarBuffer[i] & 0xF, opLen);
00221     }
00222 }
00223 
00224 
00225 //
00226 // when we examine CIGAR strings, we need to know how
00227 // many cumulative insert and delete positions there are
00228 // so that we can adjust the read location appropriately.
00229 //
00230 // Here, we iterate over the vector of CIGAR operations,
00231 // summaring the count for each insert or delete (insert
00232 // increases the offset, delete decreases it).
00233 //
00234 // The use case for this is when we have a genome match
00235 // position based on an index word other than the first one,
00236 // and there is also a insert or delete between the beginning
00237 // of the read and the index word.  We can't simply report
00238 // the match position without taking into account the indels,
00239 // otherwise we'll be off by N where N is the sum of this
00240 // indel count.
00241 //
00242 // DEPRECATED - do not use.  There are better ways to accomplish that by using
00243 // read lengths, reference lengths, span of the read, etc.
00244 int CigarRoller::getMatchPositionOffset()
00245 {
00246     int offset = 0;
00247     std::vector<CigarOperator>::iterator i;
00248 
00249     for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00250     {
00251         switch (i->operation)
00252         {
00253             case insert:
00254                 offset += i->count;
00255                 break;
00256             case del:
00257                 offset -= i->count;
00258                 break;
00259                 // TODO anything for case skip:????
00260             default:
00261                 break;
00262         }
00263     }
00264     return offset;
00265 }
00266 
00267 
00268 //
00269 // Get the string reprentation of the Cigar operations in this object.
00270 // Caller must delete the returned value.
00271 //
00272 const char * CigarRoller::getString()
00273 {
00274     // NB: the exact size of the string is not important, it just needs to be guaranteed
00275     // larger than the largest number of characters we could put into it.
00276 
00277     // we do not explicitly manage memory usage, and we expect when program exits, the memory used here will be freed
00278     static char *ret = NULL;
00279     static unsigned int retSize = 0;
00280 
00281     if (ret == NULL)
00282     {
00283         retSize = cigarOperations.size() * 12 + 1;  // 12 == a magic number -> > 1 + log base 10 of MAXINT
00284         ret = (char*) malloc(sizeof(char) * retSize);
00285         assert(ret != NULL);
00286 
00287     }
00288     else
00289     {
00290         // currently, ret pointer has enough memory to use
00291         if (retSize > cigarOperations.size() * 12 + 1)
00292         {
00293         }
00294         else
00295         {
00296             retSize = cigarOperations.size() * 12 + 1;
00297             free(ret);
00298             ret = (char*) malloc(sizeof(char) * retSize);
00299         }
00300         assert(ret != NULL);
00301     }
00302 
00303     char *ptr = ret;
00304     char buf[12];   // > 1 + log base 10 of MAXINT
00305 
00306     std::vector<CigarOperator>::iterator i;
00307 
00308     // Progressively append the character representations of the operations to
00309     // the cigar string we allocated above.
00310 
00311     *ptr = '\0';    // clear result string
00312     for (i = cigarOperations.begin(); i != cigarOperations.end(); i++)
00313     {
00314         sprintf(buf, "%d%c", (*i).count, (*i).getChar());
00315         strcat(ptr, buf);
00316         while (*ptr)
00317         {
00318             ptr++;    // limit the cost of strcat above
00319         }
00320     }
00321     return ret;
00322 }
00323 
00324 
00325 void CigarRoller::clear()
00326 {
00327     // Clearing the cigar, so the query & reference indexes are out of
00328     // date, so clear them.
00329     clearQueryAndReferenceIndexes();
00330     cigarOperations.clear();
00331 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends