libStatGen Software  1
NonOverlapRegions.cpp
00001 /*
00002  *  Copyright (C) 2011  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 //////////////////////////////////////////////////////////////////////////
00019 
00020 #include "NonOverlapRegions.h"
00021 #include <iostream>
00022 
00023 NonOverlapRegions::NonOverlapRegions()
00024     : myRegions()
00025 {
00026 }
00027 
00028 
00029 NonOverlapRegions::~NonOverlapRegions()
00030 {
00031     myRegions.clear();
00032 }
00033 
00034 
00035 void NonOverlapRegions::add(const char* chrom, int32_t start, int32_t end)
00036 {
00037     // Add the region.
00038     myRegions[chrom].add(start, end);
00039 }
00040 
00041 
00042 bool NonOverlapRegions::inRegion(const char* chrom, int32_t pos)
00043 {
00044     // Return whether or not the position was found within a region.
00045     // Note, this will create a NonOverlapRegion for this chrom if it
00046     // did not already exist, but it won't have any regions.
00047     return(myRegions[chrom].inRegion(pos));
00048 }
00049 
00050 
00051 NonOverlapRegionPos::NonOverlapRegionPos()
00052     : myRegions()
00053 {
00054     myRegionIter = myRegions.begin();
00055     myTmpIter = myRegions.begin();
00056 }
00057 
00058 
00059 NonOverlapRegionPos::NonOverlapRegionPos(const NonOverlapRegionPos& reg)
00060     : myRegions()
00061 {
00062     myRegionIter = myRegions.begin();
00063     myTmpIter = myRegions.begin();
00064 }
00065 
00066 
00067 NonOverlapRegionPos::~NonOverlapRegionPos()
00068 {
00069     myRegionIter = myRegions.begin();
00070     myTmpIter = myRegions.begin();
00071     myRegions.clear();
00072 }
00073 
00074 
00075 void NonOverlapRegionPos::add(int32_t start, int32_t end)
00076 {
00077     // Check to see if the start/end are valid in relation.
00078     if(start >= end)
00079     {
00080         std::cerr << "NonOverlapRegionPos::add: Invalid Range, "
00081                   << "start must be < end, but " << start << " >= " << end 
00082                   << std::endl;
00083         return;
00084     }
00085 
00086     bool added = false;
00087     // Locate the correct position in the region list for this start/end.
00088     if(inRegion(start))
00089     {
00090         // Check if the region end needs to be updated.
00091         if(end > myRegionIter->second)
00092         {
00093             myRegionIter->second = end;
00094         }
00095         added = true;
00096     }
00097     else
00098     {
00099         // Check to see if we are at the end.
00100         if(myRegionIter != myRegions.end())
00101         {
00102             // Not at the end.
00103             // Check to see if the region overlaps the current region.
00104             if(end >= myRegionIter->first)
00105             {
00106                 // Overlaps, so update the start.
00107                 myRegionIter->first = start;
00108                 // Check if the end needs to be updated.
00109                 if(myRegionIter->second < end)
00110                 {
00111                     myRegionIter->second = end;
00112                 }
00113                 added = true;
00114             }
00115         }
00116     }
00117 
00118     // If we already added the record, check to see if the end of the
00119     // new region overlaps any additional regions (know that myRegionIter is
00120     // not at the end.
00121     if(added)
00122     {
00123         // Check to see if any other regions were overlapped by this record.
00124         myTmpIter = myRegionIter;
00125         ++myTmpIter;
00126         while(myTmpIter != myRegions.end())
00127         {
00128             // If the region starts before the end of this one, consume it.
00129             if(myTmpIter->first <= end)
00130             {
00131                 if(myTmpIter->second > myRegionIter->second)
00132                 {
00133                     // Update this region with the new end.
00134                     myRegionIter->second = myTmpIter->second;
00135                 }
00136                 
00137                 myTmpIter = myRegions.erase(myTmpIter);
00138             }
00139             else
00140             {
00141                 // This region is not overlapped by the new region, so stop.
00142                 break;
00143             }
00144         }
00145     }
00146     else
00147     {
00148         // Add the region.
00149         myRegionIter = myRegions.insert(myRegionIter, 
00150                                          std::make_pair(start, end));
00151     }
00152 }
00153 
00154 
00155 bool NonOverlapRegionPos::inRegion(int32_t pos)
00156 {
00157     // Return whether or not the position was found within a region.
00158     // If it is found within the region, myRegionIter will point to the region
00159     // otherwise myRegionIter will point to the region after the position 
00160     // or to the end if the position is after the last region.
00161 
00162     // Determine if it needs to search to the left
00163     //   a) it is at the end
00164     //   b) the region starts after the position.
00165     if(myRegionIter == myRegions.end())
00166     {
00167         // If the iterator is at the end, search to the left.
00168         return(findLeft(pos));
00169     }
00170     else if(pos < myRegionIter->first)
00171     {
00172         // Not at the end, so search left if the position is less
00173         // than this region's start.
00174         return(findLeft(pos));
00175     }
00176     else
00177     {
00178         return(findRight(pos));
00179     }
00180 }
00181 
00182 
00183 bool NonOverlapRegionPos::findRight(int32_t pos)
00184 {
00185     // Keep looping until the end or until the position is found.
00186     while(myRegionIter != myRegions.end())
00187     {
00188         // Check to see if we have passed the position.
00189         if(pos < myRegionIter->first)
00190         {
00191             // stop here, position comes before this region,
00192             // so myRegionIter is pointing to just after it,
00193             // but was not found in the region.
00194             return(false);
00195         }
00196         else if(pos < myRegionIter->second)
00197         {
00198             // the position is in the region, so return true.
00199             return(true);
00200         }
00201         else
00202         {
00203             // The position is after this region, so increment.
00204             ++myRegionIter;
00205         }
00206     }
00207     // exited because we are at the end of the regions and the position was
00208     // not found.
00209     return(false);
00210 }
00211 
00212 
00213 bool NonOverlapRegionPos::findLeft(int32_t pos)
00214 {
00215     if(myRegionIter == myRegions.end())
00216     {
00217         if(myRegionIter == myRegions.begin())
00218         {
00219             // There is nothing in this list, so just return.
00220             return(false);
00221         }
00222         // Move 1 lower than the end.
00223         --myRegionIter;
00224     }
00225 
00226     while(myRegionIter->first > pos)
00227     {
00228         // This region is before our position, so move to the previous region
00229         // unless this is the first region in the list.
00230         if(myRegionIter == myRegions.begin())
00231         {
00232             // Did not find the position and the iter is at the element
00233             // just after the position.
00234             return(false);
00235         }
00236         // Not yet to the beginning of the list, so decrement.
00237         --myRegionIter;
00238     }
00239 
00240     // At this point, the regions iter points to a region that starts
00241     // before the position.
00242     // Determine if the position is in the region by checking if it is 
00243     // less than the end of the region.
00244     if(pos < myRegionIter->second)
00245     {
00246         // in the region.
00247         return(true);
00248     }
00249 
00250     // This region ends before this position.  The iterator needs to point
00251     // to the region after the position, so increment it.
00252     ++myRegionIter;
00253     return(false);
00254 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends