libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2011 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 ////////////////////////////////////////////////////////////////////////// 00019 00020 #include "NonOverlapRegions.h" 00021 #include <iostream> 00022 00023 NonOverlapRegions::NonOverlapRegions() 00024 : myRegions() 00025 { 00026 } 00027 00028 00029 NonOverlapRegions::~NonOverlapRegions() 00030 { 00031 myRegions.clear(); 00032 } 00033 00034 00035 void NonOverlapRegions::add(const char* chrom, int32_t start, int32_t end) 00036 { 00037 // Add the region. 00038 myRegions[chrom].add(start, end); 00039 } 00040 00041 00042 bool NonOverlapRegions::inRegion(const char* chrom, int32_t pos) 00043 { 00044 // Return whether or not the position was found within a region. 00045 // Note, this will create a NonOverlapRegion for this chrom if it 00046 // did not already exist, but it won't have any regions. 00047 return(myRegions[chrom].inRegion(pos)); 00048 } 00049 00050 00051 NonOverlapRegionPos::NonOverlapRegionPos() 00052 : myRegions() 00053 { 00054 myRegionIter = myRegions.begin(); 00055 myTmpIter = myRegions.begin(); 00056 } 00057 00058 00059 NonOverlapRegionPos::NonOverlapRegionPos(const NonOverlapRegionPos& reg) 00060 : myRegions() 00061 { 00062 myRegionIter = myRegions.begin(); 00063 myTmpIter = myRegions.begin(); 00064 } 00065 00066 00067 NonOverlapRegionPos::~NonOverlapRegionPos() 00068 { 00069 myRegionIter = myRegions.begin(); 00070 myTmpIter = myRegions.begin(); 00071 myRegions.clear(); 00072 } 00073 00074 00075 void NonOverlapRegionPos::add(int32_t start, int32_t end) 00076 { 00077 // Check to see if the start/end are valid in relation. 00078 if(start >= end) 00079 { 00080 std::cerr << "NonOverlapRegionPos::add: Invalid Range, " 00081 << "start must be < end, but " << start << " >= " << end 00082 << std::endl; 00083 return; 00084 } 00085 00086 bool added = false; 00087 // Locate the correct position in the region list for this start/end. 00088 if(inRegion(start)) 00089 { 00090 // Check if the region end needs to be updated. 00091 if(end > myRegionIter->second) 00092 { 00093 myRegionIter->second = end; 00094 } 00095 added = true; 00096 } 00097 else 00098 { 00099 // Check to see if we are at the end. 00100 if(myRegionIter != myRegions.end()) 00101 { 00102 // Not at the end. 00103 // Check to see if the region overlaps the current region. 00104 if(end >= myRegionIter->first) 00105 { 00106 // Overlaps, so update the start. 00107 myRegionIter->first = start; 00108 // Check if the end needs to be updated. 00109 if(myRegionIter->second < end) 00110 { 00111 myRegionIter->second = end; 00112 } 00113 added = true; 00114 } 00115 } 00116 } 00117 00118 // If we already added the record, check to see if the end of the 00119 // new region overlaps any additional regions (know that myRegionIter is 00120 // not at the end. 00121 if(added) 00122 { 00123 // Check to see if any other regions were overlapped by this record. 00124 myTmpIter = myRegionIter; 00125 ++myTmpIter; 00126 while(myTmpIter != myRegions.end()) 00127 { 00128 // If the region starts before the end of this one, consume it. 00129 if(myTmpIter->first <= end) 00130 { 00131 if(myTmpIter->second > myRegionIter->second) 00132 { 00133 // Update this region with the new end. 00134 myRegionIter->second = myTmpIter->second; 00135 } 00136 00137 myTmpIter = myRegions.erase(myTmpIter); 00138 } 00139 else 00140 { 00141 // This region is not overlapped by the new region, so stop. 00142 break; 00143 } 00144 } 00145 } 00146 else 00147 { 00148 // Add the region. 00149 myRegionIter = myRegions.insert(myRegionIter, 00150 std::make_pair(start, end)); 00151 } 00152 } 00153 00154 00155 bool NonOverlapRegionPos::inRegion(int32_t pos) 00156 { 00157 // Return whether or not the position was found within a region. 00158 // If it is found within the region, myRegionIter will point to the region 00159 // otherwise myRegionIter will point to the region after the position 00160 // or to the end if the position is after the last region. 00161 00162 // Determine if it needs to search to the left 00163 // a) it is at the end 00164 // b) the region starts after the position. 00165 if(myRegionIter == myRegions.end()) 00166 { 00167 // If the iterator is at the end, search to the left. 00168 return(findLeft(pos)); 00169 } 00170 else if(pos < myRegionIter->first) 00171 { 00172 // Not at the end, so search left if the position is less 00173 // than this region's start. 00174 return(findLeft(pos)); 00175 } 00176 else 00177 { 00178 return(findRight(pos)); 00179 } 00180 } 00181 00182 00183 bool NonOverlapRegionPos::findRight(int32_t pos) 00184 { 00185 // Keep looping until the end or until the position is found. 00186 while(myRegionIter != myRegions.end()) 00187 { 00188 // Check to see if we have passed the position. 00189 if(pos < myRegionIter->first) 00190 { 00191 // stop here, position comes before this region, 00192 // so myRegionIter is pointing to just after it, 00193 // but was not found in the region. 00194 return(false); 00195 } 00196 else if(pos < myRegionIter->second) 00197 { 00198 // the position is in the region, so return true. 00199 return(true); 00200 } 00201 else 00202 { 00203 // The position is after this region, so increment. 00204 ++myRegionIter; 00205 } 00206 } 00207 // exited because we are at the end of the regions and the position was 00208 // not found. 00209 return(false); 00210 } 00211 00212 00213 bool NonOverlapRegionPos::findLeft(int32_t pos) 00214 { 00215 if(myRegionIter == myRegions.end()) 00216 { 00217 if(myRegionIter == myRegions.begin()) 00218 { 00219 // There is nothing in this list, so just return. 00220 return(false); 00221 } 00222 // Move 1 lower than the end. 00223 --myRegionIter; 00224 } 00225 00226 while(myRegionIter->first > pos) 00227 { 00228 // This region is before our position, so move to the previous region 00229 // unless this is the first region in the list. 00230 if(myRegionIter == myRegions.begin()) 00231 { 00232 // Did not find the position and the iter is at the element 00233 // just after the position. 00234 return(false); 00235 } 00236 // Not yet to the beginning of the list, so decrement. 00237 --myRegionIter; 00238 } 00239 00240 // At this point, the regions iter points to a region that starts 00241 // before the position. 00242 // Determine if the position is in the region by checking if it is 00243 // less than the end of the region. 00244 if(pos < myRegionIter->second) 00245 { 00246 // in the region. 00247 return(true); 00248 } 00249 00250 // This region ends before this position. The iterator needs to point 00251 // to the region after the position, so increment it. 00252 ++myRegionIter; 00253 return(false); 00254 }