libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "SamInterface.h" 00019 #include "SamRecordHelper.h" 00020 00021 #include <limits> 00022 #include <stdint.h> 00023 00024 SamInterface::SamInterface() 00025 { 00026 } 00027 00028 00029 SamInterface::~SamInterface() 00030 { 00031 } 00032 00033 00034 // Read a SAM file's header. 00035 bool SamInterface::readHeader(IFILE filePtr, SamFileHeader& header, 00036 SamStatus& status) 00037 { 00038 if(filePtr == NULL) 00039 { 00040 // File is not open. 00041 status.setStatus(SamStatus::FAIL_ORDER, 00042 "Cannot read header since the file pointer is null"); 00043 return(false); 00044 } 00045 00046 // Clear the passed in header. 00047 header.resetHeader(); 00048 00049 int numValid = 0; 00050 int numInvalid = 0; 00051 std::string errorMessages = ""; 00052 00053 do { 00054 StringIntHash tags; 00055 StringArray values; 00056 buffer.ReadLine(filePtr); 00057 00058 // Stop reading header lines if at the end of the file or 00059 // if the line is not blank and does not start with an @. 00060 if ( ifeof(filePtr) || 00061 ((buffer.Length() != 0) && (buffer[0] != '@')) ) 00062 { 00063 break; 00064 } 00065 00066 // This is a header line, so add it to header. 00067 if(header.addHeaderLine(buffer.c_str())) 00068 { 00069 if(buffer.Length() != 0) 00070 { 00071 ++numValid; 00072 } 00073 } 00074 else 00075 { 00076 ++numInvalid; 00077 // Failed reading the header. 00078 errorMessages += header.getErrorMessage(); 00079 // Skip further processing on this line since it was an error. 00080 continue; 00081 } 00082 } while (1); 00083 00084 // Store the first record since it was read. 00085 myFirstRecord = buffer; 00086 00087 if(numInvalid > 0) 00088 { 00089 if(numValid == 0) 00090 { 00091 std::cerr << "Failed to parse " << numInvalid << " header lines"; 00092 std::cerr << ". No valid header lines.\n"; 00093 status.setStatus(SamStatus::FAIL_PARSE, errorMessages.c_str()); 00094 return(false); 00095 } 00096 } 00097 00098 // Successfully read. 00099 return(true); 00100 } 00101 00102 bool SamInterface::writeHeader(IFILE filePtr, SamFileHeader& header, 00103 SamStatus& status) 00104 { 00105 if((filePtr == NULL) || (filePtr->isOpen() == false)) 00106 { 00107 // File is not open, return failure. 00108 status.setStatus(SamStatus::FAIL_ORDER, 00109 "Cannot write header since the file pointer is null"); 00110 return(false); 00111 } 00112 00113 //////////////////////////////// 00114 // Write the header to the file. 00115 //////////////////////////////// 00116 // Construct a string containing the entire header. 00117 std::string headerString = ""; 00118 header.getHeaderString(headerString); 00119 00120 int32_t headerLen = headerString.length(); 00121 int numWrite = 0; 00122 00123 // Write the header to the file. 00124 numWrite = ifwrite(filePtr, headerString.c_str(), headerLen); 00125 if(numWrite != headerLen) 00126 { 00127 status.setStatus(SamStatus::FAIL_IO, 00128 "Failed to write the SAM header."); 00129 return(false); 00130 } 00131 return(true); 00132 } 00133 00134 00135 void SamInterface::readRecord(IFILE filePtr, SamFileHeader& header, 00136 SamRecord& record, 00137 SamStatus& samStatus) 00138 { 00139 // Initialize the status to success - will be set to false on failure. 00140 samStatus = SamStatus::SUCCESS; 00141 00142 if((filePtr == NULL) || (filePtr->isOpen() == false)) 00143 { 00144 // File is not open. 00145 samStatus.addError(SamStatus::FAIL_ORDER, 00146 "filePtr does not point to an open file."); 00147 return; 00148 } 00149 00150 // If the first record has been set, use that and clear it, 00151 // otherwise read the record from the file. 00152 if(myFirstRecord.Length() != 0) 00153 { 00154 buffer = myFirstRecord; 00155 myFirstRecord.Clear(); 00156 } 00157 else 00158 { 00159 // Read the next record. 00160 buffer.Clear(); 00161 buffer.ReadLine(filePtr); 00162 // If the end of the file and nothing was read, return false. 00163 if ((ifeof(filePtr)) && (buffer.Length() == 0)) 00164 { 00165 // end of the file and nothing to process. 00166 samStatus.addError(SamStatus::NO_MORE_RECS, 00167 "No more records in the file."); 00168 return; 00169 } 00170 } 00171 00172 tokens.ReplaceColumns(buffer, '\t'); 00173 00174 00175 // Error string for reporting a parsing failure. 00176 String errorString = ""; 00177 00178 if (tokens.Length() < 11) 00179 { 00180 errorString = "Too few columns ("; 00181 errorString += tokens.Length(); 00182 errorString += ") in the Record, expected at least 11."; 00183 samStatus.addError(SamStatus::FAIL_PARSE, 00184 errorString.c_str()); 00185 return; 00186 } 00187 00188 // Reset the record before setting any fields. 00189 record.resetRecord(); 00190 00191 if(!record.setReadName(tokens[0])) 00192 { 00193 samStatus.addError(record.getStatus()); 00194 } 00195 00196 long flagInt = 0; 00197 if(!tokens[1].AsInteger(flagInt)) 00198 { 00199 errorString = "flag, "; 00200 errorString += tokens[1].c_str(); 00201 errorString += ", is not an integer."; 00202 samStatus.addError(SamStatus::FAIL_PARSE, 00203 errorString.c_str()); 00204 } 00205 else if((flagInt < 0) || (flagInt > UINT16_MAX)) 00206 { 00207 errorString = "flag, "; 00208 errorString += tokens[1].c_str(); 00209 errorString += ", is not between 0 and (2^16)-1 = 65535."; 00210 samStatus.addError(SamStatus::FAIL_PARSE, 00211 errorString.c_str()); 00212 } 00213 else if(!record.setFlag(flagInt)) 00214 { 00215 samStatus.addError(record.getStatus().getStatus(), 00216 record.getStatus().getStatusMessage()); 00217 } 00218 00219 if(!record.setReferenceName(header, tokens[2])) 00220 { 00221 samStatus.addError(record.getStatus().getStatus(), 00222 record.getStatus().getStatusMessage()); 00223 } 00224 00225 long posInt = 0; 00226 if(!tokens[3].AsInteger(posInt)) 00227 { 00228 errorString = "position, "; 00229 errorString += tokens[3].c_str(); 00230 errorString += ", is not an integer."; 00231 samStatus.addError(SamStatus::FAIL_PARSE, 00232 errorString.c_str()); 00233 } 00234 else if((posInt < INT32_MIN) || (posInt > INT32_MAX)) 00235 { 00236 // If it is not in this range, it cannot fit into a 32 bit int. 00237 errorString = "position, "; 00238 errorString += tokens[3].c_str(); 00239 errorString += ", does not fit in a 32 bit signed int."; 00240 samStatus.addError(SamStatus::FAIL_PARSE, 00241 errorString.c_str()); 00242 } 00243 else if(!record.set1BasedPosition(posInt)) 00244 { 00245 samStatus.addError(record.getStatus().getStatus(), 00246 record.getStatus().getStatusMessage()); 00247 } 00248 00249 long mapInt = 0; 00250 if(!tokens[4].AsInteger(mapInt)) 00251 { 00252 errorString = "map quality, "; 00253 errorString += tokens[4].c_str(); 00254 errorString += ", is not an integer."; 00255 samStatus.addError(SamStatus::FAIL_PARSE, 00256 errorString.c_str()); 00257 } 00258 else if((mapInt < 0) || (mapInt > UINT8_MAX)) 00259 { 00260 errorString = "map quality, "; 00261 errorString += tokens[4].c_str(); 00262 errorString += ", is not between 0 and (2^8)-1 = 255."; 00263 samStatus.addError(SamStatus::FAIL_PARSE, 00264 errorString.c_str()); 00265 } 00266 else if(!record.setMapQuality(mapInt)) 00267 { 00268 samStatus.addError(record.getStatus().getStatus(), 00269 record.getStatus().getStatusMessage()); 00270 } 00271 00272 if(!record.setCigar(tokens[5])) 00273 { 00274 samStatus.addError(record.getStatus().getStatus(), 00275 record.getStatus().getStatusMessage()); 00276 } 00277 00278 if(!record.setMateReferenceName(header, tokens[6])) 00279 { 00280 samStatus.addError(record.getStatus().getStatus(), 00281 record.getStatus().getStatusMessage()); 00282 } 00283 00284 long matePosInt = 0; 00285 if(!tokens[7].AsInteger(matePosInt)) 00286 { 00287 errorString = "mate position, "; 00288 errorString += tokens[7].c_str(); 00289 errorString += ", is not an integer."; 00290 samStatus.addError(SamStatus::FAIL_PARSE, 00291 errorString.c_str()); 00292 } 00293 else if(!record.set1BasedMatePosition(matePosInt)) 00294 { 00295 samStatus.addError(record.getStatus().getStatus(), 00296 record.getStatus().getStatusMessage()); 00297 } 00298 00299 long insertInt = 0; 00300 if(!tokens[8].AsInteger(insertInt)) 00301 { 00302 errorString = "insert size, "; 00303 errorString += tokens[8].c_str(); 00304 errorString += ", is not an integer."; 00305 samStatus.addError(SamStatus::FAIL_PARSE, 00306 errorString.c_str()); 00307 } 00308 else if(!record.setInsertSize(insertInt)) 00309 { 00310 samStatus.addError(record.getStatus().getStatus(), 00311 record.getStatus().getStatusMessage()); 00312 } 00313 00314 if(!record.setSequence(tokens[9])) 00315 { 00316 samStatus.addError(record.getStatus().getStatus(), 00317 record.getStatus().getStatusMessage()); 00318 } 00319 00320 if(!record.setQuality(tokens[10])) 00321 { 00322 samStatus.addError(record.getStatus().getStatus(), 00323 record.getStatus().getStatusMessage()); 00324 } 00325 00326 // Clear the tag fields. 00327 record.clearTags(); 00328 00329 // Add the tags to the record. 00330 for (int i = 11; i < tokens.Length(); i++) 00331 { 00332 String & nugget = tokens[i]; 00333 00334 if (nugget.Length() < 6 || nugget[2] != ':' || nugget[4] != ':') 00335 { 00336 // invalid tag format. 00337 errorString = "Invalid Tag Format: "; 00338 errorString += nugget.c_str(); 00339 errorString += ", should be cc:c:x*."; 00340 samStatus.addError(SamStatus::FAIL_PARSE, 00341 errorString.c_str()); 00342 continue; 00343 } 00344 00345 // Valid tag format. 00346 // Add the tag. 00347 if(!record.addTag((const char *)nugget, nugget[3], 00348 (const char *)nugget + 5)) 00349 { 00350 samStatus.addError(record.getStatus().getStatus(), 00351 record.getStatus().getStatusMessage()); 00352 } 00353 } 00354 00355 return; 00356 } 00357 00358 00359 SamStatus::Status SamInterface::writeRecord(IFILE filePtr, 00360 SamFileHeader& header, 00361 SamRecord& record, 00362 SamRecord::SequenceTranslation translation) 00363 { 00364 // Store all the fields into a string, then write the string. 00365 String recordString = record.getReadName(); 00366 recordString += "\t"; 00367 recordString += record.getFlag(); 00368 recordString += "\t"; 00369 recordString += record.getReferenceName(); 00370 recordString += "\t"; 00371 recordString += record.get1BasedPosition(); 00372 recordString += "\t"; 00373 recordString += record.getMapQuality(); 00374 recordString += "\t"; 00375 recordString += record.getCigar(); 00376 recordString += "\t"; 00377 recordString += record.getMateReferenceNameOrEqual(); 00378 recordString += "\t"; 00379 recordString += record.get1BasedMatePosition(); 00380 recordString += "\t"; 00381 recordString += record.getInsertSize(); 00382 recordString += "\t"; 00383 recordString += record.getSequence(translation); 00384 recordString += "\t"; 00385 recordString += record.getQuality(); 00386 00387 // If there are any tags, add a preceding tab. 00388 if(record.getTagLength() != 0) 00389 { 00390 recordString += "\t"; 00391 SamRecordHelper::genSamTagsString(record, recordString); 00392 } 00393 00394 recordString += "\n"; 00395 00396 00397 // Write the record. 00398 ifwrite(filePtr, recordString.c_str(), recordString.Length()); 00399 return(SamStatus::SUCCESS); 00400 } 00401 00402 00403 void SamInterface::ParseHeaderLine(StringIntHash & tags, StringArray & values) 00404 { 00405 tags.Clear(); 00406 values.Clear(); 00407 00408 tokens.AddColumns(buffer, '\t'); 00409 00410 for (int i = 1; i < tokens.Length(); i++) 00411 { 00412 tags.Add(tokens[i].Left(2), i - 1); 00413 values.Push(tokens[i].SubStr(3)); 00414 } 00415 } 00416