00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "SamInterface.h"
00019 #include "SamRecordHelper.h"
00020
00021 #include <limits>
00022 #include <stdint.h>
00023
00024 SamInterface::SamInterface()
00025 {
00026 }
00027
00028
00029 SamInterface::~SamInterface()
00030 {
00031 }
00032
00033
00034
00035 bool SamInterface::readHeader(IFILE filePtr, SamFileHeader& header,
00036 SamStatus& status)
00037 {
00038 if(filePtr == NULL)
00039 {
00040
00041 status.setStatus(SamStatus::FAIL_ORDER,
00042 "Cannot read header since the file pointer is null");
00043 return(false);
00044 }
00045
00046
00047 header.resetHeader();
00048
00049 int numValid = 0;
00050 int numInvalid = 0;
00051 std::string errorMessages = "";
00052
00053 do {
00054 StringIntHash tags;
00055 StringArray values;
00056 buffer.ReadLine(filePtr);
00057
00058
00059
00060 if ( ifeof(filePtr) ||
00061 ((buffer.Length() != 0) && (buffer[0] != '@')) )
00062 {
00063 break;
00064 }
00065
00066
00067 if(header.addHeaderLine(buffer.c_str()))
00068 {
00069 if(buffer.Length() != 0)
00070 {
00071 ++numValid;
00072 }
00073 }
00074 else
00075 {
00076 ++numInvalid;
00077
00078 errorMessages += header.getErrorMessage();
00079
00080 continue;
00081 }
00082 } while (1);
00083
00084
00085 myFirstRecord = buffer;
00086
00087 if(numInvalid > 0)
00088 {
00089 if(numValid == 0)
00090 {
00091 std::cerr << "Failed to parse " << numInvalid << " header lines";
00092 std::cerr << ". No valid header lines.\n";
00093 status.setStatus(SamStatus::FAIL_PARSE, errorMessages.c_str());
00094 return(false);
00095 }
00096 }
00097
00098
00099 return(true);
00100 }
00101
00102 bool SamInterface::writeHeader(IFILE filePtr, SamFileHeader& header,
00103 SamStatus& status)
00104 {
00105 if((filePtr == NULL) || (filePtr->isOpen() == false))
00106 {
00107
00108 status.setStatus(SamStatus::FAIL_ORDER,
00109 "Cannot write header since the file pointer is null");
00110 return(false);
00111 }
00112
00113
00114
00115
00116
00117 std::string headerString = "";
00118 header.getHeaderString(headerString);
00119
00120 int32_t headerLen = headerString.length();
00121 int numWrite = 0;
00122
00123
00124 numWrite = ifwrite(filePtr, headerString.c_str(), headerLen);
00125 if(numWrite != headerLen)
00126 {
00127 status.setStatus(SamStatus::FAIL_IO,
00128 "Failed to write the SAM header.");
00129 return(false);
00130 }
00131 return(true);
00132 }
00133
00134
00135 void SamInterface::readRecord(IFILE filePtr, SamFileHeader& header,
00136 SamRecord& record,
00137 SamStatus& samStatus)
00138 {
00139
00140 samStatus = SamStatus::SUCCESS;
00141
00142 if((filePtr == NULL) || (filePtr->isOpen() == false))
00143 {
00144
00145 samStatus.addError(SamStatus::FAIL_ORDER,
00146 "filePtr does not point to an open file.");
00147 return;
00148 }
00149
00150
00151
00152 if(myFirstRecord.Length() != 0)
00153 {
00154 buffer = myFirstRecord;
00155 myFirstRecord.Clear();
00156 }
00157 else
00158 {
00159
00160 buffer.Clear();
00161 buffer.ReadLine(filePtr);
00162
00163 if ((ifeof(filePtr)) && (buffer.Length() == 0))
00164 {
00165
00166 samStatus.addError(SamStatus::NO_MORE_RECS,
00167 "No more records in the file.");
00168 return;
00169 }
00170 }
00171
00172 tokens.ReplaceColumns(buffer, '\t');
00173
00174
00175
00176 String errorString = "";
00177
00178 if (tokens.Length() < 11)
00179 {
00180 errorString = "Too few columns (";
00181 errorString += tokens.Length();
00182 errorString += ") in the Record, expected at least 11.";
00183 samStatus.addError(SamStatus::FAIL_PARSE,
00184 errorString.c_str());
00185 return;
00186 }
00187
00188
00189 record.resetRecord();
00190
00191 if(!record.setReadName(tokens[0]))
00192 {
00193 samStatus.addError(record.getStatus());
00194 }
00195
00196 long flagInt = 0;
00197 if(!tokens[1].AsInteger(flagInt))
00198 {
00199 errorString = "flag, ";
00200 errorString += tokens[1].c_str();
00201 errorString += ", is not an integer.";
00202 samStatus.addError(SamStatus::FAIL_PARSE,
00203 errorString.c_str());
00204 }
00205 else if((flagInt < 0) || (flagInt > UINT16_MAX))
00206 {
00207 errorString = "flag, ";
00208 errorString += tokens[1].c_str();
00209 errorString += ", is not between 0 and (2^16)-1 = 65535.";
00210 samStatus.addError(SamStatus::FAIL_PARSE,
00211 errorString.c_str());
00212 }
00213 else if(!record.setFlag(flagInt))
00214 {
00215 samStatus.addError(record.getStatus().getStatus(),
00216 record.getStatus().getStatusMessage());
00217 }
00218
00219 if(!record.setReferenceName(header, tokens[2]))
00220 {
00221 samStatus.addError(record.getStatus().getStatus(),
00222 record.getStatus().getStatusMessage());
00223 }
00224
00225 long posInt = 0;
00226 if(!tokens[3].AsInteger(posInt))
00227 {
00228 errorString = "position, ";
00229 errorString += tokens[3].c_str();
00230 errorString += ", is not an integer.";
00231 samStatus.addError(SamStatus::FAIL_PARSE,
00232 errorString.c_str());
00233 }
00234 else if((posInt < INT32_MIN) || (posInt > INT32_MAX))
00235 {
00236
00237 errorString = "position, ";
00238 errorString += tokens[3].c_str();
00239 errorString += ", does not fit in a 32 bit signed int.";
00240 samStatus.addError(SamStatus::FAIL_PARSE,
00241 errorString.c_str());
00242 }
00243 else if(!record.set1BasedPosition(posInt))
00244 {
00245 samStatus.addError(record.getStatus().getStatus(),
00246 record.getStatus().getStatusMessage());
00247 }
00248
00249 long mapInt = 0;
00250 if(!tokens[4].AsInteger(mapInt))
00251 {
00252 errorString = "map quality, ";
00253 errorString += tokens[4].c_str();
00254 errorString += ", is not an integer.";
00255 samStatus.addError(SamStatus::FAIL_PARSE,
00256 errorString.c_str());
00257 }
00258 else if((mapInt < 0) || (mapInt > UINT8_MAX))
00259 {
00260 errorString = "map quality, ";
00261 errorString += tokens[4].c_str();
00262 errorString += ", is not between 0 and (2^8)-1 = 255.";
00263 samStatus.addError(SamStatus::FAIL_PARSE,
00264 errorString.c_str());
00265 }
00266 else if(!record.setMapQuality(mapInt))
00267 {
00268 samStatus.addError(record.getStatus().getStatus(),
00269 record.getStatus().getStatusMessage());
00270 }
00271
00272 if(!record.setCigar(tokens[5]))
00273 {
00274 samStatus.addError(record.getStatus().getStatus(),
00275 record.getStatus().getStatusMessage());
00276 }
00277
00278 if(!record.setMateReferenceName(header, tokens[6]))
00279 {
00280 samStatus.addError(record.getStatus().getStatus(),
00281 record.getStatus().getStatusMessage());
00282 }
00283
00284 long matePosInt = 0;
00285 if(!tokens[7].AsInteger(matePosInt))
00286 {
00287 errorString = "mate position, ";
00288 errorString += tokens[7].c_str();
00289 errorString += ", is not an integer.";
00290 samStatus.addError(SamStatus::FAIL_PARSE,
00291 errorString.c_str());
00292 }
00293 else if(!record.set1BasedMatePosition(matePosInt))
00294 {
00295 samStatus.addError(record.getStatus().getStatus(),
00296 record.getStatus().getStatusMessage());
00297 }
00298
00299 long insertInt = 0;
00300 if(!tokens[8].AsInteger(insertInt))
00301 {
00302 errorString = "insert size, ";
00303 errorString += tokens[8].c_str();
00304 errorString += ", is not an integer.";
00305 samStatus.addError(SamStatus::FAIL_PARSE,
00306 errorString.c_str());
00307 }
00308 else if(!record.setInsertSize(insertInt))
00309 {
00310 samStatus.addError(record.getStatus().getStatus(),
00311 record.getStatus().getStatusMessage());
00312 }
00313
00314 if(!record.setSequence(tokens[9]))
00315 {
00316 samStatus.addError(record.getStatus().getStatus(),
00317 record.getStatus().getStatusMessage());
00318 }
00319
00320 if(!record.setQuality(tokens[10]))
00321 {
00322 samStatus.addError(record.getStatus().getStatus(),
00323 record.getStatus().getStatusMessage());
00324 }
00325
00326
00327 record.clearTags();
00328
00329
00330 for (int i = 11; i < tokens.Length(); i++)
00331 {
00332 String & nugget = tokens[i];
00333
00334 if (nugget.Length() < 6 || nugget[2] != ':' || nugget[4] != ':')
00335 {
00336
00337 errorString = "Invalid Tag Format: ";
00338 errorString += nugget.c_str();
00339 errorString += ", should be cc:c:x*.";
00340 samStatus.addError(SamStatus::FAIL_PARSE,
00341 errorString.c_str());
00342 continue;
00343 }
00344
00345
00346
00347 if(!record.addTag((const char *)nugget, nugget[3],
00348 (const char *)nugget + 5))
00349 {
00350 samStatus.addError(record.getStatus().getStatus(),
00351 record.getStatus().getStatusMessage());
00352 }
00353 }
00354
00355 return;
00356 }
00357
00358
00359 SamStatus::Status SamInterface::writeRecord(IFILE filePtr,
00360 SamFileHeader& header,
00361 SamRecord& record,
00362 SamRecord::SequenceTranslation translation)
00363 {
00364
00365 String recordString = record.getReadName();
00366 recordString += "\t";
00367 recordString += record.getFlag();
00368 recordString += "\t";
00369 recordString += record.getReferenceName();
00370 recordString += "\t";
00371 recordString += record.get1BasedPosition();
00372 recordString += "\t";
00373 recordString += record.getMapQuality();
00374 recordString += "\t";
00375 recordString += record.getCigar();
00376 recordString += "\t";
00377 recordString += record.getMateReferenceNameOrEqual();
00378 recordString += "\t";
00379 recordString += record.get1BasedMatePosition();
00380 recordString += "\t";
00381 recordString += record.getInsertSize();
00382 recordString += "\t";
00383 recordString += record.getSequence(translation);
00384 recordString += "\t";
00385 recordString += record.getQuality();
00386
00387
00388 if(record.getTagLength() != 0)
00389 {
00390 recordString += "\t";
00391 SamRecordHelper::genSamTagsString(record, recordString);
00392 }
00393
00394 recordString += "\n";
00395
00396
00397
00398 ifwrite(filePtr, recordString.c_str(), recordString.Length());
00399 return(SamStatus::SUCCESS);
00400 }
00401
00402
00403 void SamInterface::ParseHeaderLine(StringIntHash & tags, StringArray & values)
00404 {
00405 tags.Clear();
00406 values.Clear();
00407
00408 tokens.AddColumns(buffer, '\t');
00409
00410 for (int i = 1; i < tokens.Length(); i++)
00411 {
00412 tags.Add(tokens[i].Left(2), i - 1);
00413 values.Push(tokens[i].SubStr(3));
00414 }
00415 }
00416