libStatGen Software  1
SamInterface.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "SamInterface.h"
00019 #include "SamRecordHelper.h"
00020 
00021 #include <limits>
00022 #include <stdint.h>
00023 
00024 SamInterface::SamInterface()
00025 {
00026 }
00027 
00028 
00029 SamInterface::~SamInterface()
00030 {
00031 }
00032 
00033 
00034 // Read a SAM file's header.
00035 bool SamInterface::readHeader(IFILE filePtr, SamFileHeader& header,
00036                               SamStatus& status)
00037 {
00038     if(filePtr == NULL)
00039     {
00040         // File is not open.
00041         status.setStatus(SamStatus::FAIL_ORDER, 
00042                            "Cannot read header since the file pointer is null");
00043         return(false);
00044     }
00045 
00046     // Clear the passed in header.
00047     header.resetHeader();
00048 
00049     int numValid = 0;
00050     int numInvalid = 0;
00051     std::string errorMessages = "";
00052 
00053     do {
00054         StringIntHash tags;
00055         StringArray   values;
00056         buffer.ReadLine(filePtr);
00057       
00058         // Stop reading header lines if at the end of the file or
00059         // if the line is not blank and does not start with an @.
00060         if ( ifeof(filePtr) || 
00061              ((buffer.Length() != 0) && (buffer[0] != '@')) )
00062         {
00063             break;
00064         }
00065       
00066         // This is a header line, so add it to header.
00067         if(header.addHeaderLine(buffer.c_str()))
00068         {
00069             if(buffer.Length() != 0)
00070             {
00071                 ++numValid;
00072             }
00073         }
00074         else
00075         {
00076             ++numInvalid;
00077             // Failed reading the header.
00078             errorMessages += header.getErrorMessage();
00079             // Skip further processing on this line since it was an error.
00080             continue;
00081         }
00082     } while (1);
00083    
00084     // Store the first record since it was read.
00085     myFirstRecord = buffer;
00086 
00087     if(numInvalid > 0)
00088     {
00089         if(numValid == 0)
00090         {
00091             std::cerr << "Failed to parse " << numInvalid << " header lines";
00092             std::cerr << ".  No valid header lines.\n";
00093             status.setStatus(SamStatus::FAIL_PARSE, errorMessages.c_str());
00094             return(false);
00095         }
00096     }
00097  
00098     // Successfully read.
00099     return(true);
00100 }
00101 
00102 bool SamInterface::writeHeader(IFILE filePtr, SamFileHeader& header,
00103                                SamStatus& status)
00104 {
00105     if((filePtr == NULL) || (filePtr->isOpen() == false))
00106     {
00107         // File is not open, return failure.
00108         status.setStatus(SamStatus::FAIL_ORDER, 
00109                          "Cannot write header since the file pointer is null");
00110         return(false);
00111     }
00112 
00113     ////////////////////////////////
00114     // Write the header to the file.
00115     ////////////////////////////////
00116     // Construct a string containing the entire header.
00117     std::string headerString = "";
00118     header.getHeaderString(headerString);
00119     
00120     int32_t headerLen = headerString.length();
00121     int numWrite = 0;
00122     
00123     // Write the header to the file.
00124     numWrite = ifwrite(filePtr, headerString.c_str(), headerLen);
00125     if(numWrite != headerLen)
00126     {
00127         status.setStatus(SamStatus::FAIL_IO, 
00128                          "Failed to write the SAM header.");
00129         return(false);
00130     }
00131     return(true);
00132 }
00133 
00134 
00135 void SamInterface::readRecord(IFILE filePtr, SamFileHeader& header,
00136                               SamRecord& record, 
00137                               SamStatus& samStatus)
00138 {
00139     // Initialize the status to success - will be set to false on failure.
00140     samStatus = SamStatus::SUCCESS;
00141 
00142     if((filePtr == NULL) || (filePtr->isOpen() == false))
00143     {
00144         // File is not open.
00145         samStatus.addError(SamStatus::FAIL_ORDER, 
00146                            "filePtr does not point to an open file.");
00147         return;
00148     }
00149     
00150     // If the first record has been set, use that and clear it,
00151     // otherwise read the record from the file.
00152     if(myFirstRecord.Length() != 0)
00153     {
00154         buffer = myFirstRecord;
00155         myFirstRecord.Clear();
00156     }
00157     else
00158     {
00159         // Read the next record.
00160         buffer.Clear();
00161         buffer.ReadLine(filePtr);
00162         // If the end of the file and nothing was read, return false.
00163         if ((ifeof(filePtr)) && (buffer.Length() == 0))
00164         {
00165             // end of the file and nothing to process.
00166             samStatus.addError(SamStatus::NO_MORE_RECS, 
00167                                "No more records in the file.");
00168             return;
00169         }
00170     }
00171     
00172     tokens.ReplaceColumns(buffer, '\t');
00173     
00174     
00175     // Error string for reporting a parsing failure.
00176     String errorString = "";
00177     
00178     if (tokens.Length() < 11)
00179     {
00180         errorString = "Too few columns (";
00181         errorString += tokens.Length();
00182         errorString += ") in the Record, expected at least 11.";
00183         samStatus.addError(SamStatus::FAIL_PARSE,
00184                            errorString.c_str());
00185         return;
00186     }
00187         
00188     // Reset the record before setting any fields.
00189     record.resetRecord();
00190 
00191     if(!record.setReadName(tokens[0]))
00192     {
00193         samStatus.addError(record.getStatus());
00194     }
00195     
00196     long flagInt = 0;
00197     if(!tokens[1].AsInteger(flagInt))
00198     {
00199         errorString = "flag, ";
00200         errorString += tokens[1].c_str();
00201         errorString += ", is not an integer.";
00202         samStatus.addError(SamStatus::FAIL_PARSE,
00203                            errorString.c_str());
00204     }
00205     else if((flagInt < 0) || (flagInt > UINT16_MAX))
00206     {
00207         errorString = "flag, ";
00208         errorString += tokens[1].c_str();
00209         errorString += ", is not between 0 and (2^16)-1 = 65535.";
00210         samStatus.addError(SamStatus::FAIL_PARSE,
00211                            errorString.c_str());
00212     }
00213     else if(!record.setFlag(flagInt))
00214     {
00215         samStatus.addError(record.getStatus().getStatus(),
00216                            record.getStatus().getStatusMessage());
00217     }
00218 
00219     if(!record.setReferenceName(header, tokens[2]))
00220     {
00221         samStatus.addError(record.getStatus().getStatus(),
00222                            record.getStatus().getStatusMessage());
00223     }
00224 
00225     long posInt = 0;
00226     if(!tokens[3].AsInteger(posInt))
00227     {
00228         errorString = "position, ";
00229         errorString += tokens[3].c_str();
00230         errorString += ", is not an integer.";
00231         samStatus.addError(SamStatus::FAIL_PARSE,
00232                            errorString.c_str());
00233     }
00234     else if((posInt < INT32_MIN) || (posInt > INT32_MAX))
00235     {
00236         // If it is not in this range, it cannot fit into a 32 bit int.
00237         errorString = "position, ";
00238         errorString += tokens[3].c_str();
00239         errorString += ", does not fit in a 32 bit signed int.";
00240         samStatus.addError(SamStatus::FAIL_PARSE,
00241                            errorString.c_str());
00242     }
00243     else if(!record.set1BasedPosition(posInt))
00244     {
00245         samStatus.addError(record.getStatus().getStatus(),
00246                            record.getStatus().getStatusMessage());
00247     }
00248 
00249     long mapInt = 0;
00250     if(!tokens[4].AsInteger(mapInt))
00251     {
00252         errorString = "map quality, ";
00253         errorString += tokens[4].c_str();
00254         errorString += ", is not an integer.";
00255         samStatus.addError(SamStatus::FAIL_PARSE,
00256                            errorString.c_str());
00257     }
00258     else if((mapInt < 0) || (mapInt > UINT8_MAX))
00259     {
00260         errorString = "map quality, ";
00261         errorString += tokens[4].c_str();
00262         errorString += ", is not between 0 and (2^8)-1 = 255.";
00263         samStatus.addError(SamStatus::FAIL_PARSE,
00264                            errorString.c_str());
00265     }
00266     else if(!record.setMapQuality(mapInt))
00267     {
00268         samStatus.addError(record.getStatus().getStatus(),
00269                            record.getStatus().getStatusMessage());
00270     }
00271 
00272     if(!record.setCigar(tokens[5]))
00273     {
00274         samStatus.addError(record.getStatus().getStatus(),
00275                            record.getStatus().getStatusMessage());
00276     }
00277 
00278     if(!record.setMateReferenceName(header, tokens[6]))
00279     {
00280         samStatus.addError(record.getStatus().getStatus(),
00281                            record.getStatus().getStatusMessage());
00282     }
00283 
00284     long matePosInt = 0;
00285     if(!tokens[7].AsInteger(matePosInt))
00286     {
00287         errorString = "mate position, ";
00288         errorString += tokens[7].c_str();
00289         errorString += ", is not an integer.";
00290         samStatus.addError(SamStatus::FAIL_PARSE,
00291                            errorString.c_str());
00292     }
00293     else if(!record.set1BasedMatePosition(matePosInt))
00294     {
00295         samStatus.addError(record.getStatus().getStatus(),
00296                            record.getStatus().getStatusMessage());
00297     }
00298 
00299     long insertInt = 0;
00300     if(!tokens[8].AsInteger(insertInt))
00301     {
00302         errorString = "insert size, ";
00303         errorString += tokens[8].c_str();
00304         errorString += ", is not an integer.";
00305         samStatus.addError(SamStatus::FAIL_PARSE,
00306                            errorString.c_str());
00307     }
00308     else if(!record.setInsertSize(insertInt))
00309     {
00310         samStatus.addError(record.getStatus().getStatus(),
00311                            record.getStatus().getStatusMessage());
00312     }
00313 
00314     if(!record.setSequence(tokens[9]))
00315     {
00316         samStatus.addError(record.getStatus().getStatus(),
00317                            record.getStatus().getStatusMessage());
00318     }
00319 
00320     if(!record.setQuality(tokens[10]))
00321     {
00322         samStatus.addError(record.getStatus().getStatus(),
00323                            record.getStatus().getStatusMessage());
00324     }
00325     
00326     // Clear the tag fields.
00327     record.clearTags();
00328     
00329     // Add the tags to the record.
00330     for (int i = 11; i < tokens.Length(); i++)
00331     {
00332         String & nugget = tokens[i];
00333         
00334         if (nugget.Length() < 6 || nugget[2] != ':' || nugget[4] != ':')
00335         {
00336             // invalid tag format.
00337             errorString = "Invalid Tag Format: ";
00338             errorString += nugget.c_str();
00339             errorString += ", should be cc:c:x*.";
00340             samStatus.addError(SamStatus::FAIL_PARSE,
00341                                errorString.c_str());
00342             continue;
00343         }
00344         
00345         // Valid tag format.
00346         // Add the tag.
00347         if(!record.addTag((const char *)nugget, nugget[3],
00348                           (const char *)nugget + 5))
00349         {
00350             samStatus.addError(record.getStatus().getStatus(),
00351                                record.getStatus().getStatusMessage());
00352         }
00353     }
00354 
00355     return;
00356 }
00357 
00358 
00359 SamStatus::Status SamInterface::writeRecord(IFILE filePtr,
00360                                             SamFileHeader& header, 
00361                                             SamRecord& record,
00362                                             SamRecord::SequenceTranslation translation)
00363 {
00364     // Store all the fields into a string, then write the string.
00365     String recordString = record.getReadName();
00366     recordString += "\t";
00367     recordString += record.getFlag();
00368     recordString += "\t";
00369     recordString += record.getReferenceName();
00370     recordString += "\t";
00371     recordString += record.get1BasedPosition();
00372     recordString += "\t";
00373     recordString += record.getMapQuality();
00374     recordString += "\t";
00375     recordString += record.getCigar();
00376     recordString += "\t";
00377     recordString += record.getMateReferenceNameOrEqual();
00378     recordString += "\t";
00379     recordString += record.get1BasedMatePosition();
00380     recordString += "\t";
00381     recordString += record.getInsertSize();
00382     recordString += "\t";
00383     recordString += record.getSequence(translation);
00384     recordString += "\t";
00385     recordString += record.getQuality();
00386 
00387     // If there are any tags, add a preceding tab.
00388     if(record.getTagLength() != 0)
00389     {
00390         recordString += "\t";
00391         SamRecordHelper::genSamTagsString(record, recordString);
00392     }
00393 
00394     recordString += "\n";
00395    
00396    
00397     // Write the record.
00398     ifwrite(filePtr, recordString.c_str(), recordString.Length());
00399     return(SamStatus::SUCCESS);
00400 }
00401 
00402 
00403 void SamInterface::ParseHeaderLine(StringIntHash & tags, StringArray & values)
00404 {
00405     tags.Clear();
00406     values.Clear();
00407 
00408     tokens.AddColumns(buffer, '\t');
00409 
00410     for (int i = 1; i < tokens.Length(); i++)
00411     {
00412         tags.Add(tokens[i].Left(2), i - 1);
00413         values.Push(tokens[i].SubStr(3));
00414     }
00415 }
00416 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends