SamInterface.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "SamInterface.h"
00019 
00020 #include <limits>
00021 #include <stdint.h>
00022 
00023 SamInterface::SamInterface()
00024 {
00025 }
00026 
00027 
00028 SamInterface::~SamInterface()
00029 {
00030 }
00031 
00032 
00033 // Read a SAM file's header.
00034 SamStatus::Status SamInterface::readHeader(IFILE filePtr, SamFileHeader& header)
00035 {
00036     if(filePtr == NULL)
00037     {
00038         // File is not open.
00039         return(SamStatus::FAIL_ORDER);
00040     }
00041 
00042     // Clear the passed in header.
00043     header.resetHeader();
00044 
00045     do {
00046         StringIntHash tags;
00047         StringArray   values;
00048         buffer.ReadLine(filePtr);
00049       
00050         // Stop reading header lines if at the end of the file or
00051         // if the line is not blank and does not start with an @.
00052         if ( ifeof(filePtr) || 
00053              ((buffer.Length() != 0) && (buffer[0] != '@')) )
00054         {
00055             break;
00056         }
00057       
00058         // This is a header line, so add it to header.
00059         header.addHeaderLine(buffer.c_str());
00060 
00061         // Continue to the next line if this line is less than 3 characters
00062         // or is not an SQ line.
00063         if ((buffer.Length() < 3) || (buffer[1] != 'S') || (buffer[2] != 'Q'))
00064             continue;
00065       
00066         ParseHeaderLine(tags, values);
00067       
00068         int name = tags.Integer("SN");
00069         int length = tags.Integer("LN");
00070       
00071         if (name < 0 || length < 0) continue;
00072 
00073         header.addReferenceInfo(values[name], 
00074                                 values[length].AsInteger());
00075       
00076     } while (1);
00077    
00078     // Store the first record since it was read.
00079     myFirstRecord = buffer;
00080 
00081     // Successfully read.
00082     return(SamStatus::SUCCESS);
00083 }
00084 
00085 SamStatus::Status SamInterface::writeHeader(IFILE filePtr,
00086                                             SamFileHeader& header)
00087 {
00088     if((filePtr == NULL) || (filePtr->isOpen() == false))
00089     {
00090         // File is not open, return failure.
00091         return(SamStatus::FAIL_ORDER);
00092     }
00093 
00094     ////////////////////////////////
00095     // Write the header to the file.
00096     ////////////////////////////////
00097     // Construct a string containing the entire header.
00098     std::string headerString = "";
00099     header.getHeaderString(headerString);
00100     
00101     int32_t headerLen = headerString.length();
00102     int numWrite = 0;
00103     
00104     // Write the header to the file.
00105     numWrite = ifwrite(filePtr, headerString.c_str(), headerLen);
00106     if(numWrite != headerLen)
00107     {
00108         return(SamStatus::FAIL_IO);
00109     }
00110     return(SamStatus::SUCCESS);
00111 }
00112 
00113 
00114 void SamInterface::readRecord(IFILE filePtr, SamFileHeader& header,
00115                               SamRecord& record, 
00116                               SamStatus& samStatus)
00117 {
00118     // Initialize the status to success - will be set to false on failure.
00119     samStatus = SamStatus::SUCCESS;
00120 
00121     if((filePtr == NULL) || (filePtr->isOpen() == false))
00122     {
00123         // File is not open.
00124         samStatus.addError(SamStatus::FAIL_ORDER, 
00125                            "filePtr does not point to an open file.");
00126         return;
00127     }
00128     
00129     // If the first record has been set, use that and clear it,
00130     // otherwise read the record from the file.
00131     if(myFirstRecord.Length() != 0)
00132     {
00133         buffer = myFirstRecord;
00134         myFirstRecord.Clear();
00135     }
00136     else
00137     {
00138         // Read the next record.
00139         buffer.Clear();
00140         buffer.ReadLine(filePtr);
00141         // If the end of the file and nothing was read, return false.
00142         if ((ifeof(filePtr)) && (buffer.Length() == 0))
00143         {
00144             // end of the file and nothing to process.
00145             samStatus.addError(SamStatus::NO_MORE_RECS, 
00146                                "No more records in the file.");
00147             return;
00148         }
00149     }
00150     
00151     tokens.ReplaceColumns(buffer, '\t');
00152     
00153     
00154     // Error string for reporting a parsing failure.
00155     String errorString = "";
00156     
00157     if (tokens.Length() < 11)
00158     {
00159         errorString = "Too few columns (";
00160         errorString += tokens.Length();
00161         errorString += ") in the Record, expected at least 11.";
00162         samStatus.addError(SamStatus::FAIL_PARSE,
00163                            errorString.c_str());
00164         return;
00165     }
00166         
00167     // Reset the record before setting any fields.
00168     record.resetRecord();
00169 
00170     if(!record.setReadName(tokens[0]))
00171     {
00172         samStatus.addError(record.getStatus());
00173     }
00174     
00175     long flagInt = 0;
00176     if(!tokens[1].AsInteger(flagInt))
00177     {
00178         errorString = "flag, ";
00179         errorString += tokens[1].c_str();
00180         errorString += ", is not an integer.";
00181         samStatus.addError(SamStatus::FAIL_PARSE,
00182                            errorString.c_str());
00183     }
00184     else if((flagInt < 0) || (flagInt > UINT16_MAX))
00185     {
00186         errorString = "flag, ";
00187         errorString += tokens[1].c_str();
00188         errorString += ", is not between 0 and (2^16)-1 = 65535.";
00189         samStatus.addError(SamStatus::FAIL_PARSE,
00190                            errorString.c_str());
00191     }
00192     else if(!record.setFlag(flagInt))
00193     {
00194         samStatus.addError(record.getStatus().getStatus(),
00195                            record.getStatus().getStatusMessage());
00196     }
00197 
00198     if(!record.setReferenceName(header, tokens[2]))
00199     {
00200         samStatus.addError(record.getStatus().getStatus(),
00201                            record.getStatus().getStatusMessage());
00202     }
00203 
00204     long posInt = 0;
00205     if(!tokens[3].AsInteger(posInt))
00206     {
00207         errorString = "position, ";
00208         errorString += tokens[3].c_str();
00209         errorString += ", is not an integer.";
00210         samStatus.addError(SamStatus::FAIL_PARSE,
00211                            errorString.c_str());
00212     }
00213     else if((posInt < INT32_MIN) || (posInt > INT32_MAX))
00214     {
00215         // If it is not in this range, it cannot fit into a 32 bit int.
00216         errorString = "position, ";
00217         errorString += tokens[3].c_str();
00218         errorString += ", does not fit in a 32 bit signed int.";
00219         samStatus.addError(SamStatus::FAIL_PARSE,
00220                            errorString.c_str());
00221     }
00222     else if(!record.set1BasedPosition(posInt))
00223     {
00224         samStatus.addError(record.getStatus().getStatus(),
00225                            record.getStatus().getStatusMessage());
00226     }
00227 
00228     long mapInt = 0;
00229     if(!tokens[4].AsInteger(mapInt))
00230     {
00231         errorString = "map quality, ";
00232         errorString += tokens[4].c_str();
00233         errorString += ", is not an integer.";
00234         samStatus.addError(SamStatus::FAIL_PARSE,
00235                            errorString.c_str());
00236     }
00237     else if((mapInt < 0) || (mapInt > UINT8_MAX))
00238     {
00239         errorString = "map quality, ";
00240         errorString += tokens[4].c_str();
00241         errorString += ", is not between 0 and (2^8)-1 = 255.";
00242         samStatus.addError(SamStatus::FAIL_PARSE,
00243                            errorString.c_str());
00244     }
00245     else if(!record.setMapQuality(mapInt))
00246     {
00247         samStatus.addError(record.getStatus().getStatus(),
00248                            record.getStatus().getStatusMessage());
00249     }
00250 
00251     if(!record.setCigar(tokens[5]))
00252     {
00253         samStatus.addError(record.getStatus().getStatus(),
00254                            record.getStatus().getStatusMessage());
00255     }
00256 
00257     if(!record.setMateReferenceName(header, tokens[6]))
00258     {
00259         samStatus.addError(record.getStatus().getStatus(),
00260                            record.getStatus().getStatusMessage());
00261     }
00262 
00263     long matePosInt = 0;
00264     if(!tokens[7].AsInteger(matePosInt))
00265     {
00266         errorString = "mate position, ";
00267         errorString += tokens[7].c_str();
00268         errorString += ", is not an integer.";
00269         samStatus.addError(SamStatus::FAIL_PARSE,
00270                            errorString.c_str());
00271     }
00272     else if(!record.set1BasedMatePosition(matePosInt))
00273     {
00274         samStatus.addError(record.getStatus().getStatus(),
00275                            record.getStatus().getStatusMessage());
00276     }
00277 
00278     long insertInt = 0;
00279     if(!tokens[8].AsInteger(insertInt))
00280     {
00281         errorString = "insert size, ";
00282         errorString += tokens[8].c_str();
00283         errorString += ", is not an integer.";
00284         samStatus.addError(SamStatus::FAIL_PARSE,
00285                            errorString.c_str());
00286     }
00287     else if(!record.setInsertSize(insertInt))
00288     {
00289         samStatus.addError(record.getStatus().getStatus(),
00290                            record.getStatus().getStatusMessage());
00291     }
00292 
00293     if(!record.setSequence(tokens[9]))
00294     {
00295         samStatus.addError(record.getStatus().getStatus(),
00296                            record.getStatus().getStatusMessage());
00297     }
00298 
00299     if(!record.setQuality(tokens[10]))
00300     {
00301         samStatus.addError(record.getStatus().getStatus(),
00302                            record.getStatus().getStatusMessage());
00303     }
00304     
00305     // Clear the tag fields.
00306     record.clearTags();
00307     
00308     // Add the tags to the record.
00309     for (int i = 11; i < tokens.Length(); i++)
00310     {
00311         String & nugget = tokens[i];
00312         
00313         if (nugget.Length() < 6 || nugget[2] != ':' || nugget[4] != ':')
00314         {
00315             // invalid tag format.
00316             errorString = "Invalid Tag Format: ";
00317             errorString += nugget.c_str();
00318             errorString += ", should be cc:c:x*.";
00319             samStatus.addError(SamStatus::FAIL_PARSE,
00320                                errorString.c_str());
00321             continue;
00322         }
00323         
00324         // Valid tag format.
00325         // Add the tag.
00326         if(!record.addTag((const char *)nugget, nugget[3],
00327                           (const char *)nugget + 5))
00328         {
00329             samStatus.addError(record.getStatus().getStatus(),
00330                                record.getStatus().getStatusMessage());
00331         }
00332     }
00333 
00334     return;
00335 }
00336 
00337 
00338 SamStatus::Status SamInterface::writeRecord(IFILE filePtr,
00339                                             SamFileHeader& header, 
00340                                             SamRecord& record,
00341                                             SamRecord::SequenceTranslation translation)
00342 {
00343     // Store all the fields into a string, then write the string.
00344     String recordString = record.getReadName();
00345     recordString += "\t";
00346     recordString += record.getFlag();
00347     recordString += "\t";
00348     recordString += record.getReferenceName();
00349     recordString += "\t";
00350     recordString += record.get1BasedPosition();
00351     recordString += "\t";
00352     recordString += record.getMapQuality();
00353     recordString += "\t";
00354     recordString += record.getCigar();
00355     recordString += "\t";
00356     recordString += record.getMateReferenceNameOrEqual();
00357     recordString += "\t";
00358     recordString += record.get1BasedMatePosition();
00359     recordString += "\t";
00360     recordString += record.getInsertSize();
00361     recordString += "\t";
00362     recordString += record.getSequence(translation);
00363     recordString += "\t";
00364     recordString += record.getQuality();
00365    
00366     char tag[3];
00367     char vtype;
00368     void* value;
00369 
00370     // Reset the tag iterator to ensure that all the tags are written.
00371     record.resetTagIter();
00372 
00373     // While there are more tags, write them to the recordString.
00374     while(record.getNextSamTag(tag, vtype, &value) != false)
00375     {
00376         recordString += "\t";
00377         recordString += tag;
00378         recordString += ":"; 
00379         recordString += vtype;
00380         recordString += ":";
00381         if(record.isIntegerType(vtype))
00382         {
00383             recordString += (int)*(int*)value;
00384         }
00385         else if(record.isDoubleType(vtype))
00386         {
00387             recordString += (double)*(double*)value;
00388         }
00389         else if(record.isCharType(vtype))
00390         {
00391             recordString += (char)*(char*)value;
00392         }
00393         else
00394         {
00395             // String type.
00396             recordString += (String)*(String*)value;
00397         }
00398     }
00399 
00400     recordString += "\n";
00401    
00402    
00403     // Write the record.
00404     ifwrite(filePtr, recordString.c_str(), recordString.Length());
00405     return(SamStatus::SUCCESS);
00406 }
00407 
00408 
00409 void SamInterface::ParseHeaderLine(StringIntHash & tags, StringArray & values)
00410 {
00411     tags.Clear();
00412     values.Clear();
00413 
00414     tokens.AddColumns(buffer, '\t');
00415 
00416     for (int i = 1; i < tokens.Length(); i++)
00417     {
00418         tags.Add(tokens[i].Left(2), i - 1);
00419         values.Push(tokens[i].SubStr(3));
00420     }
00421 }
00422 
Generated on Tue Sep 6 17:51:59 2011 for libStatGen Software by  doxygen 1.6.3