libStatGen Software  1
BamInterface.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "BamInterface.h"
00019 #include "CharBuffer.h"
00020 
00021 BamInterface::BamInterface()
00022 {
00023 }
00024 
00025 
00026 BamInterface::~BamInterface()
00027 {
00028 }
00029 
00030 
00031 // Read a BAM file's header.
00032 bool BamInterface::readHeader(IFILE filePtr, SamFileHeader& header,
00033                               SamStatus& status)
00034 {
00035     if(filePtr == NULL)
00036     {
00037         // File is not open, return false.
00038         status.setStatus(SamStatus::FAIL_ORDER, 
00039                            "Cannot read header since the file pointer is null");
00040         return(false);
00041     }
00042     if(filePtr->isOpen() == false)
00043     {
00044         status.setStatus(SamStatus::FAIL_ORDER, 
00045                          "Cannot read header since the file is not open");
00046         return(false);
00047     }
00048 
00049     // Clear the passed in header.
00050     header.resetHeader();
00051 
00052     int32_t headerLength;
00053     int readSize = ifread(filePtr, &headerLength, sizeof(headerLength));
00054     
00055     if(readSize != sizeof(headerLength))
00056     {
00057         String errMsg = "Failed to read the BAM header length, read ";
00058         errMsg += readSize;
00059         errMsg += " bytes instead of ";
00060         errMsg += (unsigned int)sizeof(headerLength);
00061         status.setStatus(SamStatus::FAIL_IO, errMsg.c_str());
00062         return(false);
00063     }
00064 
00065     String headerStr;
00066     if(headerLength > 0)
00067     {
00068         // Read the header.
00069         readSize = 
00070             ifread(filePtr, headerStr.LockBuffer(headerLength + 1), headerLength);
00071         headerStr[headerLength] = 0;
00072         headerStr.UnlockBuffer();
00073         if(readSize != headerLength)
00074         {
00075             // Failed to read the header.
00076             status.setStatus(SamStatus::FAIL_IO, "Failed to read the BAM header.");
00077             return(false);
00078         }
00079     }
00080     
00081     // Parse the header that was read.
00082     if(!header.addHeader(headerStr))
00083     {
00084         // Status is set in the method on failure.
00085         status.setStatus(SamStatus::FAIL_PARSE, header.getErrorMessage());
00086         return(false);
00087     }
00088 
00089     int referenceCount;
00090     // Read the number of references sequences.
00091     ifread(filePtr, &referenceCount, sizeof(int));
00092 
00093     // Get and clear the reference info so it can be set
00094     // from the bam reference table.
00095     SamReferenceInfo& refInfo = 
00096         header.getReferenceInfoForBamInterface();
00097     refInfo.clear();
00098 
00099     CharBuffer refName;
00100     
00101     // Read each reference sequence
00102     for (int i = 0; i < referenceCount; i++)
00103     {
00104         int nameLength;
00105         int rc;
00106         // Read the length of the reference name.
00107         rc = ifread(filePtr, &nameLength, sizeof(int));
00108         if(rc != sizeof(int))
00109         {
00110             status.setStatus(SamStatus::FAIL_IO, 
00111                              "Failed to read the BAM reference dictionary.");
00112             return(false);
00113         }
00114       
00115         // Read the name.
00116         refName.readFromFile(filePtr, nameLength);
00117 
00118         // Read the length of the reference sequence.
00119         int32_t refLen;
00120         rc = ifread(filePtr, &refLen, sizeof(int));
00121 
00122         if(rc != sizeof(int)) {
00123             status.setStatus(SamStatus::FAIL_IO, 
00124                              "Failed to read the BAM reference dictionary.");
00125             return(false);
00126         }
00127 
00128         refInfo.add(refName.c_str(), refLen);
00129     }
00130 
00131     // Successfully read the file.
00132     return(true);
00133 }
00134 
00135 
00136 bool BamInterface::writeHeader(IFILE filePtr, SamFileHeader& header,
00137                                SamStatus& status)
00138 {
00139     if((filePtr == NULL) || (filePtr->isOpen() == false))
00140     {
00141         // File is not open, return false.
00142         status.setStatus(SamStatus::FAIL_ORDER, 
00143                          "Cannot write header since the file pointer is null");
00144         return(false);
00145     }
00146 
00147     char magic[4];
00148     magic[0] = 'B';
00149     magic[1] = 'A';
00150     magic[2] = 'M';
00151     magic[3] = 1;
00152 
00153     // Write magic to the file.
00154     ifwrite(filePtr, magic, 4);
00155 
00156     ////////////////////////////////
00157     // Write the header to the file.
00158     ////////////////////////////////
00159     // Construct a string containing the entire header.
00160     std::string headerString = "";
00161     header.getHeaderString(headerString);
00162 
00163     int32_t headerLen = headerString.length();
00164     int numWrite = 0;
00165     
00166     // Write the header length.
00167     numWrite = ifwrite(filePtr, &headerLen, sizeof(int32_t));
00168     if(numWrite != sizeof(int32_t))
00169     {
00170         status.setStatus(SamStatus::FAIL_IO, 
00171                          "Failed to write the BAM header length.");
00172         return(false);
00173     }
00174    
00175     // Write the header to the file.
00176     numWrite = ifwrite(filePtr, headerString.c_str(), headerLen);
00177     if(numWrite != headerLen)
00178     {
00179         status.setStatus(SamStatus::FAIL_IO, 
00180                          "Failed to write the BAM header.");
00181         return(false);
00182     }
00183     
00184     ////////////////////////////////////////////////////////
00185     // Write the Reference Information.
00186     const SamReferenceInfo& refInfo = header.getReferenceInfo();
00187 
00188     // Get the number of sequences.    
00189     int32_t numSeq = refInfo.getNumEntries();
00190     ifwrite(filePtr, &numSeq, sizeof(int32_t));
00191 
00192     // Write each reference sequence
00193     for (int i = 0; i < numSeq; i++)
00194     {
00195         const char* refName = refInfo.getReferenceName(i);
00196         // Add one for the null value.
00197         int32_t nameLength = strlen(refName) + 1;
00198         // Write the length of the reference name.
00199         ifwrite(filePtr, &nameLength, sizeof(int32_t));
00200       
00201         // Write the name.
00202         ifwrite(filePtr, refName, nameLength);
00203         // Write the length of the reference sequence.
00204         int32_t refLen = refInfo.getReferenceLength(i);
00205         ifwrite(filePtr, &refLen, sizeof(int32_t));
00206     }
00207 
00208     return(true);
00209 }
00210 
00211 
00212 void BamInterface::readRecord(IFILE filePtr, SamFileHeader& header,
00213                               SamRecord& record, 
00214                               SamStatus& samStatus)
00215 {
00216     // TODO - need to validate there are @SQ lines in both sam/bam - MAYBE!
00217 
00218     // SetBufferFromFile will reset the record prior to reading a new one.
00219     if(record.setBufferFromFile(filePtr, header) != SamStatus::SUCCESS)
00220     {
00221         // Failed, so add the error message.
00222         samStatus.addError(record.getStatus());
00223     }
00224 }
00225 
00226 SamStatus::Status BamInterface::writeRecord(IFILE filePtr, 
00227                                             SamFileHeader& header,
00228                                             SamRecord& record,
00229                                             SamRecord::SequenceTranslation translation)
00230 {
00231     // Write the file, returning the status.
00232     return(record.writeRecordBuffer(filePtr, translation));
00233 }
00234 
00235 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends