libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "BamInterface.h" 00019 #include "CharBuffer.h" 00020 00021 BamInterface::BamInterface() 00022 { 00023 } 00024 00025 00026 BamInterface::~BamInterface() 00027 { 00028 } 00029 00030 00031 // Read a BAM file's header. 00032 bool BamInterface::readHeader(IFILE filePtr, SamFileHeader& header, 00033 SamStatus& status) 00034 { 00035 if(filePtr == NULL) 00036 { 00037 // File is not open, return false. 00038 status.setStatus(SamStatus::FAIL_ORDER, 00039 "Cannot read header since the file pointer is null"); 00040 return(false); 00041 } 00042 if(filePtr->isOpen() == false) 00043 { 00044 status.setStatus(SamStatus::FAIL_ORDER, 00045 "Cannot read header since the file is not open"); 00046 return(false); 00047 } 00048 00049 // Clear the passed in header. 00050 header.resetHeader(); 00051 00052 int32_t headerLength; 00053 int readSize = ifread(filePtr, &headerLength, sizeof(headerLength)); 00054 00055 if(readSize != sizeof(headerLength)) 00056 { 00057 String errMsg = "Failed to read the BAM header length, read "; 00058 errMsg += readSize; 00059 errMsg += " bytes instead of "; 00060 errMsg += (unsigned int)sizeof(headerLength); 00061 status.setStatus(SamStatus::FAIL_IO, errMsg.c_str()); 00062 return(false); 00063 } 00064 00065 String headerStr; 00066 if(headerLength > 0) 00067 { 00068 // Read the header. 00069 readSize = 00070 ifread(filePtr, headerStr.LockBuffer(headerLength + 1), headerLength); 00071 headerStr[headerLength] = 0; 00072 headerStr.UnlockBuffer(); 00073 if(readSize != headerLength) 00074 { 00075 // Failed to read the header. 00076 status.setStatus(SamStatus::FAIL_IO, "Failed to read the BAM header."); 00077 return(false); 00078 } 00079 } 00080 00081 // Parse the header that was read. 00082 if(!header.addHeader(headerStr)) 00083 { 00084 // Status is set in the method on failure. 00085 status.setStatus(SamStatus::FAIL_PARSE, header.getErrorMessage()); 00086 return(false); 00087 } 00088 00089 int referenceCount; 00090 // Read the number of references sequences. 00091 ifread(filePtr, &referenceCount, sizeof(int)); 00092 00093 // Get and clear the reference info so it can be set 00094 // from the bam reference table. 00095 SamReferenceInfo& refInfo = 00096 header.getReferenceInfoForBamInterface(); 00097 refInfo.clear(); 00098 00099 CharBuffer refName; 00100 00101 // Read each reference sequence 00102 for (int i = 0; i < referenceCount; i++) 00103 { 00104 int nameLength; 00105 int rc; 00106 // Read the length of the reference name. 00107 rc = ifread(filePtr, &nameLength, sizeof(int)); 00108 if(rc != sizeof(int)) 00109 { 00110 status.setStatus(SamStatus::FAIL_IO, 00111 "Failed to read the BAM reference dictionary."); 00112 return(false); 00113 } 00114 00115 // Read the name. 00116 refName.readFromFile(filePtr, nameLength); 00117 00118 // Read the length of the reference sequence. 00119 int32_t refLen; 00120 rc = ifread(filePtr, &refLen, sizeof(int)); 00121 00122 if(rc != sizeof(int)) { 00123 status.setStatus(SamStatus::FAIL_IO, 00124 "Failed to read the BAM reference dictionary."); 00125 return(false); 00126 } 00127 00128 refInfo.add(refName.c_str(), refLen); 00129 } 00130 00131 // Successfully read the file. 00132 return(true); 00133 } 00134 00135 00136 bool BamInterface::writeHeader(IFILE filePtr, SamFileHeader& header, 00137 SamStatus& status) 00138 { 00139 if((filePtr == NULL) || (filePtr->isOpen() == false)) 00140 { 00141 // File is not open, return false. 00142 status.setStatus(SamStatus::FAIL_ORDER, 00143 "Cannot write header since the file pointer is null"); 00144 return(false); 00145 } 00146 00147 char magic[4]; 00148 magic[0] = 'B'; 00149 magic[1] = 'A'; 00150 magic[2] = 'M'; 00151 magic[3] = 1; 00152 00153 // Write magic to the file. 00154 ifwrite(filePtr, magic, 4); 00155 00156 //////////////////////////////// 00157 // Write the header to the file. 00158 //////////////////////////////// 00159 // Construct a string containing the entire header. 00160 std::string headerString = ""; 00161 header.getHeaderString(headerString); 00162 00163 int32_t headerLen = headerString.length(); 00164 int numWrite = 0; 00165 00166 // Write the header length. 00167 numWrite = ifwrite(filePtr, &headerLen, sizeof(int32_t)); 00168 if(numWrite != sizeof(int32_t)) 00169 { 00170 status.setStatus(SamStatus::FAIL_IO, 00171 "Failed to write the BAM header length."); 00172 return(false); 00173 } 00174 00175 // Write the header to the file. 00176 numWrite = ifwrite(filePtr, headerString.c_str(), headerLen); 00177 if(numWrite != headerLen) 00178 { 00179 status.setStatus(SamStatus::FAIL_IO, 00180 "Failed to write the BAM header."); 00181 return(false); 00182 } 00183 00184 //////////////////////////////////////////////////////// 00185 // Write the Reference Information. 00186 const SamReferenceInfo& refInfo = header.getReferenceInfo(); 00187 00188 // Get the number of sequences. 00189 int32_t numSeq = refInfo.getNumEntries(); 00190 ifwrite(filePtr, &numSeq, sizeof(int32_t)); 00191 00192 // Write each reference sequence 00193 for (int i = 0; i < numSeq; i++) 00194 { 00195 const char* refName = refInfo.getReferenceName(i); 00196 // Add one for the null value. 00197 int32_t nameLength = strlen(refName) + 1; 00198 // Write the length of the reference name. 00199 ifwrite(filePtr, &nameLength, sizeof(int32_t)); 00200 00201 // Write the name. 00202 ifwrite(filePtr, refName, nameLength); 00203 // Write the length of the reference sequence. 00204 int32_t refLen = refInfo.getReferenceLength(i); 00205 ifwrite(filePtr, &refLen, sizeof(int32_t)); 00206 } 00207 00208 return(true); 00209 } 00210 00211 00212 void BamInterface::readRecord(IFILE filePtr, SamFileHeader& header, 00213 SamRecord& record, 00214 SamStatus& samStatus) 00215 { 00216 // TODO - need to validate there are @SQ lines in both sam/bam - MAYBE! 00217 00218 // SetBufferFromFile will reset the record prior to reading a new one. 00219 if(record.setBufferFromFile(filePtr, header) != SamStatus::SUCCESS) 00220 { 00221 // Failed, so add the error message. 00222 samStatus.addError(record.getStatus()); 00223 } 00224 } 00225 00226 SamStatus::Status BamInterface::writeRecord(IFILE filePtr, 00227 SamFileHeader& header, 00228 SamRecord& record, 00229 SamRecord::SequenceTranslation translation) 00230 { 00231 // Write the file, returning the status. 00232 return(record.writeRecordBuffer(filePtr, translation)); 00233 } 00234 00235