FastQValidator.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <iostream>
00019 
00020 #include "StringArray.h"
00021 #include "StringHash.h"
00022 #include "Parameters.h"
00023 #include "FastQFile.h"
00024 
00025 int main(int argc, char ** argv)
00026 {   
00027    ParameterList inputParameters;
00028    String filename;
00029    int minReadLength = 10;
00030    int printableErrors = 20;
00031    int maxErrors = -1;
00032    String testParam;
00033    BaseAsciiMap::SPACE_TYPE myBaseType = BaseAsciiMap::UNKNOWN;
00034    
00035    // Read the parameters from the command line.
00036    bool baseSpace = false;
00037    bool colorSpace = false;
00038    bool autoDetect = false;
00039    bool ignoreErrors = false;
00040    bool baseComposition = false;
00041    bool quiet = false;
00042    bool params = false;
00043 
00044    BEGIN_LONG_PARAMETERS(longParameterList)
00045       LONG_STRINGPARAMETER("file", &filename)
00046       LONG_PARAMETER("baseComposition", &baseComposition)
00047       LONG_PARAMETER("quiet", &quiet)
00048       LONG_PARAMETER("params", &params)
00049       LONG_INTPARAMETER("minReadLen", &minReadLength)
00050       LONG_INTPARAMETER("maxErrors", &maxErrors)
00051       LONG_PARAMETER_GROUP("Space Type")
00052          EXCLUSIVE_PARAMETER("baseSpace", &baseSpace)
00053          EXCLUSIVE_PARAMETER("colorSpace", &colorSpace)
00054          EXCLUSIVE_PARAMETER("auto", &autoDetect)
00055       LONG_PARAMETER_GROUP("Errors")
00056          EXCLUSIVE_PARAMETER("ignoreErrors", &ignoreErrors)
00057          LONG_SMARTINTPARAMETER("printableErrors", &printableErrors)
00058    BEGIN_LEGACY_PARAMETERS()
00059       LONG_PARAMETER("printBaseComp", &baseComposition)       
00060       LONG_PARAMETER("disableAllMessages", &quiet)
00061       LONG_INTPARAMETER("quitAfterErrorNum", &maxErrors)
00062       LONG_PARAMETER_GROUP("Space Type")
00063          EXCLUSIVE_PARAMETER("baseSpace", &baseSpace)
00064          EXCLUSIVE_PARAMETER("colorSpace", &colorSpace)
00065          EXCLUSIVE_PARAMETER("autoDetect", &autoDetect)
00066       LONG_PARAMETER_GROUP("Errors")
00067          EXCLUSIVE_PARAMETER("ignoreAllErrors", &ignoreErrors)
00068          LONG_SMARTINTPARAMETER("maxReportedErrors", &printableErrors)
00069    END_LONG_PARAMETERS();
00070    
00071    inputParameters.Add(new LongParameters ("Input Parameters", longParameterList));
00072 
00073    inputParameters.Read(argc, argv);
00074 
00075    if(ignoreErrors)
00076    {
00077       // Ignore all errors, so set printableErrors to 0.
00078       printableErrors = 0;
00079    }
00080 
00081    // Set the base type based on the passed in parameters.
00082    if(baseSpace)
00083    {
00084       // Base Space
00085       myBaseType = BaseAsciiMap::BASE_SPACE;
00086    }
00087    else if(colorSpace)
00088    {
00089       myBaseType = BaseAsciiMap::COLOR_SPACE;
00090    }
00091    else
00092    {
00093       myBaseType = BaseAsciiMap::UNKNOWN;
00094       // Set autoDetect
00095       autoDetect = true;
00096    }
00097 
00098    // DO not print status if set to quiet.
00099    if((!quiet) && params)
00100    {
00101       inputParameters.Status();
00102    }
00103 
00104    if(filename == "")
00105    {
00106       if(quiet)
00107       {
00108          return(-1);
00109       }
00110       // No filename was specified so print a usage description.
00111       std::cout << "ERROR: No filename specified.  See below for usage help.";
00112       std::cout << std::endl << std::endl;
00113 
00114       std::cout << "  Required Parameters:" << std::endl;
00115       std::cout << "\t--file  :  FastQ filename with path to be prorcessed.\n";
00116       std::cout << std::endl;
00117 
00118       std::cout << "  Optional Parameters:" << std::endl;
00119       std::cout << "\t--minReadLen         : Minimum allowed read length (Defaults to 10).\n";
00120       std::cout << "\t--maxErrors          : Number of errors to allow before quitting\n";
00121       std::cout << "\t                       reading/validating the file.\n";
00122       std::cout << "\t                       -1 (default) indicates to not quit until\n";
00123       std::cout << "\t                       the entire file is read.\n";
00124       std::cout << "\t                       0 indicates not to read/validate anything\n";
00125       std::cout << "\t--printableErrors    : Maximum number of errors to print before\n";
00126       std::cout << "\t                       suppressing them (Defaults to 20).\n";
00127       std::cout << "\t                       Different than maxErrors since \n";
00128       std::cout << "\t                       printableErrors will continue reading and\n";
00129       std::cout << "\t                       validating the file until the end, but\n";
00130       std::cout << "\t                       just doesn't print the errors.\n";
00131       std::cout << "\t--ignoreErrors       : Ignore all errors (same as printableErrors = 0)\n";
00132       std::cout << "\t                       overwrites the printableErrors option.\n";
00133       std::cout << "\t--baseComposition    : Print the Base Composition Statistics.\n";
00134       std::cout << "\t--quiet              : Suppresses the display of errors and summary statistics.\n";
00135       std::cout << "\t                       Does not affect the printing of Base Composition Statistics.\n";
00136 
00137       std::cout << "\n  Optional Space Options for Raw Sequence (Last one specified is used):\n";
00138       std::cout << "\t--auto       : Determine baseSpace/colorSpace from the Raw Sequence in the file (Default).\n";
00139       std::cout << "\t--baseSpace  : ACTGN only\n";
00140       std::cout << "\t--colorSpace : 0123. only\n";
00141       std::cout << std::endl;
00142 
00143       std::cout << "  Usage:" << std::endl;
00144       std::cout << "\t./fastQValidator --file <fileName> [--minReadLen <minReadLen>] [--maxErrors <numErrors>] [--printableErrors <printableErrors>|--ignoreErrors] [--baseComposition] [--quiet] [--baseSpace|--colorSpace|--auto]\n\n";
00145       std::cout << "  Examples:" << std::endl;
00146       std::cout << "\t../fastQValidator --file testFile.txt\n";
00147       std::cout << "\t../fastQValidator --file testFile.txt --minReadLen 10 --baseSpace --printableErrors 100\n";
00148       std::cout << "\t./fastQValidator --file test/testFile.txt --minReadLen 10 --colorSpace --ignoreErrors\n";
00149       std::cout << std::endl;
00150       return (-1);
00151    }
00152    
00153    FastQFile validator(minReadLength, printableErrors);
00154    
00155    if(quiet)
00156    {
00157       validator.disableMessages();
00158    }
00159 
00160    validator.setMaxErrors(maxErrors);
00161 
00162    FastQStatus::Status status = validator.validateFastQFile(filename, baseComposition, myBaseType);
00163 
00164    if(!quiet)
00165    {
00166       std::cout << "Returning: " << status << " : " << FastQStatus::getStatusString(status)
00167                 << std::endl;
00168    }
00169 
00170    return(status);
00171 }
Generated on Wed Nov 17 15:38:28 2010 for StatGen Software by  doxygen 1.6.3